互联网+量化投资 大数据指数手把手

最后更新于:2022-04-01 21:55:54

# 互联网+量化投资 大数据指数手把手

> 来源:https://uqer.io/community/share/55263359f9f06c8f3390457b

## 策略简介

从公司基本面、市场驱动指标、市场情绪等多维度验证拥有“天时、地利、人和”的大牛股,让每个人都能生产符合自己投资理念的大数据指数。实现中参考了水星社区中的牛人@吴宇笛的因子计分卡策略。

本策略的参数如下:

+ 起始日期: 2014年1月1日
+ 结束日期: 2016年5月18日
+ 股票池: 上证50
+ 业绩基准: 上证50
+ 起始资金: 100000元
+ 调仓周期: 1个月

策略参数获取:

+ 十日移动均线(MA10) 60日移动均线(MA60) 资产回报率(ROA) 市盈率(PE) 对数市值(LCAP) 波幅中位数(DHILO) 净利润/营业总收入(NPToTOR) 产权比率(DebtEquityRatio) 营业利润同比增长(OperatingProfitGrowRate) 总资产同比增长(TotalAssetGrowRate) 均可以通过`DataAPI.MktStockFactorsDateRangeGet`获得
+ 市场新闻热度指标可以通过`DataAPI.NewsHeatIndexGet`获得
+ 市场情绪指标可以通过`DataAPI.NewsSentimentIndexGet`获得;与新闻热度指标一样,都是DataYes利用大数据分析从海量关联新闻中提取出来的

## 调仓策略

(1) 对每只股票获取之前的120个交易日的收盘价,计算20日累计收益,共得到100个收益率数据

(2) 获取该股票同期的100个交易日的基本面、市场驱动指标和市场热度、情绪指标,分别计算均值、标准差,并进行中心化

(3) 以该股票20日累计收益率为因变量,基本面、市场驱动指标和市场热度、情绪指标为自变量进行[弹性网 ( ElasticNet ) 回归](http://scikit-learn.org/stable/modules/linear_model.html)

(4) 获取该股票前一日的基本面、市场驱动指标和市场热度、情绪指标

(5) 对该股票前一日的基本面、市场驱动指标和市场热度、情绪指标,依据前100个交易日的均值和标准差,置相对大小为 (前一日值 - 均值)/ 标准差 并四舍五入,作为在该项因子上的得分

(6) 根据之前计算出的权重对这些得分进行加总,得到该股票的得分,并以此为指数进行股票筛选

(7) 根据指数得分排序,选取总分最高的前五支股票作为买入列表

(8) 根据买入列表调仓

```py
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.regression.linear_model as lm
from sklearn.linear_model import ElasticNet
from CAL.PyCAL import *

used_factors = ['MA10', 'MA60', 'ROA', 'PE', 'LCAP', 'DHILO', 'DebtEquityRatio', 'OperatingProfitGrowRate', 'TotalAssetGrowRate', 'NPToTOR']
#used_factors = ['ASSI', 'EBITToTOR', 'ETP5', 'MA60', 'HSIGMA', 'PE', 'VOL60', 'SUE', 'DAVOL20', 'TotalAssetGrowRate']

def StockFactorsGet(universe, trading_days):
    data_all = {}
    for i,stock in enumerate(universe):
        try:
            data = DataAPI.MktStockFactorsDateRangeGet(secID = stock, beginDate = trading_days[0], endDate = trading_days[-1], field = ['tradeDate'] + used_factors)
            # data['tradeDate'] = pd.to_datetime(data['tradeDate'])
        except Exception, e:
            print e

        try:
            news_data = DataAPI.NewsHeatIndexGet(secID = stock, beginDate = trading_days[0], endDate = trading_days[-1])
            heatIndex = news_data.set_index('newsPublishDate').sort_index().reset_index()[['heatIndex','newsPublishDate']]
            heatIndex['flag'] = heatIndex['newsPublishDate'].apply(lambda x: True if x in data.tradeDate.values else False)
            heatIndex = heatIndex[heatIndex.flag].reset_index()
            data = pd.merge(data, heatIndex, how = 'inner', left_index = 'tradeDate', right_index = 'newsPublishDate').drop(['index','newsPublishDate','flag'], 1)
        except Exception, e:
            data['heatIndex'] = 0

        try:
            emotion_data = DataAPI.NewsSentimentIndexGet(secID = stock, beginDate = trading_days[0], endDate = trading_days[-1])
            emotionIndex = emotion_data.set_index('newsPublishDate').sort_index().reset_index()[['sentimentIndex','newsPublishDate']]
            emotionIndex['flag'] = emotionIndex['newsPublishDate'].apply(lambda x: True if x in data.tradeDate.values else False)
            emotionIndex = emotionIndex[emotionIndex.flag].reset_index()
            data = pd.merge(data, emotionIndex, how = 'inner', left_index = 'tradeDate', right_index = 'newsPublishDate').drop(['index','newsPublishDate','flag'], 1)
        except Exception, e:
            # print 'emotion', stock, e
            data['sentimentIndex'] = 0

        data['news_emotion'] = data['heatIndex'] * data['sentimentIndex']
        data_all[stock] = data
    return data_all

def StockRegDataGet(stock, trading_days, factors, shift = 20):
    start = trading_days[0]
    end = trading_days[-1]
    data = factors[(factors.tradeDate >= start.strftime('%Y-%m-%d')) & (factors.tradeDate <= end.strftime('%Y-%m-%d'))][:-shift]

    ret = DataAPI.MktEqudGet(secID = stock, beginDate = start.strftime('%Y%m%d'), endDate = end.strftime('%Y%m%d'), field = ['tradeDate', 'closePrice'])
    ret['fwdPrice'] = ret['closePrice'].shift(-shift)
    ret['return'] = ret['fwdPrice'] / ret['closePrice'] - 1.
    ret = ret[:-shift]

    data = data.merge(ret, how = 'inner', left_on = ['tradeDate'], right_on = ['tradeDate'])
    data = data.loc[:, ['return', 'heatIndex', 'sentimentIndex', 'news_emotion'] + used_factors]
    return data

def GetRegressionResult(data):
    data = data.dropna()
    all_factors = ['heatIndex', 'sentimentIndex', 'news_emotion'] + used_factors
    for f in all_factors:
        if data[f].std() == 0:
            continue
        data[f] = (data[f] - data[f].mean()) / data[f].std()

    y = np.array(data['return'].tolist())
    x = []
    for f in all_factors:
        x.append(data[f].tolist())
    x = np.column_stack(tuple(x))
    x = np.array( [ np.append(v,1) for v in x] )

    en = ElasticNet(fit_intercept=True, alpha=0)
    en.fit(x, y)
    res = en.coef_[:-1]
    w = dict(zip(all_factors, res))
    return w

def preparing(universe, date, factors_all):
    date = Date(date.year, date.month, date.day)
    cal = Calendar('China.SSE')
    start = cal.advanceDate(date, '-120B', BizDayConvention.Following)
    end = cal.advanceDate(date, '-1B', BizDayConvention.Following)
    start = datetime(start.year(), start.month(), start.dayOfMonth())
    end = datetime( end.year(), end.month(), end.dayOfMonth())
    trading_days = quartz.utils.tradingcalendar.get_trading_days(start, end)

    datas, means, vols, weights = {}, {}, {}, {}
    for i,stock in enumerate(universe):
        try:
            datas[stock] = StockRegDataGet(stock, trading_days, factors_all[stock])
            means[stock] = dict(datas[stock].mean())
            vols[stock] = dict(datas[stock].std())
            weights[stock] = GetRegressionResult(datas[stock])
        except Exception, e:
            pass
    return means, vols, weights
```

```py
from datetime import datetime

end = datetime(2016, 5, 18)
f_start = datetime(2014, 1, 1)
universe = set_universe('SH50')

f_days = quartz.utils.tradingcalendar.get_trading_days(f_start, end)
factors_all = StockFactorsGet(universe, f_days)
```

```py
from datetime import datetime

start = datetime(2014, 6, 1)
end = datetime(2016, 5, 18)
benchmark = 'SH50'
universe = set_universe('SH50')
capital_base = 100000
refresh_rate = 20

# f_start = datetime(2012, 6, 1)
# f_days = quartz.utils.tradingcalendar.get_trading_days(f_start, end)
# factors_all = StockFactorsGet(universe, f_days)

def initialize(account):
    pass

def handle_data(account):
    print account.current_date
    means, vols, weights = preparing(account.universe, account.current_date, factors_all)

    cal = Calendar('China.SSE')
    date = Date(account.current_date.year, account.current_date.month, account.current_date.day)
    date = cal.advanceDate(date, '-1B', BizDayConvention.Following)
    date = datetime(date.year(), date.month(), date.dayOfMonth())

    factors_cur = StockFactorsGet(account.universe, [date])

    score = {}
    all_factors = ['heatIndex', 'sentimentIndex', 'news_emotion'] + used_factors
    for stock in account.universe:
        if stock not in weights:
            continue
        fac = factors_cur[stock]
        s = 0
        for f in all_factors:
            try:
                x = fac[f].iloc[-1]
                x = (x - means[stock][f])/vols[stock][f]
                s += weights[stock][f] * int(round(x))
            except:
                pass
        score[stock] = s

    buylist = sorted(score.keys(), key = lambda x: score[x])[-5:]
    rebalance(account, buylist)

def rebalance(account, buylist):
    for stock in account.valid_secpos:
        if stock not in buylist:
            order_to(stock, 0)

    for stock in buylist:
        order(stock, account.referencePortfolioValue / len(buylist) / account.referencePrice[stock])
```

```
2014-06-03 00:00:00
2014-07-01 00:00:00
2014-07-29 00:00:00
2014-08-26 00:00:00
2014-09-24 00:00:00
2014-10-29 00:00:00
2014-11-26 00:00:00
2014-12-24 00:00:00
2015-01-23 00:00:00
2015-02-27 00:00:00
2015-03-27 00:00:00
2015-04-27 00:00:00
2015-05-26 00:00:00
2015-06-24 00:00:00
2015-07-22 00:00:00
2015-08-19 00:00:00
2015-09-18 00:00:00
2015-10-23 00:00:00
```