本系列将持续更新,标题为 【共享函数】
移动窗口(rolling)和 拓展窗口(expanding)
ADF 平稳性检验(有点小问题,修改方法):
修改要点:先导入 `from statsmodels.tsa.stattools import adfuller` 和 `import pandas as pd`;`dftest[4]` 是保存临界值的字典,遍历时应写作 `for key, value in dftest[4].items():`(若使用 `enumerate(dftest[4])`,得到的只是序号和键名,取不到临界值本身)。
抛物线转向(Stop And Reverse)(注意更改大小写,如Date>date)
s = psar(get_bars('600741.XSHG',count=10,fields=['date','close', 'high', 'low'])); pd.DataFrame(s)
# For more on the rolling helpers, see the section below:
# moving windows (rolling) and expanding windows (expanding).
import pandas as pd


def HHV(srcuritys, windows, end_date=None, count=20, fq='pre'):
    '''Rolling highest high for a list of securities.

    srcuritys: list of security codes.
    windows:   rolling window length, in bars.
    end_date:  query date, forwarded to get_price (the original note says
               the given day is included).
    count:     number of rows requested in the result.
    fq:        price-adjustment mode, forwarded to get_price.

    get_price is supplied by the hosting research platform; it is not
    defined in this file.
    '''
    # The first windows-1 rows of a rolling result are NaN and get dropped,
    # so that many extra bars are requested up front.
    bars_needed = count + windows - 1
    quotes = get_price(srcuritys, end_date=end_date, count=bars_needed,
                       fq=fq, fields='high')
    # pandas 0.16 spelling: pd.rolling_max(df.high, windows).dropna()
    rolling_high = quotes.high.rolling(window=windows).max()  # pandas 0.20+
    return rolling_high.dropna()


###########################################################
def LLV(srcuritys, windows, end_date=None, count=20, fq='pre'):
    '''Rolling lowest low for a list of securities.

    Parameters mirror HHV: security list, window length, query date,
    number of rows requested, price-adjustment mode.
    '''
    bars_needed = count + windows - 1
    quotes = get_price(srcuritys, end_date=end_date, count=bars_needed,
                       fq=fq, fields='low')
    # pandas 0.16 spelling: pd.rolling_min(df.low, windows).dropna()
    rolling_low = quotes.low.rolling(window=windows).min()  # pandas 0.20+
    return rolling_low.dropna()


# HHV(['600741.XSHG','600507.XSHG'],10,'2018-09-04')
LLV(['600741.XSHG', '600507.XSHG'], 10, '2018-09-04').tail()
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
600741.XSHG | 600507.XSHG | |
2018-08-29 | 19.2 | 11.02 |
2018-08-30 | 19.2 | 11.02 |
2018-08-31 | 19.2 | 11.02 |
2018-09-03 | 19.5 | 10.92 |
2018-09-04 | 19.8 | 10.92 |
import numpy as np
import talib


# MACD
def MACD(security_list, date=None, period='1d', fastperiod=12, slowperiod=26, signalperiod=9):
    """Compute MACD lines for one or more securities.

    security_list: a security code or a list of codes.
    date:          end time forwarded to get_bars as end_dt.
    period:        bar size forwarded to get_bars (e.g. '1d', '5m').
    fastperiod, slowperiod, signalperiod: MACD periods passed to talib.

    Returns three dicts keyed by security code: (DIF, DEA, HIST). HIST is
    the histogram returned by talib.MACDEXT multiplied by 2. When every
    close of a security is NaN, each dict holds a one-element NaN array
    for it.

    get_bars and log are supplied by the hosting platform; they are not
    defined in this file.
    """
    if isinstance(security_list, str):
        security_list = [security_list]
    # slowperiod*3 bars are requested so the recursive averages have warm-up data.
    security_data = get_bars(security_list, slowperiod * 3, period, fields='close',
                             include_now=True, end_dt=date)
    macd_DIF = {}
    macd_DEA = {}
    macd_HIST = {}
    for stock in security_list:
        close = security_data[stock]['close']
        if np.isnan(close).all():
            log.info("股票 %s 输入数据全是 NaN,该股票可能已退市或刚上市,返回 NaN 值数据。" % stock)
            # Fixed: the original called a bare `array(...)`, which is undefined.
            macd_DIF[stock] = np.array([np.nan])
            macd_DEA[stock] = np.array([np.nan])
            macd_HIST[stock] = np.array([np.nan])
        else:
            # matype=1 selects the exponential moving average in talib.
            macd_DIF[stock], macd_DEA[stock], macd = talib.MACDEXT(
                close,
                fastperiod=fastperiod, fastmatype=1,
                slowperiod=slowperiod, slowmatype=1,
                signalperiod=signalperiod, signalmatype=1)
            macd_HIST[stock] = macd * 2
    return macd_DIF, macd_DEA, macd_HIST


macd_DIF, macd_DEA, macd_HIST = MACD(['600741.XSHG', '600507.XSHG'], date='2019-02-18 13:00:00', period='5m')
macd_HIST['600741.XSHG'][-1]  # latest MACD histogram value from the newest 5-minute bar of 600741
from functools import reduce


def SMA(security_list, date=None, period='1d', timeperiod=5):
    """Recursively smoothed average of the close for each security.

    security_list: a security code or a list of codes.
    date:          end time forwarded to get_bars as end_dt.
    period:        bar size forwarded to get_bars.
    timeperiod:    smoothing length; timeperiod*3 bars are fetched.

    Returns a dict mapping each code to the final smoothed value.
    NaN closes are replaced by np.nan_to_num before smoothing.
    Relies on `np` and the platform's get_bars being in scope already.
    """
    if isinstance(security_list, str):
        security_list = [security_list]

    def _smooth(previous, price):
        # The new value blends the running value (weight timeperiod-1)
        # with the latest price (weight 1).
        return ((timeperiod - 1) * previous + price) / timeperiod

    security_data = get_bars(security_list, timeperiod * 3, period, fields='close',
                             include_now=True, end_dt=date)
    return {
        stock: reduce(_smooth, np.nan_to_num(security_data[stock]['close']))
        for stock in security_list
    }


SMA(['600741.XSHG', '600507.XSHG'])
{'600507.XSHG': 11.768257144189747, '600741.XSHG': 20.297430244584653}
rolling 是对窗口进行移动处理,窗口大小自定义,大小不变
import pandas as pd

# Close prices for three securities; get_price is supplied by the hosting platform.
c_data = get_price(['600741.XSHG', '600507.XSHG', '000001.XSHE'],
                   fields='close', end_date='2018-09-26', count=100).close

# HHV5: rolling 5-bar maximum of the series.
# pandas 0.16 spelling:
#   pd.rolling_max(c_data, window=5, min_periods=None, freq=None, center=False, how='max').tail()
# The `freq` keyword was removed from rolling() in pandas 0.23, so it is no
# longer passed; every other keyword that was dropped equalled its default.
c_data.rolling(window=5).max().tail()  # pandas 0.20+
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
600741.XSHG | 600507.XSHG | 000001.XSHE | |
2018-09-19 | 20.36 | 10.88 | 10.24 |
2018-09-20 | 20.36 | 10.88 | 10.24 |
2018-09-21 | 21.79 | 11.04 | 10.67 |
2018-09-25 | 21.79 | 11.04 | 10.67 |
2018-09-26 | 21.82 | 11.10 | 10.71 |
# Running historical maximum: the highest value seen so far at each row.
# pandas 0.16 spelling: pd.expanding_max(c_data, min_periods=0, freq=None).head()
# The `freq` keyword was removed from expanding() in pandas 0.23, so it is
# no longer passed.
c_data.expanding(min_periods=1).max().tail()  # pandas 0.20+
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
600741.XSHG | 600507.XSHG | 000001.XSHE | |
2018-09-19 | 24.75 | 13.59 | 11.01 |
2018-09-20 | 24.75 | 13.59 | 11.01 |
2018-09-21 | 24.75 | 13.59 | 11.01 |
2018-09-25 | 24.75 | 13.59 | 11.01 |
2018-09-26 | 24.75 | 13.59 | 11.01 |
# MA5: rolling 5-bar mean of the series.
# pandas 0.16 spelling:
#   pd.rolling_mean(c_data, window=5, min_periods=None, freq=None, center=False, how='max')[-5:]
# The `freq` keyword was removed from rolling() in pandas 0.23, so it is no
# longer passed; every other keyword that was dropped equalled its default.
c_data.rolling(window=5).mean().tail()  # pandas 0.20+
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
600741.XSHG | 600507.XSHG | 000001.XSHE | |
2018-09-19 | 20.114 | 10.676 | 9.962 |
2018-09-20 | 20.126 | 10.728 | 10.016 |
2018-09-21 | 20.412 | 10.814 | 10.182 |
2018-09-25 | 20.706 | 10.910 | 10.354 |
2018-09-26 | 21.054 | 10.954 | 10.480 |
import pandas as pd

# NOTE: an EMA is computed recursively, so for accuracy the input should be
# at least 2-3 times as long as `span`, and the earliest values may be
# imprecise. Fetch more history if more EMA values are needed.
c_data = get_price(
    ['600741.XSHG', '600507.XSHG', '000001.XSHE'],
    fields='close',
    end_date='2018-09-26',
    fq=None,
    count=10 * 3,
).close

# pandas 0.16 spelling: pd.ewma(c_data, span=5)[-5:]
c_data.ewm(span=5, adjust=True, ignore_na=False, min_periods=0).mean().tail()  # pandas 0.23
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
600741.XSHG | 600507.XSHG | 000001.XSHE | |
2018-09-19 | 20.140727 | 10.752822 | 10.026004 |
2018-09-20 | 20.120484 | 10.781882 | 10.094004 |
2018-09-21 | 20.676996 | 10.867922 | 10.286005 |
2018-09-25 | 20.907999 | 10.891948 | 10.374004 |
2018-09-26 | 21.212001 | 10.961299 | 10.486003 |
# Result computed from Sina data for 000001.XSHE on 2018-09-27:
round((88.04 / 240) / (594.38 / 1200), 2)
import numpy as np
import talib as tl


def VPT(close, volume, fperiod, lperiod):
    '''Volume-price trend and its moving average.

    close:   sequence of closing prices.
    volume:  sequence of traded volumes.
    fperiod: short period, the window of the rolling sum.
    lperiod: long period, the window of the moving average.

    Returns (vpt, mavpt) as numpy arrays, one element shorter than the
    input. NaNs, including talib's warm-up NaNs, are replaced by 0.
    '''
    close = np.nan_to_num(close)
    volume = np.nan_to_num(volume)
    # Bar-over-bar relative price change; 0 when the previous close is 0.
    pratio1 = np.array(
        [0 if prev == 0 else cur / prev - 1 for cur, prev in zip(close[1:], close[:-1])],
        dtype=float)
    # Repeat the first change so the array lines up with `volume`.
    pratio1 = np.append(pratio1[0], pratio1)
    vp = np.array([change * vol / 100 for change, vol in zip(pratio1, volume)], dtype=float)
    vpt = tl.SUM(vp, timeperiod=fperiod)
    vpt = np.nan_to_num(vpt[:-1])
    # Fixed: this name was garbled to `m*pt` in the source, a syntax error.
    mavpt = tl.MA(vpt, lperiod)
    mavpt = np.nan_to_num(mavpt)
    return vpt, mavpt


data = get_bars('600741.XSHG', fields=['close', 'volume'], end_dt='2019-02-18 13:00:00',
                unit='1d', count=50, include_now=True)
VPT(data['close'], data['volume'], 5, 10)
(array([ 0. , 0. , 0. , 0. , 11050.70227701, 8607.23338545, 1553.31822559, -2108.07434905, -864.31262748, 2058.66414909, 836.91618346, 1468.40900387, 1523.23453819, 2259.56278032, -994.91286139, 1164.74640747, 1100.17033377, -828.77222826, -1255.02819621, 3292.25593718, 1538.77116777, 615.531841 , -3323.09444235, -3307.08024403, -4176.36525518, -3386.81056247, 9194.5677398 , 14788.00256834, 13884.31116372, 11290.03226475, 12792.57322021, 1900.32079054, 2587.35803075, 4766.75751288, 5428.40293075, 2461.99893611, 3103.33846488, 4106.77409889, 2686.71666716, 3861.70672945, 2983.38409295, 674.05781133, -168.00216517, -566.13327048, -1477.17416535, 2127.97574405, 4157.09566718, 2256.85267735, -11820.49255068]), array([ 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 2029.75310606, 2113.44472441, 2260.28562479, 2412.60907861, 2638.56535664, 1434.00384281, 689.75514501, 644.44035582, 772.3705679 , 733.29901103, 856.65818984, 926.84368827, 841.55597198, 356.92307393, -199.74122851, -517.88646788, -973.04216488, -163.60242427, 1398.07505538, 2912.00899138, 3711.78662413, 4837.16682938, 4965.64572433, 5556.69097164, 6364.07474733, 7324.55156593, 7909.43251578, 7300.30958829, 6232.18674135, 5112.42729169, 4369.59473816, 3388.67582544, 3266.04952752, 2990.51350792, 2457.22442959, 1766.66671998, 1733.26440077, 1838.640121 , 1653.64797885, 202.92705706]))
from statsmodels.tsa.stattools import adfuller
import pandas as pd


def _adf_report(timeseries, autolag):
    """Run adfuller with the given autolag criterion and return a labelled Series."""
    dftest = adfuller(timeseries, autolag=autolag)
    dfoutput = pd.Series(
        dftest[0:4],
        index=["Test Statistic", "p-value", "Lags Used", "Numbers of observation Used"])
    # dftest[4] is a dict of critical values keyed by significance level.
    # Fixed: iterating it with enumerate() yields (position, key) and never
    # reaches the numeric critical values; .items() yields (level, value).
    for key, value in dftest[4].items():
        dfoutput["Critical values(%s)" % key] = value
    return dfoutput


def test_stationarity(timeseries):
    """Plot rolling statistics of a series and print ADF test reports.

    timeseries: pandas Series to examine.

    Plots the series, its 12-period rolling mean and standard deviation
    and its first difference, then prints Augmented Dickey-Fuller results
    with the lag chosen by AIC and by BIC. Returns None.

    NOTE(review): `plt` is not imported in this cell; it is presumably
    provided by the notebook environment -- confirm.
    """
    # Rolling mean, first difference and rolling standard deviation.
    # Series.rolling replaces the removed pd.rolling_mean / pd.rolling_std.
    rollmean = timeseries.rolling(window=12).mean()
    ts_diff = timeseries - timeseries.shift()
    rollstd = timeseries.rolling(window=12).std()

    timeseries.plot(color="blue", label="Original")
    rollmean.plot(color="red", label="Rolling 12 Mean")
    rollstd.plot(color="black", label="Rolling 12 Std")
    ts_diff.plot(color="green", label="Diff 1")
    plt.legend(loc="best")
    plt.title("Rolling Mean, Standard Deviation and Diff 1")
    plt.axhline(y=0, linewidth=1, color="yellow")
    plt.show(block=False)

    # ADF test, lag selected by AIC.
    print("")
    print("Result of Augment Dickry-Fuller TestAIC")
    print(_adf_report(timeseries, "AIC"))

    # ADF test, lag selected by BIC.
    print("")
    print("Result of Augment Dickry-Fuller TestBIC")
    print(_adf_report(timeseries, "BIC"))


ts = get_price('000001.XSHG').close
test_stationarity(ts)
/opt/conda/lib/python3.5/site-packages/ipykernel_launcher.py:7: FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with Series.rolling(center=False,window=12).mean() import sys /opt/conda/lib/python3.5/site-packages/ipykernel_launcher.py:9: FutureWarning: pd.rolling_std is deprecated for Series and will be removed in a future version, replace with Series.rolling(center=False,window=12).std() if __name__ == '__main__':
Result of Augment Dickry-Fuller TestAIC Test Statistic -1.76323 p-value 0.398864 Lags Used 4 Numbers of observation Used 239 Critical values(0) 10% Critical values(1) 1% Critical values(2) 5% dtype: object Result of Augment Dickry-Fuller TestBIC Test Statistic -1.38346 p-value 0.590182 Lags Used 2 Numbers of observation Used 241 Critical values(0) 10% Critical values(1) 1% Critical values(2) 5% dtype: object
import numpy as np


def FindPeakValley(df, first, last, window):
    """Return the positions of confirmed turning points in a price series.

    df:     mapping or DataFrame with 'high' and 'low' columns.
    first:  position of the first bar to examine.
    last:   position one past the last bar to examine.
    window: reversal threshold as a fraction. An upturn is confirmed when
            a high exceeds the running low * (1 + window); a downturn when
            a low falls below the running high * (1 - window).

    Returns a list of integer positions, alternating valley / peak and
    starting with a valley, since the scan begins in the down state. A
    turning point is appended only once the opposite move confirms it, so
    the latest extreme is not included. Returns [] when the range is empty.
    """
    # Read the columns once as positional arrays. This avoids integer-key
    # lookups on a label-indexed Series (deprecated positional fallback)
    # and repeated column access inside the loop.
    highs = np.asarray(df['high'])
    lows = np.asarray(df['low'])
    if first >= last or first >= len(lows):
        return []

    low_price = lows[first]
    high_price = highs[first]
    low_pos = high_pos = first
    up_threshold = low_price * (1 + window)
    down_threshold = high_price * (1 - window)
    trend = -1  # -1: looking for an upturn, 1: looking for a downturn
    vertex = []
    for i in range(first + 1, last):
        high = highs[i]
        low = lows[i]
        if trend == -1 and high > up_threshold:
            # Upturn confirmed: the running low was a valley.
            trend = 1
            high_price = high
            high_pos = i
            down_threshold = high_price * (1 - window)
            vertex.append(low_pos)
        elif trend == 1 and low < down_threshold:
            # Downturn confirmed: the running high was a peak.
            trend = -1
            low_price = low
            low_pos = i
            up_threshold = low_price * (1 + window)
            vertex.append(high_pos)
        # Keep both running extremes and their thresholds up to date.
        if high > high_price:
            high_price = high
            high_pos = i
            down_threshold = high_price * (1 - window)
        if low < low_price:
            low_price = low
            low_pos = i
            up_threshold = low_price * (1 + window)
    return vertex


# Demo; guarded so that importing this code does not call the data API.
if __name__ == '__main__':
    df = get_price('600507.XSHG')
    print(FindPeakValley(df, 0, len(df), 0.5))
[25, 111, 124, 144]
import pandas as pd


def psar(barsdata, iaf=0.02, maxaf=0.2):
    '''Parabolic SAR (stop and reverse).

    SAR(n) = SAR(n-1) + AF * [EP(n-1) - SAR(n-1)]
    where SAR(n) is the SAR of bar n, AF is the acceleration factor and
    EP is the extreme price (highest high or lowest low) of the trend.

    barsdata: mapping, DataFrame or record array with 'date', 'high',
              'low' and 'close' columns.
    iaf:      initial acceleration factor, also the step added each time
              a new extreme is made.
    maxaf:    upper bound of the acceleration factor.

    Returns a dict of lists: "dates", "high", "low", "close", "psar",
    "psarbear" and "psarbull". "psarbull" / "psarbear" hold the SAR on
    bars in an up / down trend and None elsewhere. The first two entries
    of "psar" are the closes themselves. Empty input gives empty lists.
    '''
    dates = list(barsdata['date'])
    high = list(barsdata['high'])
    low = list(barsdata['low'])
    close = list(barsdata['close'])
    # Take the bar count from a column: len() of a plain mapping would be
    # the number of keys, not the number of bars.
    length = len(close)
    psar = close[0:len(close)]
    psarbull = [None] * length
    psarbear = [None] * length
    if length == 0:
        return {"dates": dates, "high": high, "low": low, "close": close,
                "psar": psar, "psarbear": psarbear, "psarbull": psarbull}

    bull = True
    af = iaf
    hp = high[0]  # highest high of the current up trend
    lp = low[0]   # lowest low of the current down trend
    for i in range(2, length):
        # Move the SAR towards the extreme point of the current trend.
        extreme = hp if bull else lp
        psar[i] = psar[i - 1] + af * (extreme - psar[i - 1])

        reverse = False
        if bull:
            if low[i] < psar[i]:
                # Price broke below the SAR: switch to a down trend.
                bull = False
                reverse = True
                psar[i] = hp
                lp = low[i]
                af = iaf
        else:
            if high[i] > psar[i]:
                # Price broke above the SAR: switch to an up trend.
                bull = True
                reverse = True
                psar[i] = lp
                hp = high[i]
                af = iaf

        if not reverse:
            if bull:
                if high[i] > hp:
                    hp = high[i]
                    af = min(af + iaf, maxaf)
                # The SAR may not be above the lows of the two prior bars.
                if low[i - 1] < psar[i]:
                    psar[i] = low[i - 1]
                if low[i - 2] < psar[i]:
                    psar[i] = low[i - 2]
            else:
                if low[i] < lp:
                    lp = low[i]
                    af = min(af + iaf, maxaf)
                # The SAR may not be below the highs of the two prior bars.
                if high[i - 1] > psar[i]:
                    psar[i] = high[i - 1]
                if high[i - 2] > psar[i]:
                    psar[i] = high[i - 2]

        if bull:
            psarbull[i] = psar[i]
        else:
            psarbear[i] = psar[i]

    return {"dates": dates, "high": high, "low": low, "close": close,
            "psar": psar, "psarbear": psarbear, "psarbull": psarbull}


# Demo; guarded so that importing this code does not call the data API.
if __name__ == '__main__':
    s = psar(get_bars('600741.XSHG', count=10, fields=['date', 'close', 'high', 'low']))
    print(pd.DataFrame(s))
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
close | dates | high | low | psar | psarbear | psarbull | |
0 | 20.72 | 2019-01-28 | 21.05 | 20.51 | 20.720000 | NaN | NaN |
1 | 20.29 | 2019-01-29 | 21.06 | 20.01 | 20.290000 | NaN | NaN |
2 | 20.08 | 2019-01-30 | 20.61 | 20.01 | 21.050000 | 21.050000 | NaN |
3 | 20.21 | 2019-01-31 | 20.45 | 20.16 | 21.060000 | 21.060000 | NaN |
4 | 20.35 | 2019-02-01 | 20.47 | 20.08 | 21.039000 | 21.039000 | NaN |
5 | 20.42 | 2019-02-11 | 20.50 | 20.01 | 21.018420 | 21.018420 | NaN |
6 | 20.74 | 2019-02-12 | 20.94 | 20.36 | 20.998252 | 20.998252 | NaN |
7 | 21.04 | 2019-02-13 | 21.28 | 20.56 | 20.010000 | NaN | 20.0100 |
8 | 20.71 | 2019-02-14 | 21.04 | 20.58 | 20.035400 | NaN | 20.0354 |
9 | 19.45 | 2019-02-15 | 20.85 | 19.30 | 21.280000 | 21.280000 | NaN |
import pandas as pd
import statsmodels.api as sm


def calculate_alpha_beta(securities, index='000001.XSHG', unit='1d', count=20,
                         field='close', normalize=True, use_return=False):
    """Regress the index series against each security with OLS.

    securities: a security code or a list of codes.
    index:      code of the benchmark index.
    unit, count, field: forwarded to history(); count+1 bars are fetched.
    normalize:  divide every price by the first row before regressing.
    use_return: regress on percentage changes instead of price levels.

    Returns a DataFrame with columns 'code', 'alpha' (intercept), 'beta'
    (slope), 'rsquared' and 'return' (last price / first price - 1), with
    rows containing NaN dropped.

    history is supplied by the hosting platform; it is not defined here.

    NOTE(review): the model is fitted as index ~ const + stock, i.e. the
    index is the dependent variable. A conventional alpha/beta would be
    stock ~ const + index. Left unchanged -- confirm the intended direction.
    """
    if isinstance(securities, str):
        securities = [securities]
    stock_list = list(securities) + [index]
    prices = history(count + 1, unit, field, security_list=stock_list)
    if normalize:
        # Vectorised equivalent of dividing every row by the first row.
        prices = prices / prices.iloc[0]
    returns = prices.pct_change()[1:] if use_return else prices
    index_ret = returns[index]

    results = {'code': [], 'alpha': [], 'beta': [], 'rsquared': [], 'return': []}
    for stock in stock_list:
        if stock == index or stock not in returns.columns:
            continue
        X = sm.add_constant(returns[stock])
        model_result = sm.regression.linear_model.OLS(index_ret, X).fit()
        if model_result is not None and len(model_result.params) > 1:
            # Use .iloc throughout: params and the price columns carry label
            # indexes, where plain integer keys are deprecated fallbacks.
            results['code'].append(stock)
            results['alpha'].append(model_result.params.iloc[0])
            results['beta'].append(model_result.params.iloc[1])
            results['rsquared'].append(model_result.rsquared)
            results['return'].append(prices[stock].iloc[-1] / prices[stock].iloc[0] - 1.0)
    return pd.DataFrame(results).dropna()


calculate_alpha_beta(['600507.XSHG', '600741.XSHG'])
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
alpha | beta | code | return | rsquared | |
0 | 0.581552 | 0.423005 | 600507.XSHG | 0.128585 | 0.639028 |
1 | 0.622698 | 0.375139 | 600741.XSHG | 0.036780 | 0.461453 |
### Marubozu
# Compute the Marubozu candlestick indicator (CDLMARUBOZU).
def CDLMARUBOZU(security_list, check_date):
    """Return the latest talib CDLMARUBOZU value for each security.

    security_list: a security code or a list of codes.
    check_date:    end date forwarded to get_price; 20 daily bars are
                   fetched per security with skip_paused=True.

    Returns a dict mapping each code to the pattern value of the last bar,
    or to NaN when every close in the window is NaN.

    get_price and log are supplied by the hosting platform; they are not
    defined in this file.
    """
    import talib
    import numpy as np
    import six

    # Accept a single security code as well as a list.
    if isinstance(security_list, six.string_types):
        security_list = [security_list]

    ta = {}
    for stock in security_list:
        security_data = get_price(stock, end_date=check_date, frequency='1d',
                                  fields=['close', 'high', 'low', 'open'],
                                  skip_paused=True, count=20)
        close = security_data['close']
        nan_count = int(np.isnan(close.values).sum())
        if nan_count == len(close):
            log.info("股票 %s 输入数据全是 NaN,该股票可能已退市、未上市或刚上市,返回 NaN 值数据。" % stock)
            ta[stock] = np.nan
        else:
            if nan_count > 0:
                # Back-fill gaps; bfill() replaces the deprecated fillna(method="bfill").
                security_data = security_data.bfill()
            # Pass plain numpy arrays so the result is an ndarray and [-1]
            # is an unambiguous positional lookup.
            ta[stock] = talib.CDLMARUBOZU(
                security_data['open'].values,
                security_data['high'].values,
                security_data['low'].values,
                security_data['close'].values)[-1]
    return ta


if __name__ == '__main__':
    securities = list(get_all_securities().index)[:60]
    ta = CDLMARUBOZU(security_list=securities, check_date='2019-04-18')
    print(ta)
{'000001.XSHE': 0, '000002.XSHE': 0, '000004.XSHE': 0, '000005.XSHE': 0, '000006.XSHE': 0, '000007.XSHE': 0, '000008.XSHE': -100, '000009.XSHE': 0, '000010.XSHE': 0, '000011.XSHE': 0, '000012.XSHE': 0, '000014.XSHE': -100, '000016.XSHE': 0, '000017.XSHE': 0, '000018.XSHE': 0, '000019.XSHE': 0, '000020.XSHE': 0, '000021.XSHE': 0, '000022.XSHE': 0, '000023.XSHE': -100, '000024.XSHE': 0, '000025.XSHE': 0, '000026.XSHE': 0, '000027.XSHE': 0, '000028.XSHE': 0, '000029.XSHE': 0, '000030.XSHE': 0, '000031.XSHE': -100, '000032.XSHE': 0, '000033.XSHE': 0, '000034.XSHE': 0, '000035.XSHE': 0, '000036.XSHE': 0, '000037.XSHE': -100, '000038.XSHE': 0, '000039.XSHE': 0, '000040.XSHE': 0, '000042.XSHE': -100, '000043.XSHE': 0, '000045.XSHE': 0, '000046.XSHE': 0, '000048.XSHE': 0, '000049.XSHE': 0, '000050.XSHE': 0, '000055.XSHE': -100, '000056.XSHE': 0, '000058.XSHE': 0, '000059.XSHE': 0, '000060.XSHE': 0, '000061.XSHE': 0, '000062.XSHE': 0, '000063.XSHE': 0, '000065.XSHE': 0, '000066.XSHE': 0, '000068.XSHE': 0, '000069.XSHE': 0, '000070.XSHE': 0, '000078.XSHE': 0, '000088.XSHE': 0, '000089.XSHE': 0}