最近研究了一篇申万宏源证券的研报——《异常财务指标因子研究》。该研报提出六个基本面财务指标,不仅在一定程度上能识别上市公司财务造假的情况,更能有效预测未来盈余。本文按照该研报的描述尝试复现研究结果,验证异常财务因子的有效性。
首先我们列出6个异常财务指标的选取原因:
以及其计算公式:
2.异常应收款因子
异常应收款因子区分度不大,但单调性更为明显。IC均值为2.78%,IC_IR为0.76,多空组合年化收益率约为4.47%。
3.异常其他应收款因子
异常其他应收款因子区分能力一般,单调性不强,但多空部分分化明显。IC均值为1.54%,IC_IR为0.39,多空组合年化收益率约为3.77%。
4.异常预付款项因子
异常预付款项因子分化情况与其他应收款相似。IC均值为1.67%,IC_IR为0.35,多空组合年化收益率约为2.40%。
5.异常销售管理费用因子
异常销售管理费用因子没有明显单调性,但前三组显著高于其他组合收益。IC均值为1.75%,IC_IR为0.44,多空组合年化收益率约为3.49%。
6.异常毛利润因子
异常毛利润因子在6个因子中表现最好,区分度、单调性都非常明显。IC均值为4.87%,IC_IR为1.11,多空组合年化收益率约为8.66%, 且比较稳定。
7.小结
根据上述研究结果,有理由认为异常财务因子具有一定的策略增强效果,有兴趣的同学可以自己利用这类因子写一个策略试试。
注:为了整洁,研究代码被打包成一个main函数;实际使用时建议把各个循环拆开分别运行,否则在看到结果前耗时可能较长。
from jqdata import *
from jqfactor import *
import numpy as np
import pandas as pd
import datetime as dt
import math
import time
def main():
    """Run the whole study for every abnormal-financial factor.

    For each factor name this:

    1. builds the daily returns of the ten factor-sorted groups, compounds
       each group and draws a bar chart of the annualised returns;
    2. draws the long-short cumulative-return curve and prints its maximum
       drawdown;
    3. prints the mean IC and the IC_IR (mean IC divided by its standard
       deviation).
    """
    factor_names = ['abnormal_inventories', 'abnormal_account_receivable', 'abnormal_other_receivable',
                    'abnormal_advance_peceipts', 'abnormal_sa_expense', 'abnormal_gross_profit',
                    'abnormal_fundamental']
    # Annualisation horizon in years. Kept as a float so the exponent below is
    # a true division on Python 2 as well (there `1/9` evaluates to 0).
    sample_years = 9.0
    for f in factor_names:
        # Annualised return of each of the ten groups, shown as a bar chart.
        df = get_multipleReturn10(f)
        title = ''
        cumulative = []
        for c in df.columns:
            total, title = return_line(df[c], f, plot = False)
            cumulative.append(total)
        annualised = np.array(cumulative, dtype = float) ** (1.0 / sample_years) - 1
        m_s = pd.DataFrame(annualised, index = range(1, len(cumulative) + 1), columns = ['样本期年化收益率'])
        m_s.plot(kind = 'bar', title = title)
        # Long-short cumulative-return curve and its maximum drawdown.
        r = ls_portfolio(f)
        a2, t2 = return_line(r, f)
        mdd = maxdrawdown(r)
        print(t2 + '的最大回撤为' + str(mdd))
        # IC statistics. np.mean / np.std are used explicitly instead of the
        # bare names `mean` / `std`, which this file never imports by name.
        # NOTE(review): np.std is the population standard deviation (ddof=0);
        # confirm that matches whatever `std` resolved to before.
        IC = get_multipleIC(f)
        ic_mean = np.mean(IC)
        print(t2 + 'IC均值为:' + str(ic_mean))
        print(t2 + 'IC_IR为:' + str(ic_mean / np.std(IC)))
# Daily returns of the ten factor-sorted groups.
def get_multipleReturn10(f):
    """Build daily equal-weighted returns for ten factor-sorted groups.

    At every rebalance date (from the fourth entry of the date list up to the
    second-to-last) the feasible stock pool is scored with factor ``f``, the
    scores are neutralised against SW level-1 industry and market cap, and the
    stocks are ranked from highest to lowest score. The ranking is cut into
    ten equal-sized groups (any remainder at the bottom is left out), and for
    every pair of consecutive rows of the price table the mean close-to-close
    return of each group is stored.

    Args:
        f: column name of the factor in the frame returned by
            ``abnormal_fundamental``.

    Returns:
        A DataFrame with columns ``'1'`` .. ``'10'`` (group 1 holds the
        highest factor values), indexed by the date each one-day return
        starts on.
    """
    d = ['2008-11-01', '2009-05-01', '2009-09-01', '2009-11-01', '2010-05-01', '2010-09-01', '2010-11-01',
         '2011-05-01', '2011-09-01', '2011-11-01', '2012-05-01', '2012-09-01', '2012-11-01',
         '2013-05-01', '2013-09-01', '2013-11-01', '2014-05-01', '2014-09-01', '2014-11-01',
         '2015-05-01', '2015-09-01', '2015-11-01', '2016-05-01', '2016-09-01', '2016-11-01',
         '2017-05-01', '2017-09-01', '2017-11-01', '2018-05-01', '2018-09-01', '2018-11-01']
    # One column per group; rows are added date by date below.
    df = pd.DataFrame(columns = [str(i) for i in range(1, 11)])
    for i in range(3, len(d) - 1):
        # Stock pool at the rebalance date.
        all_stocks = list(get_all_securities(['stock'], date = d[i]).index)
        s = set_feasible_stocks(all_stocks, d[i])
        # All abnormal-financial factors; d[i - 3] is three entries back in
        # the list, i.e. the same report date one year earlier.
        abdf = abnormal_fundamental(d[i], d[i - 3], s)
        # Neutralise against industry and market cap, then rank descending.
        abin = neutralize(abdf[f], how = ['sw_l1', 'market_cap'], date = d[i], axis = 0, fillna = 'sw_l1')
        ranked_codes = abin.sort_values(ascending = False).index.tolist()
        n = int(len(ranked_codes) / 10)
        for j in range(1, 11):
            members = ranked_codes[(j - 1) * n : j * n]
            prices = get_price(members, start_date = d[i], end_date = d[i + 1], fields = ['close', 'paused'])
            # Blank out closes on suspended days so they drop out of the
            # average. The original looped over every cell with `== np.nan`,
            # a comparison rather than an assignment, so nothing was removed.
            close = prices['close'].where(prices['paused'] != 1)
            dates = close.index.tolist()
            for k in range(len(dates) - 1):
                daily = (close.iloc[k + 1, :] / close.iloc[k, :]).dropna() - 1
                df.loc[dates[k], str(j)] = daily.mean()
    return df
# Long-short portfolio: top decile minus bottom decile.
def ls_portfolio(f):
    """Return the daily return series of a top-minus-bottom decile portfolio.

    At every rebalance date the feasible stock pool is scored with factor
    ``f``, neutralised against SW level-1 industry and market cap, and ranked
    from highest to lowest score. The first tenth of the ranking is the long
    leg and the last tenth the short leg. For every pair of consecutive rows
    of the price table, the mean close-to-close return of the short leg is
    subtracted from that of the long leg.

    Args:
        f: column name of the factor in the frame returned by
            ``abnormal_fundamental``.

    Returns:
        A list of daily long-minus-short returns in chronological order.
    """
    d = ['2008-11-01', '2009-05-01', '2009-09-01', '2009-11-01', '2010-05-01', '2010-09-01', '2010-11-01',
         '2011-05-01', '2011-09-01', '2011-11-01', '2012-05-01', '2012-09-01', '2012-11-01',
         '2013-05-01', '2013-09-01', '2013-11-01', '2014-05-01', '2014-09-01', '2014-11-01',
         '2015-05-01', '2015-09-01', '2015-11-01', '2016-05-01', '2016-09-01', '2016-11-01',
         '2017-05-01', '2017-09-01', '2017-11-01', '2018-05-01', '2018-09-01', '2018-11-01', '2018-12-11']
    r = []
    for i in range(3, len(d) - 1):
        # Stock pool at the rebalance date.
        all_stocks = list(get_all_securities(['stock'], date = d[i]).index)
        s = set_feasible_stocks(all_stocks, d[i])
        # All abnormal-financial factors; d[i - 3] is the same report date
        # one year earlier.
        abdf = abnormal_fundamental(d[i], d[i - 3], s)
        # Neutralise against industry and market cap, then rank descending.
        abin = neutralize(abdf[f], how = ['sw_l1', 'market_cap'], date = d[i], axis = 0, fillna = 'sw_l1')
        ranked_codes = abin.sort_values(ascending = False).index.tolist()
        n = int(len(ranked_codes) / 10)
        long_codes = ranked_codes[0:n]
        # The last n codes. The original slice [(-n-1):-1] was shifted by one
        # and left out the lowest-ranked stock. Slicing from an explicit
        # start also yields an empty leg when n == 0.
        # NOTE(review): if neutralize can return NaN scores they sort last and
        # would end up in the short leg — confirm whether they need dropping.
        short_codes = ranked_codes[len(ranked_codes) - n:]
        # Closes of each leg with suspended days blanked out. The original
        # used `== np.nan` (a comparison), so suspended days were never removed.
        long_price = get_price(long_codes, start_date = d[i], end_date = d[i + 1], fields = ['close', 'paused'])
        long_close = long_price['close'].where(long_price['paused'] != 1)
        short_price = get_price(short_codes, start_date = d[i], end_date = d[i + 1], fields = ['close', 'paused'])
        short_close = short_price['close'].where(short_price['paused'] != 1)
        # Daily long-minus-short return.
        for j in range(len(long_close.index) - 1):
            long_ret = ((long_close.iloc[j + 1, :] / long_close.iloc[j, :]).dropna() - 1).mean()
            short_ret = ((short_close.iloc[j + 1, :] / short_close.iloc[j, :]).dropna() - 1).mean()
            r.append(long_ret - short_ret)
    return r
# Compound a return series into a cumulative curve and optionally plot it.
def return_line(r, f, plot = True):
    """Compound a series of periodic returns and look up the chart title.

    Args:
        r: sequence of simple returns (0.01 means +1%).
        f: factor name, used only to pick the chart title.
        plot: when truthy, draw the cumulative curve, which starts at 1.
            The x-axis is the list of trade days from 2009-11-01 to
            2018-12-11, truncated to the length of the curve.

    Returns:
        A tuple ``(m, t)``: ``m`` is the final cumulative value (1 for an
        empty series) and ``t`` is the title for ``f``, or ``''`` when ``f``
        is not a known factor name.
    """
    # NOTE(review): 'abnormal_advance_peceipts' (advance receipts) is titled
    # '预付款项' (prepayments); kept as in the original — confirm which is meant.
    titles = {
        'abnormal_inventories': '异常存货因子多空组合',
        'abnormal_account_receivable': '异常应收款因子多空组合',
        'abnormal_other_receivable': '异常其他应收款因子多空组合',
        'abnormal_advance_peceipts': '异常预付款项因子多空组合',
        'abnormal_sa_expense': '异常销售管理费用因子多空组合',
        'abnormal_gross_profit': '异常毛利润因子多空组合',
        'abnormal_fundamental': '异常财务因子多空组合',
    }
    growth = np.asarray(r, dtype = float) + 1
    # Net-value curve with the starting value 1 prepended.
    curve = np.concatenate(([1.0], np.cumprod(growth)))
    m = curve[-1]
    t = titles.get(f, '')
    if plot:
        d = get_trade_days(start_date = '2009-11-01', end_date = '2018-12-11')[0: len(curve)]
        p = pd.DataFrame(curve, index = d, columns = ['多空收益率'])
        p.plot(title = t)
    return m, t
# Maximum drawdown of a daily return series.
def maxdrawdown(r):
    """Return the maximum drawdown of the net-value curve implied by ``r``.

    The curve starts at 1 and is compounded by each return in turn. The
    drawdown at any point is the fractional drop from the highest value seen
    so far, and the starting value of 1 counts as a peak. A loss on the very
    first day is therefore measured as a drawdown; the original seeded the
    peak with the value *after* the first return and so ignored it.

    Args:
        r: sequence of simple daily returns (0.01 means +1%).

    Returns:
        The largest drawdown as a fraction between 0 and 1; 0 for an empty
        series or one that never falls below its running peak.
    """
    growth = np.asarray(r, dtype = float) + 1
    nav = 1.0
    peak = 1.0  # starts at the initial value and never drops, so it is always > 0
    mdd = 0.0
    for g in growth:
        nav *= g
        if nav > peak:
            peak = nav
        dd = (peak - nav) / peak
        if dd > mdd:
            mdd = dd
    return mdd
# Feasible stock pool: drop financial, suspended and ST stocks.
def set_feasible_stocks(s, d):
    """Filter a stock list down to the tradable, non-financial, non-ST names.

    A stock is removed when, on date ``d``, it is flagged as paused, is
    flagged ``is_st``, or belongs to industry '801780' (banks) or '801790'
    (non-bank financials).

    Unlike the original, the input list is not modified: a new list is
    returned, and membership is tested against a set instead of calling
    ``list.remove`` repeatedly.

    Args:
        s: iterable of stock codes to filter.
        d: the date the filters are evaluated on.

    Returns:
        A new list with the remaining codes, in their original order.
    """
    all_stocks = list(s)
    # Suspension flags on d, transposed so each stock is a row (1 = paused).
    suspended_info_df = get_price(all_stocks, end_date = d, count = 1, frequency = 'daily', fields = 'paused')['paused'].T
    suspended_stocks = suspended_info_df[suspended_info_df.iloc[:, 0] == 1].index.tolist()
    # ST flags on d, transposed the same way (True = ST).
    st_info_df = get_extras('is_st', all_stocks, end_date = d, count = 1).T
    # Element-wise comparison on a pandas column, so `== True` is intentional.
    st_stocks = st_info_df[st_info_df.iloc[:, 0] == True].index.tolist()
    # Banks and non-bank financials.
    bank_stocks = get_industry_stocks('801780', d)
    non_bank_fin_stocks = get_industry_stocks('801790', d)
    excluded = set(suspended_stocks)
    excluded.update(st_stocks)
    excluded.update(bank_stocks)
    excluded.update(non_bank_fin_stocks)
    return [stock for stock in all_stocks if stock not in excluded]
# d: current period, d2: same period one year earlier, s: feasible stock pool.
def abnormal_fundamental(d, d2, s):
    """Compute the six abnormal-financial factors and their composite.

    Each factor is the "unexpected" part of a balance-sheet or income item::

        (current - previous * normal_growth_multiplier) / total_assets

    where ``normal_growth_multiplier`` is current operating revenue divided by
    operating revenue at ``d2``. The inventory, receivable, other-receivable
    and selling/administration-expense factors are multiplied by -1; the
    advance-receipts and gross-profit factors keep their sign. The six columns
    are standardised column-wise with ``standardlize`` and their row sum is
    added as ``abnormal_fundamental``.

    Rows with any missing input are dropped. Rows whose revenue ratio is
    infinite (zero revenue at ``d2``) or whose total assets are zero are
    dropped too; the original kept them, which put ±inf into the factor
    columns before standardisation.

    Args:
        d: date of the current period.
        d2: date of the comparison period (one year earlier in this file).
        s: list of stock codes to query.

    Returns:
        A DataFrame indexed by stock code with the six standardised factor
        columns, a ``code`` column and the ``abnormal_fundamental`` composite.
    """
    # Operating revenue at both dates; after the merge the current value
    # carries the suffix _x and the year-ago value _y.
    q = query(valuation.code, income.operating_revenue).filter(valuation.code.in_(s))
    df = get_fundamentals(q, d)
    df2 = get_fundamentals(q, d2)
    res = pd.merge(df, df2, on = 'code')
    # Normal growth multiplier = current revenue / year-ago revenue.
    normal_growth_multiplier = pd.DataFrame(
        {'normal_growth_multiplier': res['operating_revenue_x'] / res['operating_revenue_y'],
         'code': res['code']},
        columns = ['normal_growth_multiplier', 'code'])
    # Total assets (current period only) and the items the factors are built from.
    q2 = query(valuation.code, balance.total_assets).filter(valuation.code.in_(s))
    q3 = query(valuation.code, balance.inventories, balance.account_receivable, balance.advance_payment,
               balance.other_receivable, balance.advance_peceipts, income.sale_expense,
               income.administration_expense,
               indicator.gross_profit_margin).filter(valuation.code.in_(s))
    df5 = get_fundamentals(q2, d)
    df6 = get_fundamentals(q3, d)
    df7 = get_fundamentals(q3, d2)
    # Join everything on code; current items end in _x, year-ago items in _y.
    res2 = pd.merge(df5, df6, on = 'code', how = 'inner')
    res2 = pd.merge(res2, df7, on = 'code', how = 'inner')
    res2 = pd.merge(res2, normal_growth_multiplier, on = 'code', how = 'inner')
    res2 = pd.merge(res2, res, on = 'code', how = 'inner')
    # Cleaning: turn the results of a division by zero into NaN so that the
    # dropna below removes those rows along with the ones missing data.
    res2 = res2.replace([np.inf, -np.inf], np.nan)
    res2.loc[res2['total_assets'] == 0, 'total_assets'] = np.nan
    res2.dropna(how = 'any', inplace = True)

    growth = res2['normal_growth_multiplier']
    assets = res2['total_assets']

    def unexpected(current, previous):
        # Change not explained by revenue growth, scaled by total assets.
        return (current - previous * growth) / assets

    factors = {
        # Abnormal inventories (sign flipped).
        'abnormal_inventories': -unexpected(res2['inventories_x'], res2['inventories_y']),
        # Abnormal receivables: accounts receivable plus prepayments (sign flipped).
        'abnormal_account_receivable': -unexpected(
            res2['account_receivable_x'] + res2['advance_payment_x'],
            res2['account_receivable_y'] + res2['advance_payment_y']),
        # Abnormal other receivables (sign flipped).
        'abnormal_other_receivable': -unexpected(res2['other_receivable_x'], res2['other_receivable_y']),
        # Abnormal advance receipts.
        'abnormal_advance_peceipts': unexpected(res2['advance_peceipts_x'], res2['advance_peceipts_y']),
        # Abnormal selling + administration expense (sign flipped).
        'abnormal_sa_expense': -unexpected(
            res2['sale_expense_x'] + res2['administration_expense_x'],
            res2['sale_expense_y'] + res2['administration_expense_y']),
        # Abnormal gross profit: gross margin times revenue.
        'abnormal_gross_profit': unexpected(
            res2['gross_profit_margin_x'] * res2['operating_revenue_x'],
            res2['gross_profit_margin_y'] * res2['operating_revenue_y']),
    }
    column_order = ['abnormal_inventories', 'abnormal_account_receivable', 'abnormal_other_receivable',
                    'abnormal_advance_peceipts', 'abnormal_sa_expense', 'abnormal_gross_profit']
    result = pd.DataFrame(factors, columns = column_order)
    result.index = res2['code']
    # Standardise each factor column.
    result = standardlize(result, axis = 0)
    # Composite = row sum of the standardised factors; skipna=False keeps the
    # NaN-propagating behaviour of the original builtin `sum`.
    composite = result.sum(axis = 1, skipna = False)
    # Same output layout as before: factors, code, composite, indexed by code.
    result['code'] = res2['code'].tolist()
    result['abnormal_fundamental'] = composite.values
    result.index = res2['code']
    return result
def get_multipleIC(f):
    """Collect the IC of factor ``f`` for every rebalance period.

    For each rebalance date (from the fourth entry of the date list up to the
    second-to-last) the feasible pool is scored, the factor is neutralised
    against SW level-1 industry and market cap, sorted descending and passed
    to ``get_currentIC`` together with the current and the next date.

    Returns:
        A list with one IC value per rebalance period, in date order.
    """
    d = ['2008-11-01', '2009-05-01', '2009-09-01', '2009-11-01', '2010-05-01', '2010-09-01', '2010-11-01',
         '2011-05-01', '2011-09-01', '2011-11-01', '2012-05-01', '2012-09-01', '2012-11-01',
         '2013-05-01', '2013-09-01', '2013-11-01', '2014-05-01', '2014-09-01', '2014-11-01',
         '2015-05-01', '2015-09-01', '2015-11-01', '2016-05-01', '2016-09-01', '2016-11-01',
         '2017-05-01', '2017-09-01', '2017-11-01', '2018-05-01', '2018-09-01', '2018-11-01']
    IC = []
    # Walk the list as (date one year earlier, rebalance date, next date);
    # zip stops with the shortest slice, so the last date is never a rebalance.
    for year_ago, rebalance, next_rebalance in zip(d, d[3:], d[4:]):
        listed = list(get_all_securities(['stock'], date = rebalance).index)
        pool = set_feasible_stocks(listed, rebalance)
        raw_factors = abnormal_fundamental(rebalance, year_ago, pool)
        neutral = neutralize(raw_factors[f], how = ['sw_l1', 'market_cap'], date = rebalance,
                             axis = 0, fillna = 'sw_l1')
        ranked = neutral.sort_values(ascending = False)
        IC.append(get_currentIC(rebalance, next_rebalance, ranked))
    return IC
def get_currentIC(d, d2, f):
    """Return the Spearman rank correlation between factor and period return.

    Args:
        d: start date of the holding period.
        d2: end date of the holding period.
        f: factor values indexed by stock code.

    Returns:
        The off-diagonal entry of the Spearman correlation matrix between the
        factor values and each stock's return from the first to the last row
        of the close-price table for ``[d, d2]``.
    """
    codes = f.index.tolist()
    # Historical closes over the period (pre-adjusted, suspended days kept).
    close = get_price(codes, start_date = d, end_date = d2, frequency = 'daily',
                      fields = ['close'], skip_paused = False, fq = 'pre')['close']
    first_close = close.iloc[0, :]
    last_close = close.iloc[-1, :]
    yield20 = pd.DataFrame(last_close / first_close - 1, index = codes, columns = ['yield_20'])
    # Pair factor and return by stock code, then rank-correlate the two columns.
    paired = pd.DataFrame(f).merge(yield20, left_index = True, right_index = True)
    rank_corr = paired.corr(method = 'spearman')
    return rank_corr.iloc[0, 1]
# Run the study only when executed as a script or notebook cell, not when
# this file is imported as a module.
if __name__ == "__main__":
    main()
本社区仅针对特定人员开放
查看需注册登录并通过风险意识测评
5秒后跳转登录页面...