近段时间A股可以说是内忧外患:内有经济转型面临的短期困难,外有贸易战以及全球股市表现拖累等影响,指数一再突破前期低点,让人不禁感慨“敢问底在何方?”刚好最近在学习数据可视化,于是就整理了一些目前A股市场的信息,供大家参考、判断。
(注:若无特别说明,以下数据均选取自2007年1月1日至2018年11月30日,单日数据来自2018年11月30日。若需要其他日期数据,请克隆代码自行调整日期参数。)
(这里我们只选取了市盈率和市净率两个指标,需要其他指标改改代码就可以了。)
先上图
注释:
根据以上统计汇总得到的结果可以看出,市场底部的特征还是比较明显的,但是具体底部会持续多久,又是否会继续下探,对于同样的信息每个人也都有自己不同的解读,不好一概而论。本文旨在达到科普、抛砖引玉的效果,欢迎大家以此为框架深入研究,有兴趣可以留言多多交流建议和思路。
#导入各种包,建议使用聚宽研究平台运行。
#若使用其他方式,请自行导入numpy和matplotlib
import pandas as pd
from jqdata import *
from jqfactor import get_factor_values
def main():
    """Populate the shared module globals, then run every report in order.

    Sets four globals that the report functions read:

    * ``date``       -- trading days from 2007-01-01 to 2018-11-30
    * ``all_stocks`` -- list of every stock code
    * ``industries`` -- Shenwan level-1 industry codes
    * ``indu_name``  -- the matching Shenwan level-1 industry names
    """
    global date, all_stocks, industries, indu_name

    # Trading calendar for the whole study window.
    date = get_trade_days(start_date='2007-01-01', end_date='2018-11-30')
    # Codes of all stocks.
    all_stocks = list(get_all_securities(['stock']).index)
    # Shenwan level-1 industries: codes first, then display names.
    industries = get_industries(name='sw_l1').index.tolist()
    indu_name = get_industries(name='sw_l1')['name'].tolist()

    # 1. Market-cap distribution of individual stocks.
    Stock_MCD()

    # 2. Market-cap distribution across industries (total, then circulating).
    for cap_field in ('market_cap', 'circulating_market_cap'):
        Industry_MCD(cap_field)

    # 3. Market-wide valuation ratios over time.
    for ratio_name in ('pe_ratio', 'pb_ratio'):
        Market_Fundamentals(ratio_name)

    # 4. Per-industry valuation ratios over time.
    for ratio_name in ('pe_ratio', 'pb_ratio'):
        Industry_Fundamentals(ratio_name)

    # 5. Trading volume and turnover amount.
    Volume_Money()

    # 6. Turnover-ratio trend, then its distribution on two snapshot days.
    Turnover_Ratio()
    for snapshot_day in ('2018-11-30', '2015-03-30'):
        TR_Distribution(snapshot_day)
#1
#股票市值分布
def Stock_MCD():
    """Chart the market-cap distribution of all stocks and print the largest ones.

    Reads ``market_cap`` for every code in the global ``all_stocks`` as of
    2018-11-30 and then:

    1. draws a pie chart of how many stocks fall into five size buckets
       (<=50, <=100, <=200, <=400, >400; the labels describe the unit as 亿元);
    2. prints how many stocks exceed 10000 and 5000 and their share of the
       summed market cap;
    3. prints the names and market caps of the five largest stocks (fewer if
       fewer rows are available).

    Rows with a missing market cap are ignored. Previously they fell through
    every comparison into the ">400" bucket and turned the total into NaN.
    """
    q = query(valuation.market_cap, valuation.code).filter(valuation.code.in_(all_stocks))
    dfmc = get_fundamentals(q, '2018-11-30')
    # A stock without a market cap can be neither bucketed nor ranked.
    dfmc = dfmc.dropna(subset=['market_cap'])
    caps = dfmc['market_cap']

    # Cumulative counts at each upper bound; successive differences are the
    # per-bucket counts and the remainder is the open-ended top bucket.
    at_most = [int((caps <= bound).sum()) for bound in (50, 100, 200, 400)]
    res = [at_most[0]]
    res += [upper - lower for lower, upper in zip(at_most, at_most[1:])]
    res.append(len(caps) - at_most[-1])

    # Pie-chart labels, in the same order as ``res``.
    l = [
        '市值小于50亿元',
        '市值在50至100亿元之间',
        '市值在100至200亿元之间',
        '市值在200至400亿元之间',
        '市值大于400亿元',
    ]
    explode = [0.1] * 5
    fig, ax = subplots()
    ax.pie(x=res, labels=l, autopct='%.2f%%', shadow=True, explode=explode, counterclock=False)
    ax.set_title('A股市值分布情况')

    # Largest first, so the first ``count`` rows are exactly the stocks above
    # the corresponding threshold.
    ranked = dfmc.sort_values(by='market_cap', ascending=False)
    count_m1 = int((caps > 10000).sum())
    count_m2 = int((caps > 5000).sum())

    # Share of the summed market cap held by each group.
    total = ranked['market_cap'].sum()
    p1 = ranked['market_cap'].iloc[:count_m1].sum() / total
    p2 = ranked['market_cap'].iloc[:count_m2].sum() / total
    print('市值在10000亿元以上的股票有{0}支,占A股总市值的{1:.2%}'.format(count_m1, p1))
    print('市值在5000亿元以上的股票有{0}支,占A股总市值的{1:.2%}'.format(count_m2, p2))

    # Top five by market cap; ``head`` copes with fewer than five rows.
    print('其中市值最大的前五支股票分别为:')
    top = ranked.head(5)
    stock_name = [get_security_info(code).display_name for code in top['code']]
    market_cap = top['market_cap'].tolist()
    print('{0:8}{1:10}'.format('股票名称','市值(亿元)'))
    for name, cap in zip(stock_name, market_cap):
        print('{0:6}{1:10.2f}'.format(name, cap))
#2
#行业市值分布
def Industry_MCD(m):
    """Draw a pie chart of summed market cap per Shenwan level-1 industry.

    Parameters
    ----------
    m : str
        Valuation field to sum: ``'market_cap'`` or
        ``'circulating_market_cap'``. Any other value prints a message and
        returns ``None`` before any data is fetched.

    Notes
    -----
    * Industry membership and valuations are both taken as of 2018-11-30.
    * Each industry is fetched with one query instead of one query per stock.
    * Industries with no constituents are skipped, and each slice is labelled
      with the name paired with its own industry code, so labels stay aligned
      when an industry is skipped.
    """
    if m not in ('market_cap', 'circulating_market_cap'):
        print('所需指标暂不存在,请输入其他指标')
        return None
    cap_column = getattr(valuation, m)

    labels = []
    totals = []
    # ``industries`` and ``indu_name`` are parallel lists built in ``main``.
    for code, name in zip(industries, indu_name):
        members = get_industry_stocks(code, date='2018-11-30')
        if not members:
            continue
        q = query(cap_column).filter(valuation.code.in_(members))
        caps = get_fundamentals(q, '2018-11-30').iloc[:, 0]
        labels.append(name)
        # A stock without a value contributes nothing to the industry total.
        totals.append(caps.fillna(0).sum())

    # A 1-D series: ``Axes.pie`` expects one-dimensional input.
    df3 = pd.Series(totals, index=labels, name='market_cap')
    explode = [0.2] * len(df3)
    fig, ax = subplots()
    ax.pie(df3, labels=df3.index, autopct='%.2f%%', shadow=False, radius=3, explode=explode)
#3
#市场财务指标值,f为指标名称,目前只可选'pe_ratio', 'pb_ratio'
def _market_ratio(df, div):
    """Return sum(market_cap) * 1e8 / sum(div) over the usable rows of ``df``.

    Usable rows have no missing value in any column and a strictly positive
    ``div``. Returns NaN when no usable row remains, instead of dividing by
    zero.
    """
    usable = df.dropna(axis='index')
    usable = usable[usable[div] > 0]
    denominator = usable[div].sum()
    if denominator == 0:
        return float('nan')
    # NOTE(review): the 1e8 factor presumably converts market_cap from 亿元
    # to yuan so that it matches the unit of ``div`` -- confirm against the
    # data dictionary.
    return usable['market_cap'].sum() * 1e8 / denominator


def _plot_market_ratio(res, title):
    """Plot one market-wide ratio series with the shared figure settings."""
    figsize(15, 10)
    fig, ax = subplots()
    tick_params(labelsize=15)
    plot(res)
    ax.set_title(title, fontsize=20)
    ax.set_xlim('2007', '2018-11-30')


def Market_Fundamentals(f):
    """Plot the market-wide aggregate P/E or P/B ratio for every day in ``date``.

    The ratio for a day is the summed market cap of the usable stocks divided
    by their summed trailing net profit (P/E) or summed equity attributable to
    parent-company owners (P/B). Stocks with a non-positive or missing
    denominator are excluded.

    Parameters
    ----------
    f : str
        ``'pe_ratio'`` or ``'pb_ratio'``. Any other value prints a message
        and returns ``None``.
    """
    if f == 'pe_ratio':
        div = 'net_profit_ttm'
        q = query(valuation.market_cap, valuation.code).filter(valuation.code.in_(all_stocks))
        res = []
        for day in date:
            df = get_fundamentals(q, day)
            # count=1 yields a single row (the latest value per stock);
            # take it as a series indexed by stock code.
            profit = get_factor_values(all_stocks, [div], end_date=day, count=1)[div].iloc[-1]
            # Equivalent to a left merge on ``code``: unmatched stocks get NaN.
            df[div] = df['code'].map(profit)
            res.append(_market_ratio(df, div))
        res = pd.DataFrame(res, index=date, columns=['市场平均市盈率'])
        _plot_market_ratio(res, '历史12年内市场平均市盈率走势')
    elif f == 'pb_ratio':
        div = 'equities_parent_company_owners'
        q = query(valuation.market_cap, valuation.code, balance.equities_parent_company_owners).filter(valuation.code.in_(all_stocks))
        res = [_market_ratio(get_fundamentals(q, day), div) for day in date]
        res = pd.DataFrame(res, index=date, columns=['市场平均市净率'])
        _plot_market_ratio(res, '历史12年内市场平均市净率走势')
    else:
        print('所需指标暂不存在,请输入其他指标')
        return None
#4
#行业财务指标值,f为指标名称,目前只可选'pe_ratio', 'pb_ratio'
def _industry_frame(f, div, indu_stocks, day):
    """Fetch market cap plus the ``div`` column for one industry on one day."""
    if f == 'pe_ratio':
        q = query(valuation.market_cap, valuation.code).filter(valuation.code.in_(indu_stocks))
        df = get_fundamentals(q, day)
        # count=1 yields a single row; take it as a series indexed by code.
        profit = get_factor_values(indu_stocks, [div], end_date=day, count=1)[div].iloc[-1]
        # Equivalent to a left merge on ``code``: unmatched stocks get NaN.
        df[div] = df['code'].map(profit)
        return df
    q = query(valuation.market_cap, valuation.code, balance.equities_parent_company_owners).filter(valuation.code.in_(indu_stocks))
    return get_fundamentals(q, day)


def _industry_ratio(df, div):
    """Return sum(market_cap) * 1e8 / sum(div), or ``None`` if undefined.

    Rows with any missing value are dropped. The original author notes these
    are mostly new listings, and that a large new listing with no net profit
    on record would distort the industry P/E. Rows with a non-positive
    ``div`` are dropped as well.
    """
    usable = df.dropna(axis='index')
    usable = usable[usable[div] > 0]
    denominator = usable[div].sum()
    if denominator == 0:
        return None
    return usable['market_cap'].sum() * 1e8 / denominator


def Industry_Fundamentals(f):
    """Compute per-industry aggregate P/E or P/B for every day, then plot it.

    Builds a table indexed by the global ``date`` with one column per code in
    the global ``industries`` and passes it to ``IFplot``. A cell stays 0 when
    the industry has no constituents on that day or no usable constituent.
    Previously the second case raised a division-by-zero error.

    The original author warns that this takes about two hours to run.

    Parameters
    ----------
    f : str
        ``'pe_ratio'`` or ``'pb_ratio'``. Any other value prints a message
        and returns ``None``.
    """
    if f == 'pe_ratio':
        div = 'net_profit_ttm'
    elif f == 'pb_ratio':
        div = 'equities_parent_company_owners'
    else:
        print('所需指标暂不存在,请输入其他指标')
        return None

    # Float-typed from the start so ratios can be stored without upcasting.
    res = pd.DataFrame(0.0, index=date, columns=industries)
    for day in date:
        for industry in industries:
            indu_stocks = get_industry_stocks(industry, day)
            if not indu_stocks:
                continue
            ratio = _industry_ratio(_industry_frame(f, div, indu_stocks, day), div)
            if ratio is not None:
                res.loc[day, industry] = ratio
    IFplot(res, f)
def _plot_industry_lines(frame, names, title):
    """Draw one line per industry in ``names`` on a new figure with a title."""
    fig, ax = subplots()
    for name in names:
        frame[name].plot.line(ax=ax, figsize=(20, 15), fontsize=20)
    ax.legend(names, fontsize=20)
    ax.set_title(title, fontsize=20)


def IFplot(res, f):
    """Plot the five most and five least volatile industries for two periods.

    ``res`` is split at 2014-02-20, which the original comment describes as
    the Shenwan classification adjustment; that day is included in both
    halves. Within each half, volatility is the standard deviation of an
    industry's column. Industries whose volatility is not positive are
    ignored in both halves, since a flat series in ``res`` has nothing to
    rank. Up to five industries are drawn per chart.

    Parameters
    ----------
    res : pandas.DataFrame
        One row per trading day, one column per entry of the global
        ``indu_name`` (columns are relabelled with those names).
    f : str
        ``'pe_ratio'`` or ``'pb_ratio'``; selects the wording of the titles.
        Any other value returns ``None`` without plotting.
    """
    metric_names = {'pe_ratio': '市盈率', 'pb_ratio': '市净率'}
    if f not in metric_names:
        return None
    metric = metric_names[f]

    # Relabel a copy so the caller's frame keeps its industry-code columns.
    named = res.copy()
    named.columns = indu_name

    split_day = datetime.date(2014, 2, 20)
    periods = [
        ('以前', named.loc[:split_day]),
        ('以后', named.loc[split_day:]),
    ]
    title = '2014年调整{0}行业平均{1}波动幅度{2}的前五行业'

    for period_label, frame in periods:
        volatility = frame.apply(std, axis=0).sort_values(ascending=False)
        volatility = volatility[volatility > 0]
        ranked = volatility.index.tolist()
        most_volatile = ranked[:5]
        # Smallest first, matching the original legend order.
        least_volatile = ranked[::-1][:5]
        _plot_industry_lines(frame, most_volatile, title.format(period_label, metric, '最大'))
        _plot_industry_lines(frame, least_volatile, title.format(period_label, metric, '最小'))
#5
#市场成交量和成交总额变化情况
def Volume_Money():
    """Plot combined volume and turnover amount for '000001.XSHG' and '399001.XSHE'.

    Fetches daily ``volume`` and ``money`` for both index codes from
    2007-01-01 to 2018-11-30 and adds them together. Draws the two summed
    series as stacked line charts, then as box plots.

    The box-plot columns are renamed by name rather than by position, so
    ``money`` is always labelled 成交总额 and ``volume`` 成交量 regardless of
    the column order of the fetched data.
    """
    # NOTE(review): ``minor_xs`` assumes get_price returns a Panel-like object
    # for multiple securities -- confirm for the pandas version in use.
    df = get_price(['000001.XSHG', '399001.XSHE'], start_date='2007-01-01', end_date='2018-11-30',
                   fields=['volume', 'money'])
    sh = df.minor_xs('000001.XSHG')
    sz = df.minor_xs('399001.XSHE')

    # Sum of the two series, per field.
    combine = pd.DataFrame({
        'money': sh['money'] + sz['money'],
        'volume': sh['volume'] + sz['volume'],
    })

    # Line charts: turnover amount on top, volume below.
    figsize(15, 10)
    fig, ax = subplots(2, 1)
    tick_params(labelsize=15)
    ax[0].set_title('12年内市场成交总额和成交量的变化情况', fontsize=20)
    ax[0].plot(combine['money'])
    ax[0].legend(['成交总额'], fontsize=15)
    ax[0].set_xlim('2007', '2018-11-30')
    ax[1].plot(combine['volume'], color='orange')
    ax[1].legend(['成交量'], fontsize=15)
    ax[1].set_xlim('2007', '2018-11-30')

    # Box plots, with each column labelled by what it actually holds.
    labelled = combine[['money', 'volume']].rename(
        columns={'money': '成交总额', 'volume': '成交量'})
    labelled.plot.box(subplots=True, figsize=(10, 12), fontsize=16)
#6
#市场平均换手率走势和占比
def _plot_turnover(res, title, fig_size, label_size, title_size, xlim=None):
    """Plot one mean-turnover series on a new figure with the given sizes."""
    figsize(*fig_size)
    fig, ax = subplots()
    tick_params(labelsize=label_size)
    plot(res)
    ax.set_title(title, fontsize=title_size)
    if xlim is not None:
        ax.set_xlim(*xlim)


def Turnover_Ratio():
    """Plot the mean turnover ratio across all stocks over two windows.

    The first chart covers every day in the global ``date``. The second
    covers the 100 trading days ending 2018-11-30. Each point is the mean of
    ``turnover_ratio`` over the stocks in the global ``all_stocks`` on that
    day.

    The second series was previously appended to an undefined name, which
    raised ``NameError`` before the second chart could be drawn.
    """
    q = query(valuation.turnover_ratio, valuation.code).filter(valuation.code.in_(all_stocks))

    # Full study window.
    res = [mean(get_fundamentals(q, day)['turnover_ratio']) for day in date]
    res = pd.DataFrame(res)
    res.index = date
    res.columns = ['市场平均换手率']
    _plot_turnover(res, '历史12年内市场平均换手率走势', (30, 20), 30, 40,
                   xlim=('2007', '2018-11-30'))

    # Most recent 100 trading days.
    date_temp = get_trade_days(end_date='2018-11-30', count=100)
    res2 = [mean(get_fundamentals(q, day)['turnover_ratio']) for day in date_temp]
    res2 = pd.DataFrame(res2)
    res2.index = date_temp
    res2.columns = ['市场平均换手率']
    _plot_turnover(res2, '历史100天内市场平均换手率走势', (15, 10), 15, 20)
def TR_Distribution(date):
    """Draw a pie chart of how stocks are distributed by turnover ratio on one day.

    Stocks in the global ``all_stocks`` are counted in five buckets:
    below 1, [1, 3), [3, 5), [5, 10) and 10 or above. The labels describe
    the values as percentages.

    Stocks without a turnover ratio are left out. Previously a NaN failed
    every ``<`` comparison and was counted in the top bucket.

    Parameters
    ----------
    date : str or date-like
        Day to query. It is also appended to the chart title, converted with
        ``str`` so non-string dates work too.
    """
    q = query(valuation.turnover_ratio, valuation.code).filter(valuation.code.in_(all_stocks))
    trs = get_fundamentals(q, date)['turnover_ratio'].dropna()

    # Cumulative counts below each bound; successive differences are the
    # per-bucket counts and the remainder is the open-ended top bucket.
    below = [int((trs < bound).sum()) for bound in (1, 3, 5, 10)]
    result = [below[0]]
    result += [upper - lower for lower, upper in zip(below, below[1:])]
    result.append(len(trs) - below[-1])

    # Labels in the same order as ``result``.
    l = [
        '换手率小于1%',
        '换手率在1%-3%之间',
        '换手率在3%-5%之间',
        '换手率在5%-10%之间',
        '换手率大于10%',
    ]
    explode = [0.1] * 5
    fig, ax = subplots(figsize=(8, 7))
    ax.set_title('换手率分布情况--' + str(date), fontsize=15)
    ax.pie(result, labels=l, shadow=True, counterclock=False, explode=explode, autopct='%.2f%%')
# Run the main function to produce the results.
main()
本社区仅针对特定人员开放
查看需注册登录并通过风险意识测评
5秒后跳转登录页面...
移动端课程