# Fetch Shenwan (SW) industry index data from the official swsindex.com site.
import numpy as np
import pandas as pd
import requests
import json
from datetime import timedelta, date


def get_sw_data(code=None, start_date=None, end_date=None, frequency='day', fields=None):
    """Download Shenwan industry index quotes from www.swsindex.com.

    Args:
        code: Index code as a string, or a list of codes. ``None`` falls back
            to ``'801010'``. Code list (per the original author's note):
            https://www.joinquant.com/help/api/help?name=plateData#申万行业
        start_date: ``'YYYY-MM-DD'`` string. ``None``, a value before
            1999-12-30, or a value after today is replaced by ``'1999-12-30'``.
        end_date: ``'YYYY-MM-DD'`` string. ``None``, a value after today, or a
            value before 1999-12-30 is replaced by today's date.
        frequency: ``'day'``, ``'week'`` or ``'month'``; anything else is
            treated as ``'day'``.
        fields: Optional list of column names to request. It is honoured only
            when every name is a known column; otherwise all columns are
            requested. ``SwIndexCode``, ``SwIndexName`` and ``BargainDate``
            are always included.

    Returns:
        A DataFrame indexed by ``BargainDate`` (parsed to datetime). When no
        rows are returned the frame is empty but keeps the requested columns.
    """
    earliest = '1999-12-30'
    required_columns = ['SwIndexCode', 'SwIndexName', 'BargainDate']
    # Every column the endpoint is asked for when ``fields`` is not usable.
    sw_columns_list = ['SwIndexCode', 'SwIndexName', 'BargainDate', 'OpenIndex', 'CloseIndex',
                       'MaxIndex', 'MinIndex', 'BargainAmount', 'BargainSum', 'Markup',
                       'TurnoverRate', 'PE', 'PB', 'MeanPrice', 'BargainSumRate',
                       'NegotiablesShareSum', 'NegotiablesShareSum2', 'DP']
    frequency_list = ['day', 'week', 'month']

    # --- index codes -------------------------------------------------------
    # The original left ``code_str`` unbound when ``code`` was None; apply the
    # default first and then quote it like any other string.
    if code is None:
        code = '801010'
    if isinstance(code, str):
        code_str = "'" + code + "'"
    else:
        # repr() of each element matches what str(list) produced originally.
        code_str = ", ".join(repr(item) for item in code)

    # --- date range (plain string comparison on ISO dates) ------------------
    today_str = date.today().strftime('%Y-%m-%d')
    if start_date is None or start_date < earliest or start_date > today_str:
        start_date = earliest
    if end_date is None or end_date > today_str or end_date < earliest:
        end_date = today_str

    # --- frequency ---------------------------------------------------------
    if frequency not in frequency_list:
        frequency = 'day'

    where = "swindexcode in ({}) and BargainDate>='{}' and BargainDate<='{}' and type='{}'".format(
        code_str, start_date, end_date, frequency)

    # --- requested columns -------------------------------------------------
    columns = list(sw_columns_list)
    if fields is not None and set(fields).issubset(sw_columns_list):
        # Put the key columns first and drop duplicates while keeping order.
        # (The original membership test compared a list against the elements
        # of ``fields`` and therefore always prepended, duplicating columns.)
        columns = list(dict.fromkeys(required_columns + list(fields)))
    fieldlist = ",".join(columns)

    header = {
        'HOST': 'www.swsindex.com',
        'Referer': 'http://www.swsindex.com/idx0200.aspx?columnid=8838&type=Day',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/53.0.2785.104 Safari/537.36 Core/1.53.4482.400 QQBrowser/9.7.13001.400',
    }
    param = {
        'tablename': 'V_Report',
        'key': 'id',
        # Page number; per the original author's note each page holds 20 rows.
        'p': '1',
        # Query: codes, date range and data type.
        'where': where,
        # Ordering (original author's note: "swindexcode asc" sorts by code
        # ascending; BargainDate_1 is date descending, _2 is ascending).
        'orderby': 'swindexcode asc,BargainDate_2',
        # Columns to return.
        'fieldlist': fieldlist,
        'pagecount': '1',
        'timed': '1524497094532',
    }
    url = 'http://www.swsindex.com/handler.aspx'

    # --- page through the result set until an empty page comes back ---------
    frames = []
    page = 1
    while True:
        param['p'] = str(page)
        ret = requests.get(url, data=param, headers=header)
        if not ret.ok:
            break
        # Normalise quotes and date formatting so the payload parses as JSON.
        text = ret.text.replace("'", '"').replace(' 0:00:00', '').replace('/', '-')
        rows = json.loads(text).get('root')
        if not rows:
            break
        frames.append(pd.DataFrame(rows, columns=columns))
        page += 1

    if not frames:
        return pd.DataFrame(columns=columns).set_index('BargainDate')

    # Single concat instead of DataFrame.append (removed in pandas 2.0).
    df = pd.concat(frames)
    df['BargainDate'] = pd.to_datetime(df['BargainDate'], format='%Y-%m-%d')
    return df.set_index('BargainDate')


df = get_sw_data('850111', start_date='2019-02-23')
df.head()
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
SwIndexCode | SwIndexName | OpenIndex | CloseIndex | MaxIndex | MinIndex | BargainAmount | BargainSum | Markup | TurnoverRate | PE | PB | MeanPrice | BargainSumRate | NegotiablesShareSum | NegotiablesShareSum2 | DP | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
BargainDate | |||||||||||||||||
2019-02-25 | 850111 | 种子生产 | 2493.65 | 2603.85 | 2612.97 | 2469.67 | 18544 | 109029 | 4.57 | 3.6043 | 45.49 | 2.68 | 6.70 | 0.10 | 3496540.23 | 437067.53 | 0.57 |
2019-02-26 | 850111 | 种子生产 | 2601.41 | 2577.02 | 2643.98 | 2534.83 | 20089 | 115323 | -1.03 | 3.9047 | 45.02 | 2.65 | 6.65 | 0.11 | 3470405.45 | 433800.68 | 0.58 |
2019-02-27 | 850111 | 种子生产 | 2571.52 | 2547.74 | 2603.46 | 2530.19 | 13651 | 78331 | -1.14 | 2.6533 | 44.51 | 2.62 | 6.58 | 0.09 | 3430769.77 | 428846.22 | 0.59 |
2019-02-28 | 850111 | 种子生产 | 2550.00 | 2559.18 | 2584.13 | 2523.73 | 8255 | 50326 | 0.45 | 1.6044 | 44.71 | 2.64 | 6.62 | 0.08 | 3449990.37 | 431248.80 | 0.58 |
2019-03-01 | 850111 | 种子生产 | 2567.25 | 2570.26 | 2590.56 | 2519.63 | 9037 | 53291 | 0.43 | 1.7564 | 44.91 | 2.65 | 6.64 | 0.08 | 3462555.34 | 432819.42 | 0.58 |
import requests
import anyjson
import pandas as pd


def get_hot_stock_from_sina():
    """Fetch the hot-stock list from Sina and return it as a DataFrame.

    Returns:
        A DataFrame built from ``result.data`` of the response, with the
        ``SYMBOL`` column passed through ``normalize_code``.

    NOTE(review): ``normalize_code`` is not defined or imported in this cell;
    presumably the hosting research environment provides it - confirm.
    """
    html = requests.get(
        'https://ssl-data.sina.com.cn/api/openapi.php/WeiboReferService.getListSymbol'
        '?code=CNHOUR6&callback=var%20AHM='
    ).content.decode()
    # The body wraps the JSON in parentheses. Slice from the first "(" to the
    # LAST ")" so a ")" inside a JSON string value cannot truncate the payload.
    payload = html[html.index('(') + 1:html.rindex(')')]
    parsed = anyjson.deserialize(payload)
    data = pd.DataFrame(parsed['result']['data'])
    data.SYMBOL = data.SYMBOL.apply(normalize_code)
    return data


get_hot_stock_from_sina().head()
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
NAME | REF | SYMBOL | |
---|---|---|---|
0 | 中兴通讯 | 1638400 | 000063.XSHE |
1 | 西安旅游 | 1025241 | 000610.XSHE |
2 | 国际实业 | 1021921 | 000159.XSHE |
3 | 士兰微 | 1014388 | 600460.XSHG |
4 | 连云港 | 656515 | 601008.XSHG |
# ``import urllib`` alone does not load the ``urllib.request`` submodule.
import urllib.request
import json
import pandas as pd


def Xuangubao():
    """Fetch the xuangubao.cn limit-up stock pool as a DataFrame.

    Returns:
        A DataFrame indexed by ``normalize_code(symbol)``, containing the raw
        pool fields plus ``stock_reason``, ``plate_name`` and ``plate_reason``
        extracted from ``surge_reason``; ``limit_timeline`` is replaced by the
        datetime of its first item and ``surge_reason`` itself is dropped.

    NOTE(review): ``normalize_code`` is not defined or imported in this cell;
    presumably the hosting research environment provides it - confirm.
    """
    # Local import: the original used ``datetime.datetime`` without importing
    # the module anywhere in this cell.
    import datetime

    url = "https://flash-api.xuangubao.cn/api/pool/detail?pool_name=limit_up"  # limit-up pool
    # Alternative pool left by the original author (limit-up that broke open):
    # 'https://flash-api.xuangubao.cn/api/pool/detail?pool_name=limit_up_broken'
    header_dict = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko'}

    req = urllib.request.Request(url, headers=header_dict)
    with urllib.request.urlopen(req) as resp:  # close the connection deterministically
        df = pd.DataFrame(json.loads(resp.read())['data'])

    def get_plate_reason(x):
        # Best effort: some rows have no related plate or no reason -> None.
        try:
            return x['related_plates'][0][u'plate_reason']
        except (KeyError, IndexError, TypeError):
            return None

    df['stock_reason'] = df.surge_reason.apply(lambda x: x['stock_reason'])
    df['plate_name'] = df.surge_reason.apply(lambda x: x['related_plates'][0]['plate_name'])
    df['plate_reason'] = df.surge_reason.apply(get_plate_reason)
    df['limit_timeline'] = df.limit_timeline.apply(
        lambda x: datetime.datetime.fromtimestamp(x['items'][0]['timestamp']))
    df.index = df.surge_reason.apply(lambda x: normalize_code(x['symbol']))
    df.index.name = None
    return df.drop('surge_reason', axis=1)


Xuangubao().head()
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
break_limit_down_times | break_limit_up_times | buy_lock_volume_ratio | change_percent | first_break_limit_down | first_break_limit_up | first_limit_down | first_limit_up | is_new_stock | issue_price | last_break_limit_down | last_break_limit_up | last_limit_down | last_limit_up | limit_down_days | limit_timeline | limit_up_days | listed_date | m_days_n_boards_boards | m_days_n_boards_days | mtm | nearly_new_acc_* | nearly_new_break_days | new_stock_acc_* | new_stock_break_limit_up | new_stock_limit_up_days | new_stock_limit_up_price_before_broken | non_restricted_capital | price | sell_lock_volume_ratio | stock_chi_name | symbol | total_capital | turnover_ratio | volume_bias_ratio | yesterday_break_limit_up_times | yesterday_first_limit_up | yesterday_last_limit_up | yesterday_limit_down_days | yesterday_limit_up_days | stock_reason | plate_name | plate_reason | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
600860.XSHG | 0 | 23 | 0.003861 | 0.100386 | 0 | 1553738504 | 0 | 1553736701 | False | 5.30 | 0 | 1553756043 | 0 | 1553756044 | 0 | 2019-03-28 09:31:41 | 2 | 768153600 | 2 | 2 | 0.0 | 0 | 0 | 0.613208 | 0 | 0 | 0 | 2.753100e+09 | 8.55 | 0 | 京城股份 | 600860.SS | 3.608100e+09 | 0.115617 | 2.875110 | 2 | 1553649900 | 1553652356 | 0 | 1 | 公司拥有亚洲地区最具规模的、技术水平最先进的铝内胆碳纤维全缠绕复合气瓶的设计测试中心及生产线... | 燃料电池 | 新能源车补贴转向充电及加氢设施 |
000638.XSHE | 0 | 0 | 0.018542 | 0.100639 | 0 | 0 | 0 | 1553737362 | False | 8.38 | 0 | 0 | 0 | 1553737362 | 0 | 2019-03-28 09:42:42 | 1 | 848937600 | 0 | 0 | 0.0 | 0 | 0 | -0.177804 | 0 | 0 | 0 | 2.131766e+09 | 6.89 | 0 | 万方发展 | 000638.SZ | 2.131766e+09 | 0.036836 | 0.654520 | 0 | 0 | 0 | 0 | 0 | 主营木材销售,转型互联网医疗大平台,参股辽宁华盛信托等公司 | 其他 | None |
300507.XSHE | 0 | 1 | 0.025224 | 0.100095 | 0 | 1553737665 | 0 | 1553736303 | False | 24.92 | 0 | 1553737665 | 0 | 1553737710 | 0 | 2019-03-28 09:25:03 | 1 | 1461859200 | 0 | 0 | 0.0 | 0 | 0 | -0.069422 | 0 | 0 | 0 | 1.359967e+09 | 23.19 | 0 | 苏奥传感 | 300507.SZ | 2.837111e+09 | 0.065284 | 2.269087 | 0 | 0 | 0 | 0 | 0 | 18年年报拟10转8 | 高送转 | None |
002274.XSHE | 0 | 4 | 0.003335 | 0.100115 | 0 | 1553736720 | 0 | 1553736726 | False | 10.01 | 0 | 1553755197 | 0 | 1553755221 | 0 | 2019-03-28 09:32:06 | 1 | 1222272000 | 0 | 0 | 0.0 | 0 | 0 | -0.044955 | 0 | 0 | 0 | 5.978181e+09 | 9.56 | 0 | 华昌化工 | 002274.SZ | 6.069737e+09 | 0.078687 | 1.413266 | 6 | 1553650494 | 1553650512 | 0 | 0 | 间接参股虹软科技 | 科创板概念股 | 第二批科创板受理名单出炉 |
000590.XSHE | 0 | 0 | 0.064759 | 0.099789 | 0 | 0 | 0 | 1553736303 | False | 6.00 | 0 | 0 | 0 | 1553736303 | 0 | 2019-03-28 09:25:03 | 6 | 821980800 | 7 | 14 | 0.0 | 0 | 0 | 1.608333 | 0 | 0 | 0 | 3.493647e+09 | 15.65 | 0 | 启迪古汉 | 000590.SZ | 3.747725e+09 | 0.004183 | 1.683131 | 0 | 1553649903 | 1553649903 | 0 | 5 | 间接控股股东启迪控股签署《合作框架协议》,本次合作后雄安集团和/或雄安新区管委会控股的基金与... | 雄安新区 | 雄安新区由规划阶段转入建设实施阶段 |
import requests
from bs4 import BeautifulSoup
# ``control`` returns a DataFrame; the original cell relied on ``pd`` having
# been imported by an earlier cell.
import pandas as pd


def get_post_data(html):
    """Parse one forum list page into a list of per-post field lists.

    Args:
        html: HTML text of a list page.

    Returns:
        One list per ``div.articleh`` element. Each ``span`` inside it adds
        fields in order: the third span adds the ``em`` text (or ``""``), the
        link text and the absolute post URL; the fourth adds the author link
        text and href; every other span adds its ``.string``.
    """
    # NOTE(review): no parser is named, so bs4 picks whichever is installed;
    # left as in the original so parsing results do not change.
    soup = BeautifulSoup(html)
    post_list_convert = []
    for post in soup.find_all("div", class_="articleh"):
        single_post = []
        for idx, span in enumerate(post.find_all("span")):
            if idx == 2:
                # Post tag, title and address.
                single_post.append(span.em.string if span.em else "")
                single_post.append(span.a.string)
                single_post.append("http://guba.eastmoney.com" + span.a['href'])
            elif idx == 3:
                # Author information.
                single_post.append(span.a.string)
                single_post.append(span.a['href'])
            else:
                # Remaining columns.
                single_post.append(span.string)
        post_list_convert.append(single_post)
    return post_list_convert


def get_html(urls):
    """Download every URL in ``urls`` and return the bodies decoded as UTF-8."""
    responses = [requests.get(url) for url in urls]
    return [resp.content.decode("utf-8") for resp in responses]


def control(page_count=20, step=50):
    """Crawl ``page_count`` list pages, ``step`` pages per batch.

    Args:
        page_count: Number of list pages to fetch, starting from page 1.
        step: How many pages are downloaded in one batch.

    Returns:
        A DataFrame with one row per post (see ``get_post_data``); empty when
        ``page_count`` is 0.
    """
    # Build the list of page URLs.
    url_list = ["http://guba.eastmoney.com/list,cjpl,99_{}.html".format(idx)
                for idx in range(1, page_count + 1)]

    # Accumulate across ALL batches. The original rebound the result list at
    # the start of each batch, so with page_count > step only one batch
    # survived, and with page_count == 0 the name was never bound.
    all_post_data = []
    for idx in range(0, page_count, step):
        # Fetch ``step`` pages at a time.
        print("start get html {} -> {}".format(idx, idx + step))
        all_html_data = get_html(url_list[idx:idx + step])
        print("start convert data")
        for page_html in all_html_data:
            all_post_data.extend(get_post_data(page_html))
    return pd.DataFrame(all_post_data)


control(page_count=3).head()
start get html 0 -> 50 start convert data
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | |
---|---|---|---|---|---|---|---|---|---|
0 | 631541 | 186 | None | 易纲:继续放宽对外资金融机构股比限制 | http://guba.eastmoney.com/news,cjpl,783238468.... | 财经评论 | http://iguba.eastmoney.com/9313013693864916 | 09-16 | 03-26 16:02 |
1 | 277405 | 626 | None | 李大霄:七大特征佐证2647大底 A股“大”牛市将 | http://guba.eastmoney.com/news,cjpl,783131642.... | 财经评论 | http://iguba.eastmoney.com/9313013693864916 | 09-14 | 03-20 18:25 |
2 | 178004 | 39 | None | 百亿女富豪也踩雷:这家公司业绩变脸市值缩水15 | http://guba.eastmoney.com/news,cjpl,780243957.... | 财经评论 | http://iguba.eastmoney.com/9313013693864916 | 08-27 | 02-15 16:14 |
3 | 30361 | 7 | 国家网络安全宣传周开幕 360安全大脑现出“真 | http://guba.eastmoney.com/news,cjpl,783552067.... | 财经评论 | http://iguba.eastmoney.com/9313013693864916 | 09-18 | 02-14 07:31 | |
4 | 2948 | 23 | 关于处理商誉减值问题的思考 | http://guba.eastmoney.com/news,cjpl,803866689.... | 股友jyo67w | http://iguba.eastmoney.com/7952065290232468 | 02-07 | 02-12 22:42 |
def get_jgdy_all(page_count=1, page_size=100):
    """Fetch recently published institutional-research records from Eastmoney.

    Args:
        page_count: Number of pages to request, starting at page 1.
        page_size: Rows per page (the original author's note says it must not
            exceed 5000).

    Returns:
        All pages concatenated row-wise into one DataFrame indexed by
        ``SCode``. The original author's note says rows are ordered by
        publication time.
    """
    endpoint = 'http://data.eastmoney.com/DataCenter_V3/jgdy/xx.ashx?'

    def fetch_page(page_no):
        # One request per page; the rows live under the "data" key.
        query = {
            "pagesize": page_size,
            "page": page_no,
            'sortRule': -1,
            'sortType': 0,
            'rt': 51777724,
        }
        response = requests.get(endpoint, params=query)
        return pd.DataFrame(json.loads(response.text)['data'])

    pages = [fetch_page(page_no) for page_no in range(1, page_count + 1)]
    return pd.concat(pages, axis=0).set_index('SCode')


df = get_jgdy_all(1)
df.head()
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
ChangePercent | Close | CompanyCode | CompanyName | Description | EndDate | Licostaff | Maincontent | NoticeDate | OrgCode | OrgName | OrgSum | Orgtype | OrgtypeName | Personnel | Place | SName | StartDate | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SCode | ||||||||||||||||||
300545 | 0.27 | 29.18 | 80319562 | 特定对象调研 | 董事会秘书 钟辉,证券事务代表 杨晓芬 | 2019-03-28 | 80000073 | 华泰证券 | 005001 | 证券公司 | 陈家辉 | 深圳市联得自动化装备股份有限公司会议室 | 联得装备 | 2019-03-28 | ||||
300545 | 0.27 | 29.18 | 80319562 | 特定对象调研 | 董事会秘书 钟辉,证券事务代表 杨晓芬 | 2019-03-28 | 10000082 | 国金证券 | 005001 | 证券公司 | 韦俊龙 | 深圳市联得自动化装备股份有限公司会议室 | 联得装备 | 2019-03-28 | ||||
300545 | 0.27 | 29.18 | 80319562 | 特定对象调研 | 董事会秘书 钟辉,证券事务代表 杨晓芬 | 2019-03-28 | 10001005 | 东北证券 | 005001 | 证券公司 | 王少男 | 深圳市联得自动化装备股份有限公司会议室 | 联得装备 | 2019-03-28 | ||||
000598 | -1.53 | 4.52 | 10000935 | 成都市兴蓉投资股份有限公司 | 特定对象调研 | 赵璐,李峥,文雅 | 2019-03-28 | 80560391 | 中庚基金 | 004001 | 基金管理公司 | 胡坤 | 公司会议室 | 兴蓉环境 | 2019-03-27 | |||
000598 | -1.53 | 4.52 | 10000935 | 成都市兴蓉投资股份有限公司 | 特定对象调研 | 赵璐,李峥,文雅 | 2019-03-28 | 10001081 | 长江证券 | 005001 | 证券公司 | 徐科 | 公司会议室 | 兴蓉环境 | 2019-03-27 |
def get_jgdy_one(code):
    """Fetch institutional-research records for a single stock.

    Args:
        code: Stock code sent as the ``code`` query parameter.

    Returns:
        A DataFrame built from the ``data`` list of the first page
        (page size 100) of the response.
    """
    endpoint = 'http://data.eastmoney.com/DataCenter_V3/jgdy/gsjsdy.ashx?'
    query = {
        "pagesize": 100,
        "page": 1,
        'sortRule': -1,
        'sortType': 0,
        'code': code,
    }
    response = requests.get(endpoint, params=query)
    payload = json.loads(response.text)
    return pd.DataFrame(payload['data'])


get_jgdy_one('000568').head()
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
ChangePercent | Close | CompanyCode | CompanyName | Description | EndDate | Licostaff | Maincontent | NoticeDate | OrgCode | OrgName | OrgSum | Orgtype | OrgtypeName | Personnel | Place | SCode | SName | StartDate | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 4.63 | 64.39 | 10000910 | 泸州老窖股份有限公司 | 特定对象调研 | 王川,赵亮 | 2019-03-22 | 6 | 公司七楼会议室 | 000568 | 泸州老窖 | 2019-03-22 | |||||||
1 | 4.63 | 64.39 | 10000910 | 泸州老窖股份有限公司 | 特定对象调研 | 林锋,王川,赵亮 | 2019-03-18 | 7 | 公司八楼会议室 | 000568 | 泸州老窖 | 2019-03-17 | |||||||
2 | 4.63 | 64.39 | 10000910 | 泸州老窖股份有限公司 | 特定对象调研 | 林锋,王洪波 | 2018-12-20 | 14 | 公司一楼会议室 | 000568 | 泸州老窖 | 2018-12-19 | |||||||
3 | 4.63 | 64.39 | 10000910 | 泸州老窖股份有限公司 | 特定对象调研 | 王川,王钰 | 2018-11-29 | 12 | 公司二楼会议室 | 000568 | 泸州老窖 | 2018-11-29 | |||||||
4 | 4.63 | 64.39 | 10000910 | 泸州老窖股份有限公司 | 特定对象调研 | 王川,赵亮 | 2018-11-07 | 12 | 公司三楼会议室 | 000568 | 泸州老窖 | 2018-11-06 |
# -*- coding:utf-8 -*-
# ``import urllib`` alone does not load the ``urllib.request`` submodule.
import urllib.request
import re
import pandas as pd


def get_yb(mon):
    """Fetch Eastmoney's monthly brokerage report table for one month.

    Args:
        mon: Month as a ``'YYYYMM'`` string, e.g. ``'201901'``.

    Returns:
        A DataFrame parsed from the JSON returned by the data endpoint.

    Raises:
        ValueError: If no access token can be found in the landing page.

    NOTE(review): numeric-looking columns are type-inferred by
    ``pd.read_json``, so code-like columns may lose leading zeros - confirm
    whether callers need them as strings.
    """
    import io

    http_url = 'http://data.eastmoney.com/other/qsjy/yb.%s.html' % mon
    header_dict = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko'}

    # Step 1: load the landing page and pull the API token out of it.
    page_request = urllib.request.Request(http_url, headers=header_dict)
    with urllib.request.urlopen(page_request) as response:
        page = response.read().decode('gbk')
    match = re.search("token=(.*?)&st", page)
    if match is None:
        # The original failed here with an opaque AttributeError on None.
        raise ValueError("token not found in %s" % http_url)
    token = match.group(1)

    # Step 2: query the data API, filtered on the first day of the month.
    year_month = mon[0:4] + "-" + mon[4:6]
    url = ("http://dcfm.eastmoney.com/em_mutisvcexpandinterface/api/js/get?type=QSYJBG_MReport&token="
           + token
           + "&st=RQ&sr=-1&p=1&ps=1000&js=(x)&filter=(RQ='" + year_month + "-01T00:00:00')&rt=51785853")
    data_request = urllib.request.Request(url, headers=header_dict)
    with urllib.request.urlopen(data_request) as response:
        raw = response.read()

    # Wrap the text in a file-like object: passing a literal JSON payload to
    # read_json is deprecated in recent pandas releases.
    return pd.read_json(io.StringIO(raw.decode('utf-8')))


get_yb('201901').head()
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
ENDATE | RQ | jlr | jlrhb | jlrtb | jzc | jzchb | jzctb | ljjlr | ljjlrhb | ljjlrtb | ljyysr | ljyysrhb | ljyysrtb | mgsdm | mgsmc | yysr | yysrhb | yysrtb | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2019-01-31T00:00:00 | 2019-01-01T00:00:00 | 5842.810000 | -0.368902 | 12.1871 | 1.344589e+06 | 0.001659 | -0.00528801 | 5842.810000 | -0.79369 | 12.1871 | 20355.500000 | -0.860572 | 1.46534 | 2500 | 山西证券 | 20355.500000 | -0.015474 | 1.46534 |
1 | 2019-01-31T00:00:00 | 2019-01-01T00:00:00 | 15203.870000 | 1.719180 | -0.231393 | 2.738747e+06 | 0.005972 | 0.00542845 | 15203.870000 | -0.581059 | -0.231393 | 56251.700000 | -0.851884 | -0.0641378 | 783 | 长江证券 | 56251.700000 | 3.269388 | -0.0641378 |
2 | 2019-01-31T00:00:00 | 2019-01-01T00:00:00 | 5222.835945 | 1.788267 | -0.198588 | 1.321474e+06 | 0.003800 | -0.0196675 | 5222.835945 | 5.61216 | -0.198588 | 16485.213626 | -0.869353 | -0.168739 | 750 | 国海证券 | 16485.213626 | -0.222980 | -0.168739 |
3 | 2019-01-31T00:00:00 | 2019-01-01T00:00:00 | 15907.560000 | 3.176557 | 4.96569 | 1.506301e+06 | -0.003985 | -0.041448 | 15907.560000 | -0.0235819 | 4.96569 | 35895.330000 | -0.838624 | 1.70169 | 686 | 东北证券 | 35895.330000 | -0.282862 | 1.70169 |
4 | 2019-01-31T00:00:00 | 2019-01-01T00:00:00 | 20874.600000 | -0.552881 | - | 4.635912e+06 | 0.006607 | - | 20874.600000 | - | - | 79549.510000 | - | - | 601066 | 中信建投 | 79549.510000 | -0.401797 | - |
内容较多,请直接点击原文链接查看
import pandas as pd
import requests
from bs4 import BeautifulSoup


def get_sys(symbol='', page=1):
    """Query commodity quotes from 100ppi.com (生意社).

    Args:
        symbol: The site's numeric id for the commodity as a string; per the
            original author's note it can be read from the URL of the
            commodity's page (e.g. ``'806'``).
        page: Which result page to fetch.

    Returns:
        The quote table as a DataFrame with an extra ``price`` column holding
        the first number found in each ``报价`` cell. The column is integer
        when every quote is a whole number, float otherwise, and NaN where a
        cell contains no number.
    """
    import io

    txt = requests.get('http://www.100ppi.com/mprice/plist-' + symbol + '-' + str(page) + '.html').text
    # NOTE(review): no parser is named, so bs4 picks whichever is installed;
    # left as in the original so parsing results do not change.
    table = BeautifulSoup(txt).find_all("table", class_="lp-table mb15")
    # File-like wrapper: literal HTML strings are deprecated in recent pandas.
    df = pd.read_html(io.StringIO(str(table)), header=0)[0]

    # The pattern accepts decimals, so the original ``.astype(int)`` crashed on
    # quotes such as "3150.5", and ``findall(...)[0]`` crashed on cells without
    # any number. extract + to_numeric handles both and keeps ints as ints.
    numbers = df['报价'].astype(str).str.extract(r"(\d+\.?\d*)", expand=False)
    df['price'] = pd.to_numeric(numbers)
    return df


get_sys('806').head()
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
产地与品牌 | 规格 | 报价 | 报价提供方 | 发布时间 | price | |
---|---|---|---|---|---|---|
0 | 兰花清洁 | 二*的质量分数:≥99.0% | 出厂价 3150元/吨 | 山西兰花清洁能源有限责任公司 | 2019-03-27 | 3150 |
1 | 盛德源 | 二*的质量分数:≥99.0% | 出厂价 3450元/吨 | 德州盛德源化工有限公司 | 2019-03-27 | 3450 |
2 | 玉皇金宇 | 二*的质量分数:≥99.0% | 出厂价 3360元/吨 | 山东玉皇化工(集团)有限公司 | 2019-03-27 | 3360 |
3 | 冀春化工 | 二*的质量分数:≥99.0% | 出厂价 3450元/吨 | 河北冀春化工有限公司 | 2019-03-27 | 3450 |
4 | 河南义马 | 二*的质量分数:≥99.0% | 出厂价 3310元/吨 | 河南义马新源化工能源有限责任公司 | 2019-03-27 | 3310 |
import requests
import json
import pandas as pd
import time
from sqlalchemy import create_engine


def get_bnd_yield(year=10):
    """Fetch the China government bond yield series for one maturity.

    Args:
        year: Maturity in years; must be a key of the internal id table
            (1, 5 or 10).

    Returns:
        A single-column DataFrame named ``'y<year>'`` indexed by ``date``
        (``'YYYY-MM-DD'`` strings formatted in the machine's local time zone).

    Raises:
        KeyError: If ``year`` has no entry in the id table.
    """
    ids = {10: '29227', 5: '29234', 1: '29231'}
    pair_id = ids[year]
    url = ('https://cn.investing.com/common/modules/js_instrument_chart/api/data.php?'
           + 'pair_id={}&pair_id_for_news={}'.format(pair_id, pair_id)
           + '&chart_type=area&pair_interval=month&candle_count=120&events=yes'
             '&volume_series=yes&period=5-years')
    headers = {
        'X-Requested-With': 'XMLHttpRequest',
        'Host': 'cn.investing.com',
        'Referer': 'https://cn.investing.com/rates-bonds/china-{}-year-bond-yield'.format(year),
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
    }
    res = requests.get(url, headers=headers)
    # Swap single quotes for double quotes so the payload parses as JSON.
    res = json.loads(res.content.decode('utf-8').replace("'", "\""))

    # Keep only the first two fields of every candle.
    data = pd.DataFrame(res['candles']).iloc[:, :2]
    data.columns = ['date', 'y' + str(year)]
    # Only the first 10 digits of the stamp are used as epoch seconds.
    # NOTE(review): presumably the source stamps are in milliseconds - confirm.
    data['date'] = data['date'].map(
        lambda x: time.strftime("%Y-%m-%d", time.localtime(int(str(x)[:10]))))
    data.set_index('date', inplace=True)
    return data


def get_bnd_yields(years=(1, 5, 10)):
    """Fetch several maturities and join them column-wise on date.

    Args:
        years: Iterable of maturities accepted by ``get_bnd_yield``. The
            default is a tuple rather than a shared mutable list.

    Returns:
        A DataFrame with one ``'y<year>'`` column per maturity; an empty
        DataFrame when ``years`` is empty.
    """
    frames = [get_bnd_yield(year=yr) for yr in years]
    if not frames:
        return pd.DataFrame()
    # One concat instead of re-concatenating the growing frame in a loop.
    return pd.concat(frames, axis=1)


get_bnd_yields().head()
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
y1 | y5 | y10 | |
---|---|---|---|
date | |||
2014-04-01 | 3.650 | 4.160 | 4.330 |
2014-05-01 | 3.360 | 4.010 | 4.160 |
2014-06-01 | 3.370 | 3.860 | 4.060 |
2014-07-01 | 3.763 | 4.031 | 4.298 |
2014-08-01 | 3.799 | 3.998 | 4.248 |
本社区仅针对特定人员开放
查看需注册登录并通过风险意识测评
5秒后跳转登录页面...
移动端课程