# For statically loaded data, use pd.read_html()
import pandas as pd
# Page to scrape; pandas fetches the URL itself and parses the HTML tables in it.
url_static = "http://www.fortunechina.com/fortune500/c/2023-08/02/content_436877.htm"
# read_html returns a list with one DataFrame per table it finds; header=0
# makes the first row of each table the column names.
static_tables = pd.read_html(url_static, header=0)
# Only the first table on the page is kept.
df1 = static_tables[0]
print(df1)
# For dynamically loaded data, call the backing API directly and use pd.read_json()
import pandas as pd
# JSON endpoint queried directly instead of scraping a rendered page.
# The query string is kept verbatim; `fields` lists the f-codes requested and
# `pz=9999` is the page-size parameter sent to the server.
url_dynamic = (
    "http://31.push2.eastmoney.com/api/qt/clist/get?"
    "pn=1&pz=9999&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&"
    "fltt=2&invt=2&wbp2u=|0|0|0|web&fid=f3&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&"
    "fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,"
    "f11,f62,f128,f136,f115,f152&_=1689749812327"
)

# Output column label -> f-code key read from each record. Insertion order
# is the column order of the resulting DataFrame.
column_fields = {
    '代码': 'f12',         # code
    '名称': 'f14',         # name
    '最新价': 'f2',        # latest price
    '涨跌幅': 'f3',        # change (percent)
    '涨跌额': 'f4',        # change (amount)
    '成交量(手)': 'f5',    # volume (lots)
    '成交额': 'f6',        # turnover
    '振幅': 'f7',          # amplitude
    '最高价': 'f15',       # high
    '最低价': 'f16',       # low
    '今开': 'f17',         # today's open
    '昨收': 'f18',         # previous close
    '量比': 'f10',         # volume ratio
    '换手率': 'f8',        # turnover rate
    '市盈率(动态)': 'f9',  # P/E ratio (dynamic)
    '市净率': 'f23',       # P/B ratio
}

# read_json fetches and parses the response; the records iterated below are
# taken from the 'diff' entry of the 'data' column.
# NOTE(review): assumes the response always carries a non-null data.diff
# collection of per-record mappings -- confirm against the live API.
datas = pd.read_json(url_dynamic)['data']['diff']

result = []
for data in datas:
    # Keep only the fields of interest, renamed to readable labels.
    # A record that lacks one of the f-codes raises KeyError.
    data_info = {label: data[field] for label, field in column_fields.items()}
    result.append(data_info)

df2 = pd.DataFrame(result)
print(df2)