前面我们已经把基金 519961(基金编码)的历史净值明细表 html 内容抓取到了本地,现在还需要解析这些 html,取出相关的值,然后保存为 csv 文件,以便后续用 pandas 读取和处理。
from bs4 import BeautifulSoup
import os
import csv

# Column order shared by the parsed records and the CSV header.
_CSV_FIELDS = ['fcode', 'fdate', 'NAV', "ACCNAV", 'DGR', 'pstate', "rstate"]


# Parse the html content with BeautifulSoup.
def getFundDetailData(html, fcode='519961'):
    """Extract the rows of the first table in *html* as a list of dicts.

    Args:
        html: HTML text containing the detail table.
        fcode: Fund code stored in the ``fcode`` field of every record.
            Defaults to ``'519961'``, the value that used to be hard-coded.

    Returns:
        A list of dicts with the keys ``fcode``, ``fdate``, ``NAV``,
        ``ACCNAV``, ``DGR``, ``pstate`` and ``rstate``. The last six are the
        text of the first six ``<td>`` cells of each row, in order. The list
        is empty when the document contains no table.
    """
    soup = BeautifulSoup(html, "html.parser")
    table = soup.find("table")
    if table is None:
        return []

    # html.parser does not synthesize a <tbody>; fall back to the table
    # itself so that pages without one do not raise AttributeError.
    body = table.tbody if table.tbody is not None else table

    result = []
    for row in body.find_all("tr"):
        tds = row.find_all('td')
        # Rows with fewer than six cells (e.g. a header made of <th>, or a
        # single-cell placeholder row) cannot fill every field; skip them
        # instead of failing with IndexError.
        if len(tds) < 6:
            continue
        result.append({
            "fcode": fcode,
            "fdate": tds[0].get_text(),
            "NAV": tds[1].get_text(),
            "ACCNAV": tds[2].get_text(),
            "DGR": tds[3].get_text(),
            "pstate": tds[4].get_text(),
            "rstate": tds[5].get_text(),
        })
    return result


# Write the parsed data to a csv file.
def writeToCSV(data_dir="../htmls/details", csv_path="../csv/519961.csv",
               fcode='519961'):
    """Parse every file in *data_dir* and write all records to one CSV file.

    Args:
        data_dir: Directory holding the saved HTML pages. Sub-directories
            are ignored.
        csv_path: Destination CSV file; it is overwritten if it exists.
        fcode: Fund code passed through to ``getFundDetailData``.

    Raises:
        OSError: If *data_dir* cannot be listed, a page cannot be read, or
            *csv_path* cannot be opened for writing.
        UnicodeDecodeError: If a page is not valid UTF-8.
    """
    all_result = []
    # os.listdir returns entries in arbitrary order; sort them so that the
    # row order of the CSV is reproducible between runs.
    for name in sorted(os.listdir(data_dir)):
        full_path = os.path.join(data_dir, name)
        if not os.path.isfile(full_path):
            continue
        with open(full_path, "rb") as f:
            content = f.read().decode("utf-8")
        # extend() appends in place rather than copying the accumulated
        # list for every file.
        all_result.extend(getFundDetailData(content, fcode))

    # newline="" lets the csv module control line endings itself.
    with open(csv_path, "w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=_CSV_FIELDS)
        writer.writeheader()
        writer.writerows(all_result)
Copyright © 2009-2022 www.kswsj.com 成都快上网科技有限公司 版权所有 蜀ICP备19037934号