金融数据获取(2)

为了更方便地进行策略模拟,将金融数据存入本地数据库中。

存储到sqlite里

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
import pandas as pd
import tushare as ts
import warnings; warnings.simplefilter('ignore')
import sqlite3, time, datetime

class SqliteDb():
def __init__(self, DBname):
self.conn = sqlite3.connect(DBname+".db")

# 创建表
def create_table(self, tbname, content):
sql = "CREATE TABLE IF NOT EXISTS "+tbname+" ("+content+");"
self.conn.execute(sql)
self.conn.commit()

# 删除表
def delete_table(self, tbname):
try:
self.conn.execute('DROP TABLE '+tbname)
self.conn.commit()
except:
print('表不存在')

# 刷新表
def update_table(self, df, tbname, mode):
try:
df.to_sql(tbname, self.conn,
if_exists=mode, # replace:若表存在则覆盖原来表里的数据/append:若表存在将数据写到原表的后面
index=False, # 是否将df的index单独写到一列中
index_label=None, chunksize=None, dtype=None)
except:
print('表刷新失败')

# 查询数据
def query(self, sql, tbname=None):
if tbname:
result = self.conn.execute("PRAGMA table_info("+str(tbname)+")")
col_names = [i[1] for i in result]
else:
col_names = None
return pd.DataFrame(self.conn.execute(sql).fetchall(), columns=col_names)

# 插入数据
def insert(self, tbname, content):
self.conn.execute("INSERT INTO "+tbname+" VALUES ("+content+");")
self.conn.commit()

# 关闭连接
def close(self):
self.conn.close()

c = SqliteDb('quotation')


#========================================================
# 存储行业股
#========================================================

# industry_classified = ts.get_industry_classified()
# en_industry_columns = ['code', 'name', 'c_name']
# cn_industry_columns = ['代码', '名称', '行业名称']

# c.update_table(industry_classified, 'industry_classified', 'replace')
# print(c.query('select * from industry_classified'))

#========================================================
# 存储概念股
#========================================================

# concept_classified = ts.get_concept_classified()
# en_concept_columns = ['code', 'name', 'c_name']
# cn_concept_columns = ['代码', '名称', '概念名称']

# c.update_table(concept_classified, 'concept_classified', 'replace')

#========================================================
# 存储创业板股
#========================================================

# gem_classified = ts.get_gem_classified()
# en_gem_columns = ['code', 'name']
# cn_gem_columns = ['代码', '名称']

# c.update_table(gem_classified, 'gem_classified', 'replace')

#========================================================
# 存储沪深300成份及权重
#========================================================

# hs300s = ts.get_hs300s()
# en_hs300_columns = ['code', 'name', 'date', 'weight']
# cn_hs300_columns = ['代码', '名称', '日期', '权重']

# c.update_table(hs300s, 'hs300s', 'replace')
# print(c.query('select * from hs300s'))

#========================================================
# 存储上证50成份股
#========================================================

# gz50s = ts.get_sz50s()
# en_gz50s_columns = ['code', 'name']
# cn_gz50s_columns = ['代码', '名称']

# c.update_table(gz50s, 'gz50s', 'replace')

#========================================================
# 存储中证500成份股
#========================================================

# zz500s = ts.get_zz500s()
# en_zz500s_columns = ['code', 'name']
# cn_zz500s_columns = ['代码', '名称']

# c.update_table(zz500s, 'zz500s', 'replace')

#========================================================
# 存储沪深300成份及权重
#========================================================

# hs300s = ts.get_hs300s()
# en_hs300_columns = ['code', 'name', 'date', 'weight']
# cn_hs300_columns = ['代码', '名称', '日期', '权重']

# c.update_table(hs300s, 'hs300s', 'replace')
# print(c.query('select * from hs300s'))

#========================================================
# 存储股票基本信息
#========================================================

# stock_basics = ts.get_stock_basics()
# stock_basics.reset_index(inplace=True)
#
# en_stock_columns = ['code', 'name', 'industry', 'area', 'pe', 'outstanding', 'totals', 'totalAssets', 'liquidAssets', \
# 'fixedAssets', 'reserved', 'reservedPerShare', 'esp', 'bvps', 'pb', 'timeToMarket', 'undp', 'perundp', \
# 'rev', 'profit', 'gpr', 'npr', 'holders']
# cn_stock_columns = ['代码', '名称', '所属行业', '地区', '市盈率', '流通股本(亿)', '总股本(亿)', '总资产(万)', '流动资产', \
# '固定资产', '公积金', '每股公积金', '每股收益', '每股净资', '市净率', '上市日期', '未分利润', '每股未分配', \
# '收入同比', '利润同比', '毛利率', '净利润率', '股东人数']
#
# c.update_table(stock_basics, 'stock_basics', 'replace')
# print(c.query('select * from stock_basics'))

#========================================================
# 存储基本面数据
#========================================================

def save_fund(years, quarters, mode):
for year in years:
for quarter in quarters:
# 盈利能力数据
profit_data = ts.get_profit_data(year, quarter)
en_profit_colunms = ['code', 'name', 'roe', 'net_profit_ratio', 'gross_profit_rate', 'net_profits', 'esp', 'business_income', 'bips']
cn_profit_colunms = ['代码', '名称', '净资产收益率', '净利率', '毛利率', '净利润(万元)', '每股收益', '营业收入(百万元)', '每股主营业务收入(元)']
# 营运能力数据
operation_data = ts.get_operation_data(year, quarter)
en_operation_colunms = ['code', 'name', 'arturnover', 'arturndays', 'inventory_turnover', 'inventory_days', 'currentasset_turnover', 'currentasset_days']
cn_operation_colunms = ['代码', '名称', '应收账款周转率(次)', '应收账款周转天数(天)', '存货周转率(次)', '存货周转天数(天)', '流动资产周转率(次)', '流动资产周转天数(天)']
# 债偿能力数据
debtpaying_data = ts.get_debtpaying_data(year, quarter)
en_debtpaying_colunms = ['code', 'name', 'currentratio', 'quickratio', 'cashratio', 'icratio', 'sheqratio', 'adratio']
cn_debtpaying_colunms = ['代码', '名称', '流动比率', '速动比率', '现金比率', '利息支付倍数', '股东权益比率', '股东权益增长率']
# 现金流量数据
cashflow_data = ts.get_cashflow_data(year, quarter)
en_cashflow_colunms = ['code', 'name', 'cf_sales', 'rateofreturn', 'cf_nm', 'cf_liabilities', 'cashflowratio']
cn_cashflow_colunms = ['代码', '名称', '经营现金净流量对销售收入比率', '资产的经营现金流量回报率', '经营现金净流量与净利润的比率', '经营现金净流量对负债比率', '现金流量比率']
data1 = pd.merge(profit_data, operation_data, on=['code', 'name'])
data2 = pd.merge(data1, debtpaying_data, on=['code', 'name'])
data3 = pd.merge(data2, cashflow_data, on=['code', 'name'])
data3['year'] = year
data3['quarter'] = quarter
c.update_table(data3, 'stock_fund', mode)

# save_fund([2016], [4], 'replace')
# save_fund([2017, 2018, 2019], [1, 2, 3, 4], 'append')
# print(c.query('select * from stock_fund'))

#========================================================
# 存储历史行情数据
#========================================================

# stock_codes = c.query('select code from stock_basics')[0]
# for stock_code in stock_codes:
# if len(stock_code) == 6:
# try:
# history_d = ts.get_hist_data(stock_code)
# history_d['code'] = stock_code
# history_d['date'] = history_d.index
# c.update_table(history_d, 'history_d', 'append')
# except:
# print(stock_code)
# print(c.query('select count(distinct code) from history_d'))

#========================================================
# 存储实时行情数据
#========================================================

# stock_today = ts.get_today_all()
#
# en_today_columns = ['code', 'name', 'changepercent', 'trade', 'open', 'high', 'low', 'settlement', 'volume', \
# 'turnoverratio', 'amount', 'per', 'pb', 'mktcap', 'nmc']
# cn_today_columns = ['代码', '名称', '涨跌幅', '现价(元)', '开盘价', '最高价', '最低价', '昨日收盘价', '成交量', \
# '换手率', '成交金额', '市盈率', '市净率', '总市值(万元)', '流通市值(万元)']
#
# c.update_table(stock_today, 'stock_today', 'replace')
# print(c.query('select * from stock_today', 'stock_today'))
0%