金融数据获取(1)

数据可以说是量化投资的根本,一切投资策略都是建立在数据基础上的。

一、历史数据

1、pandas_datareader

pandas_datareader库(原为Pandas内置的远程数据读取模块,现已独立成包,需单独安装)提供了专门从财经网站获取金融数据的API接口,可作为量化交易中获取股票数据的一种途径。该接口在urllib3库基础上实现了以客户端身份访问网站的股票数据。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import pandas as pd
import pandas_datareader.data as web
import datetime
import numpy as np

#========================================================
# Fetch historical stock data
#========================================================

start = datetime.datetime(2019, 7, 1)  # first day of the requested history
end = datetime.date.today()  # last day of the requested history
stock_code = "600797"
# Yahoo ticker convention: Shanghai-listed codes take the ".SS" suffix,
# Shenzhen-listed codes take ".SZ".
data = web.DataReader(stock_code + ".SS", "yahoo", start, end)
# print(data)
# print(data.index)
# print(data.columns)

#========================================================
# Data processing
# Add price-change columns:
# pct_change = (today's Close - previous Close) / previous Close
#========================================================

# Previous trading day's close; NaN on the first row (no earlier day).
prev_close = data["Close"].shift(1)

# Day-over-day change of the closing price.
change = data["Close"].diff()
# Replace the missing first value with the mean change BEFORE storing the
# column: assigning a Series to a DataFrame column stores a copy, so filling
# the Series in place after the assignment would leave the NaN in `data`.
change = change.fillna(change.mean())
data["Change"] = change
# Fractional change relative to the previous close (0.01 means 1%).
data["pct_change"] = data["Change"] / prev_close
# data['pct_change1'] = data.Close.pct_change()  # alternative: built-in helper

#========================================================
# Data processing
# Add the price-gap column: in an up move, today's Low above the previous
# Close is an upward gap; in a down move, today's High below the previous
# Close is a downward gap.
#========================================================

# Use the shifted close directly rather than Close - Change, so the
# mean-filled first Change value cannot fabricate a previous close.
data["preClose"] = prev_close

gap_up = data["Low"] - prev_close      # > 0 when today's low clears the previous close
gap_down = data["High"] - prev_close   # < 0 when today's high stays under the previous close
is_gap_up = (data["pct_change"] > 0) & (gap_up > 0)
is_gap_down = (data["pct_change"] < 0) & (gap_down < 0)
# Vectorised replacement for the row-by-row loop (DataFrame.ix no longer
# exists in pandas >= 1.0). Rows without a gap, including the first row whose
# comparisons against NaN are False, get NaN.
jump = np.where(is_gap_up, gap_up, np.where(is_gap_down, gap_down, np.nan))
data["jump"] = jump

#========================================================
# Data storage
#========================================================

data.to_csv("F://stock_"+stock_code+".csv", columns=data.columns, index=True)

2、tushare

TuShare(挖地兔)是一个免费、开源的Python财经数据接口包,数据覆盖范围广,包括股票、指数、期货、期权、基金、数字货币、上市公告、新闻、市场参考数据等。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import pandas as pd
import tushare as ts
import numpy as np

#========================================================
# Fetch stock data (legacy API)
#========================================================

# ts.get_hist_data('600848')              # daily K-line data
# ts.get_hist_data('600848', ktype='W')   # weekly K-line data
# ts.get_hist_data('600848', ktype='M')   # monthly K-line data
# ts.get_hist_data('600848', ktype='5')   # 5-minute K-line data
# ts.get_hist_data('600848', ktype='15')  # 15-minute K-line data
# ts.get_hist_data('600848', ktype='30')  # 30-minute K-line data
# ts.get_hist_data('600848', ktype='60')  # 60-minute K-line data
# ts.get_hist_data('sh')                  # Shanghai Composite Index K-line data
# ts.get_hist_data('sz')                  # Shenzhen Component Index K-line data
# ts.get_hist_data('hs300')               # CSI 300 Index K-line data
# ts.get_hist_data('sz50')                # SSE 50 Index K-line data
# ts.get_hist_data('zxb')                 # SME Board Index K-line data
# ts.get_hist_data('cyb')                 # ChiNext Index K-line data
# ts.get_tick_data('600848', date='2020-01-20', src='tt')  # historical ticks: only days before the current trading day
# ts.get_sina_dd('600848', date='2020-02-28', vol=500)     # large trades of at least 500 lots

#========================================================
# Fetch stock data (Pro API)
#========================================================

start = '20190701'  # first day of the requested range
end = '20191201'  # last day of the requested range
stock_code = "000001.SZ"

# The token is issued after registering on the TuShare website.
pro = ts.pro_api('your code')
data = pro.daily(**{
    "ts_code": stock_code,
    "start_date": start,
    "end_date": end,
})
# print(data)
# print(data.index)
# print(data.columns)

#========================================================
# Data storage
#========================================================

data.to_csv(f"F://stock_{stock_code}.csv", index=True, columns=data.columns)

3、财经网站

英为财经(Investing.com 中文站)

二、实时数据

1、easyquotation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import pandas as pd
import easyquotation
import datetime

#========================================================
# Fetch real-time quotes (Shanghai / Shenzhen stocks)
#========================================================

quotation = easyquotation.use("sina")
stock_code = '000159'
# real() returns a mapping keyed by stock code; keep only this stock's record.
ori_data = quotation.real(stock_code)[stock_code]

columns = ['name', 'open', 'close', 'now', 'high', 'low', 'volume', 'date', 'time']
# Pick the fields of interest, in the order listed above.
data = [ori_data[field] for field in columns]

print(data)

#========================================================
# Fetch real-time quotes (Hong Kong stocks)
#========================================================

quotation2 = easyquotation.use("hkquote")
stock_code2 = '00001'
ori_data2 = quotation2.real(stock_code2)[stock_code2]

columns2 = ['name', 'lastPrice', 'openPrice', 'high', 'low', 'price', 'amount', 'time']
# Same field extraction as above, using the field names of the HK source.
data2 = [ori_data2[field] for field in columns2]

print(data2)
2、tushare
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import pandas as pd
import tushare as ts
import datetime

#========================================================
# Fetch the real-time market snapshot
#========================================================

stock_today = ts.get_today_all()

# English column names and their Chinese display names, kept in matching order.
en_today_columns = [
    'code', 'name', 'changepercent', 'trade', 'open', 'high', 'low',
    'settlement', 'volume', 'turnoverratio', 'amount', 'per', 'pb',
    'mktcap', 'nmc',
]
cn_today_columns = [
    '代码', '名称', '涨跌幅', '现价', '开盘价', '最高价', '最低价',
    '昨日收盘价', '成交量', '换手率', '成交金额', '市盈率', '市净率',
    '总市值', '流通市值',
]

print(stock_today)

#========================================================
# Fetch real-time tick data for a single stock
#========================================================

# Columns kept from the quote table, with their Chinese display names.
en_quotes_columns = ['code', 'name', 'price', 'bid', 'ask', 'volume', 'amount', 'time']
cn_quotes_columns = ['代码', '名称', '当前价格', '竞买价', '竞卖价', '成交量', '成交金额', '时间']

realtime_quotes = ts.get_realtime_quotes('000581')
# Keep every row but only the columns listed above.
realtime_quotes = realtime_quotes.loc[:, en_quotes_columns]

print(realtime_quotes)

tushare

0%