量化交易----编程实例:爬取标普500指数股票数据
2017-11-04 00:50
671 查看
编程实战:首先建立数据库用于存储数据,接着在维基百科上爬取标普500的股票代码,最后利用雅虎财经的API接口爬取股票的历史价格数据
表symbol用于存储标普500的股票的描述信息
表daily_price用于存储股票的每日价格
余下的两个数据表后期会使用到
![](https://oscdn.geek-share.com/Uploads/Images/Content/202009/03/d92fc06619f4629b5575af9ecaf28b1f)
实际上pandas提供了函数,可直接爬取雅虎上的数据
1. 建立数据库
在MySQL数据库上建立四个数据表。表symbol用于存储标普500的股票的描述信息
表daily_price用于存储股票的每日价格
余下的两个数据表后期会使用到
-- Schema for the securities master database used by the scripts in this article.
-- Only `symbol` and `daily_price` are written to by those scripts; `exchange`
-- and `data_vendor` are created now and, per the article, used later.

-- Exchange reference table (presumably one row per trading venue; not
-- populated by the scripts shown here).
CREATE TABLE `exchange` (
  `id` int NOT NULL AUTO_INCREMENT,
  `abbrev` varchar(32) NOT NULL,
  `name` varchar(255) NOT NULL,
  `city` varchar(255) NULL,
  `country` varchar(255) NULL,
  `currency` varchar(64) NULL,
  `timezone_offset` time NULL,
  `created_date` datetime NOT NULL,
  `last_updated_date` datetime NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

-- Data vendor reference table (not populated by the scripts shown here;
-- daily_price.data_vendor_id presumably refers to its `id`).
CREATE TABLE `data_vendor` (
  `id` int NOT NULL AUTO_INCREMENT,
  `name` varchar(64) NOT NULL,
  `website_url` varchar(255) NULL,
  `support_email` varchar(255) NULL,
  `created_date` datetime NOT NULL,
  `last_updated_date` datetime NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

-- Descriptive information for each S&P 500 constituent.
-- `exchange_id` is indexed but no FOREIGN KEY constraint is declared.
CREATE TABLE `symbol` (
  `id` int NOT NULL AUTO_INCREMENT,
  `exchange_id` int NULL,
  `ticker` varchar(32) NOT NULL,
  `instrument` varchar(64) NOT NULL,
  `name` varchar(255) NULL,
  `sector` varchar(255) NULL,
  `currency` varchar(32) NULL,
  `created_date` datetime NOT NULL,
  `last_updated_date` datetime NOT NULL,
  PRIMARY KEY (`id`),
  KEY `index_exchange_id` (`exchange_id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

-- One row per symbol per trading day with OHLC, adjusted close and volume.
-- `data_vendor_id` and `symbol_id` are indexed but carry no FOREIGN KEY
-- constraints, and there is no uniqueness constraint on (symbol_id, price_date).
CREATE TABLE `daily_price` (
  `id` int NOT NULL AUTO_INCREMENT,
  `data_vendor_id` int NOT NULL,
  `symbol_id` int NOT NULL,
  `price_date` datetime NOT NULL,
  `created_date` datetime NOT NULL,
  `last_updated_date` datetime NOT NULL,
  `open_price` decimal(19,4) NULL,
  `high_price` decimal(19,4) NULL,
  `low_price` decimal(19,4) NULL,
  `close_price` decimal(19,4) NULL,
  `adj_close_price` decimal(19,4) NULL,
  `volume` bigint NULL,
  PRIMARY KEY (`id`),
  KEY `index_data_vendor_id` (`data_vendor_id`),
  KEY `index_symbol_id` (`symbol_id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
2. 爬取标普500的股票代码
#!/usr/bin/python
# -*- coding: utf-8 -*-

# insert_symbols.py
# NOTE: the code is written in Python 2.7 syntax (it avoids Python-3-only
# constructs such as f-strings on purpose).

from __future__ import print_function

import datetime
from math import ceil

import bs4
import MySQLdb as mdb
import requests


def _cell_link_text(cell):
    """Return the text of the first <a> in *cell*, or the stripped cell text."""
    anchors = cell.select('a')
    if anchors:
        return anchors[0].text
    return cell.text.strip()


def obtain_parse_wiki_snp500():
    """
    Download and parse the Wikipedia list of S&P500
    constituents using requests and BeautifulSoup.

    Returns a list of tuples to add to MySQL, each of the form
    (ticker, instrument, name, sector, currency, created_date,
    last_updated_date).

    Raises requests.HTTPError if Wikipedia answers with an error status.
    """
    # Stores the current time, for the created_date/last_updated_date columns
    now = datetime.datetime.utcnow()

    # Use requests and BeautifulSoup to download the
    # list of S&P500 companies and obtain the symbol table
    response = requests.get(
        "http://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    )
    # Fail loudly on an HTTP error instead of parsing an error page
    response.raise_for_status()

    # Name the parser explicitly: without it bs4 picks whichever parser
    # happens to be installed (and warns), so results vary by machine.
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    # This selects the first table, using CSS Selector syntax
    # and then ignores the header row ([1:])
    rows = soup.select('table')[0].select('tr')[1:]

    # Obtain the symbol information for each
    # row in the S&P500 constituent table
    symbols = []
    for row in rows:
        tds = row.select('td')
        if len(tds) < 4:
            # Not a constituent row (e.g. a header or spacer row)
            continue
        symbols.append(
            (
                _cell_link_text(tds[0]),  # Ticker
                'stock',
                _cell_link_text(tds[1]),  # Name
                tds[3].text,  # Sector
                'USD', now, now
            )
        )
    return symbols


def insert_snp500_symbols(symbols):
    """
    Insert the S&P500 symbols into the MySQL database.

    symbols: list of 7-tuples as returned by obtain_parse_wiki_snp500().
    The whole batch is committed as one transaction and rolled back
    if any insert fails.
    """
    if not symbols:
        # Nothing to insert; avoid opening a connection for no work
        return

    # Connect to the MySQL instance
    db_host = 'localhost'
    db_user = 'sec_user'
    db_pass = 'password'
    db_name = 'securities_master'
    con = mdb.connect(
        host=db_host, user=db_user, passwd=db_pass, db=db_name
    )

    # Create the insert strings
    column_str = (
        "ticker, instrument, name, sector, "
        "currency, created_date, last_updated_date"
    )
    insert_str = ", ".join(["%s"] * 7)
    final_str = "INSERT INTO symbol (%s) VALUES (%s)" % \
        (column_str, insert_str)

    # Commit explicitly rather than relying on "with con:". What the
    # connection's context manager does differs between MySQLdb/mysqlclient
    # releases (older ones commit, current ones close the connection).
    try:
        cur = con.cursor()
        try:
            cur.executemany(final_str, symbols)
            con.commit()
        except Exception:
            con.rollback()
            raise
        finally:
            cur.close()
    finally:
        con.close()


if __name__ == "__main__":
    symbols = obtain_parse_wiki_snp500()
    insert_snp500_symbols(symbols)
    print("%s symbols were successfully added." % len(symbols))
3. 利用股票代码爬取价格数据
#!/usr/bin/python
# -*- coding: utf-8 -*-

# price_retrieval.py

from __future__ import print_function

import datetime
import warnings

import MySQLdb as mdb
import requests


# Obtain a database connection to the MySQL instance
db_host = 'localhost'
db_user = 'sec_user'
db_pass = 'password'
db_name = 'securities_master'
con = mdb.connect(db_host, db_user, db_pass, db_name)


def obtain_list_of_db_tickers():
    """
    Obtains a list of the ticker symbols in the database.

    Returns a list of (symbol_id, ticker) tuples read from the symbol table.
    """
    # Use an explicit cursor instead of "with con:" so the shared
    # module-level connection stays open for the later inserts.
    cur = con.cursor()
    try:
        cur.execute("SELECT id, ticker FROM symbol")
        data = cur.fetchall()
    finally:
        cur.close()
    return [(d[0], d[1]) for d in data]


def get_daily_historic_data_yahoo(
        ticker, start_date=(2000, 1, 1), end_date=None
    ):
    """
    Obtains data from Yahoo Finance and returns a list of tuples.

    ticker: Yahoo Finance ticker symbol, e.g. "GOOG" for Google, Inc.
    start_date: Start date in (YYYY, M, D) format
    end_date: End date in (YYYY, M, D) format; defaults to today's date,
        evaluated at call time.

    Returns a list of (date, p1, ..., p6) tuples, where p1..p6 are the six
    CSV fields following the date, kept as strings. On any download or
    parse error a message is printed and the rows parsed so far (possibly
    none) are returned.
    """
    if end_date is None:
        # Computed per call: a default written in the signature would be
        # frozen at import time and go stale in a long-running process.
        end_date = datetime.date.today().timetuple()[0:3]

    # Construct the Yahoo URL with the correct integer query parameters
    # for start and end dates. Note that some parameters are zero-based!
    ticker_tup = (
        ticker, start_date[1]-1, start_date[2],
        start_date[0], end_date[1]-1, end_date[2],
        end_date[0]
    )
    # NOTE(review): Yahoo reportedly retired the ichart.finance.yahoo.com
    # CSV endpoint in 2017; if so, this request fails and the error path
    # below returns an empty list.
    yahoo_url = "http://ichart.finance.yahoo.com/table.csv"
    yahoo_url += "?s=%s&a=%s&b=%s&c=%s&d=%s&e=%s&f=%s"
    yahoo_url = yahoo_url % ticker_tup

    # Initialise before the try block so a failed download still returns a
    # list instead of raising NameError at the return statement.
    prices = []

    # Try connecting to Yahoo Finance and obtaining the data
    # On failure, print an error message.
    try:
        response = requests.get(yahoo_url)
        response.raise_for_status()
        # Drop the CSV header row and the trailing empty line
        yf_data = response.text.split("\n")[1:-1]
        for y in yf_data:
            p = y.strip().split(',')
            prices.append(
                (datetime.datetime.strptime(p[0], '%Y-%m-%d'),
                 p[1], p[2], p[3], p[4], p[5], p[6])
            )
    except Exception as e:
        # Deliberately best-effort: one bad ticker must not stop the run
        print("Could not download Yahoo data: %s" % e)
    return prices


def insert_daily_data_into_db(
        data_vendor_id, symbol_id, daily_data
    ):
    """
    Takes a list of tuples of daily data and adds it to the
    MySQL database. Appends the vendor ID and symbol ID to the data.

    data_vendor_id: value stored in daily_price.data_vendor_id
    symbol_id: value stored in daily_price.symbol_id
    daily_data: List of tuples of the OHLC data (with
        adj_close and volume)

    The batch is committed as one transaction and rolled back if any
    insert fails.
    """
    if not daily_data:
        # Nothing was downloaded for this symbol
        return

    # Create the time now
    now = datetime.datetime.utcnow()

    # Amend the data to include the vendor ID and symbol ID
    rows = [
        (data_vendor_id, symbol_id, d[0], now, now,
         d[1], d[2], d[3], d[4], d[5], d[6])
        for d in daily_data
    ]

    # Create the insert strings. The column order follows the order of
    # the fields in each daily_data tuple.
    column_str = (
        "data_vendor_id, symbol_id, price_date, created_date, "
        "last_updated_date, open_price, high_price, low_price, "
        "close_price, volume, adj_close_price"
    )
    insert_str = ", ".join(["%s"] * 11)
    final_str = "INSERT INTO daily_price (%s) VALUES (%s)" % \
        (column_str, insert_str)

    # Commit explicitly rather than relying on "with con:". What the
    # connection's context manager does differs between MySQLdb/mysqlclient
    # releases (current ones close the connection, which would break the
    # next loop iteration).
    cur = con.cursor()
    try:
        cur.executemany(final_str, rows)
        con.commit()
    except Exception:
        con.rollback()
        raise
    finally:
        cur.close()


if __name__ == "__main__":
    # This ignores the warnings regarding Data Truncation
    # from the Yahoo precision to Decimal(19,4) datatypes
    warnings.filterwarnings('ignore')

    # Loop over the tickers and insert the daily historical
    # data into the database
    tickers = obtain_list_of_db_tickers()
    lentickers = len(tickers)
    for i, t in enumerate(tickers):
        print(
            "Adding data for %s: %s out of %s" %
            (t[1], i+1, lentickers)
        )
        yf_data = get_daily_historic_data_yahoo(t[1])
        insert_daily_data_into_db('1', t[0], yf_data)
    print("Successfully added Yahoo Finance pricing data to DB.")
4. 显示数据
#!/usr/bin/python
# -*- coding: utf-8 -*-

# retrieving_data.py

from __future__ import print_function

import pandas as pd
import MySQLdb as mdb


def main():
    """Print the last rows of GOOG's adjusted close prices stored in MySQL."""
    # Connect to the MySQL instance
    db_host = 'localhost'
    db_user = 'sec_user'
    db_pass = 'password'
    db_name = 'securities_master'
    con = mdb.connect(db_host, db_user, db_pass, db_name)

    # Select all of the historic Google adjusted close data
    sql = """SELECT dp.price_date, dp.adj_close_price
             FROM symbol AS sym
             INNER JOIN daily_price AS dp
             ON dp.symbol_id = sym.id
             WHERE sym.ticker = 'GOOG'
             ORDER BY dp.price_date ASC;"""

    # Create a pandas dataframe from the SQL query, making sure the
    # connection is released even if the query fails
    try:
        goog = pd.read_sql_query(sql, con=con, index_col='price_date')
    finally:
        con.close()

    # Output the dataframe tail
    print(goog.tail())


if __name__ == "__main__":
    main()
另一种简易方法
以上操作可能有点麻烦,实际上pandas提供了函数,可直接爬取雅虎上的数据。
from __future__ import print_function

import datetime

# The original line read "import pandas,io,data as web", a garbled form of
# the dotted module path below.
# NOTE(review): pandas.io.data only exists in old pandas releases; newer
# ones moved this functionality to the separate pandas-datareader package
# (import pandas_datareader.data as web). Confirm against the installed
# pandas version.
import pandas.io.data as web


if __name__ == "__main__":
    # Download daily SPY prices from Yahoo for the given date range.
    # The start date must precede the end date; the original (2017, 1, 1)
    # start was later than the 2015 end date, so 2007 is assumed here.
    spy = web.DataReader(
        "SPY", "yahoo",
        datetime.datetime(2007, 1, 1),
        datetime.datetime(2015, 6, 15)
    )
    print(spy.tail())
相关文章推荐
- R实例:构建量化分析系统(一)— 获取交易数据
- 量化交易----获取沪深300股票数据
- python 获取股票的交易数据
- C#通讯编程入门--串口数据接收发送实例
- Android传感器编程实例开发——三轴数据采集
- XML编程实例--对xml配置文件中的数据进行增、删、改、查
- 从零学python系列之数据处理编程实例(一)
- 从零学python系列之数据处理编程实例(二)
- python网络编程之数据传输UDP实例分析
- 网上爬取股票数据实例
- hadoop编程实例--数据排序
- 获取股票实时交易数据的方法
- socket编程技巧(1)tcp接收接口(变长数据定长数据)的编写实例
- 有一头母牛,它每年年初生一头小母牛。每头小母牛从第四个年头开始,每年年初也生一头小母牛。请编程实现在第n年的时候,共有多少头母牛?输入数据由多个测试实例组成,每个测试实例占一行,包括一个整数n(0<n
- 【分析】Ceph编程实例 接口Librbd(C++) -- 映像创建与数据读写
- Qt: 网络编程实例,QNetworkAccessManager获取网页数据
- (NEON实例一)ARM处理器NEON编程及优化技巧——数据加载和存储
- 获取股票实时交易数据的方法
- Java JNI 编程进阶 实例+c++数据类型与jni数据类型转换
- iOS网络编程-iCloud键值数据存储编程实例