您的位置:首页 > 编程语言 > Python开发

数据分析与挖掘入门——学习笔记(四)利用NumPy进行历史股价分析

2018-08-22 10:55 459 查看

利用NumPy进行历史股价分析

该练习使用的csv文件如下:

其中第二列为日期,格式是%d-%m-%Y;第四到七列分别是当日开盘价,最高价,最低价,收盘价和成交量
准备工作:

import sys
import numpy as np

# 读入文件,函数是loadtxt
# 第一个参数是文件名,delimiter是获取的数据的分隔符,usecols是指定读取哪列的数据,unpack为True表示获取的数据根据列分为不同的数组
# 这里分隔符是“,”,读取第6,7行(收盘价和交易量),unpack为true,分开存储为不同的变量
c, v = np.loadtxt('./data.csv', delimiter=',', usecols=(6, 7), unpack=True)

1 计算成交量加权平均值

np中使用average()计算加权平均值,第一个参数为计算的数组,第二个参数为加权平均值的权重

vwap = np.average(c, weights=v)
vwap
# 350.58954935320088

2 计算算术平均值

np.mean(c)  # 计算算术平均值
# 351.03766666666672

3 计算时间加权平均值

t = np.arange(len(c))
np.average(c, weights=t)  # 计算时间加权平均值
# 352.42832183908041

4 寻找最大值和最小值

h,l = np.loadtxt('./data.csv', delimiter=',', usecols=(4, 5), unpack=True)
np.max(h)
# 364.89999999999998

np.min(l)
# 333.52999999999997

5 计算中程数和极差

(np.max(h) + np.min(l))  / 2  # 计算中程数
# 349.21499999999997

np.ptp(h)  # 计算最大值和最小值之间的差值 极差
# 24.859999999999957

np.ptp(l)  # 计算交易量的极差
# 26.970000000000027

6 统计分析

c = np.loadtxt('./data.csv', delimiter=',', usecols=(6,), unpack=True)  # 读取收盘价

np.median(c)  # 中间数
# 352.05500000000001

sorted = np.msort(c)
sorted   # 可见中间数是351.99和352.12的平均值
# array([ 336.1 ,  338.61,  339.32,  342.62,  342.88,  343.44,  344.32,
#         345.03,  346.5 ,  346.67,  348.16,  349.31,  350.56,  351.88,
#         351.99,  352.12,  352.47,  353.21,  354.54,  355.2 ,  355.36,
#         355.76,  356.85,  358.16,  358.3 ,  359.18,  359.56,  359.9 ,
#         360.  ,  363.13])

np.var(c)  # 方差的计算
# 50.126517888888884

np.mean((c - c.mean()) ** 2)  # 手动计算方差
# 50.126517888888884

7 股票收益率计算

c = np.loadtxt('./data.csv', delimiter=',', usecols=(6,), unpack=True)
returns = np.diff(c) / c[: -1]  # diff计算差分,总体是求股票收益率

np.std(returns)  # 求标准差
# 0.012922134436826306

log_returns = np.diff(np.log(c))  # 股票对数收益率
log_returns
# array([ 0.00953488,  0.01668775, -0.00205991, -0.00255903,  0.00887039,
#         0.01540739,  0.0093908 ,  0.0082988 , -0.01015864,  0.00649435,
#         0.00650813,  0.00200256,  0.00893468, -0.01339027, -0.02183875,
#        -0.03468287,  0.01177296,  0.00075857,  0.01528161,  0.01440064,
#        -0.011103  ,  0.00801225,  0.02090904,  0.00122297, -0.01297267,
#         0.00112499, -0.00929083, -0.01659219,  0.01522945])

posretindices = np.where(returns > 0)  # 求收益率大于0的那几天
posretindices
# (array([ 0,  1,  4,  5,  6,  7,  9, 10, 11, 12, 16, 17, 18, 19, 21, 22, 23,
#         25, 28], dtype=int64),)

ann_vol = np.std(log_returns) / np.mean(log_returns)  # 日收益率
ann_vol = ann_vol / np.sqrt(1. / 252.)  # 年收益率
ann_vol
# 129.27478991115132

ann_vol * np.sqrt( 1. / 12.)  # 月收益率
# 37.318417377317765

8 日期分析

用以下数字代表星期
Monday 0
Tuesday 1
Wednesday 2
Thursday 3
Friday 4
星期六日股市休市,不考虑了

定义一个把日期转换为数字的函数

from datetime import datetime
def date2num(s):
return datetime.strptime(s, '%d-%m-%Y').date().weekday()

# 读取收盘价
close = np.loadtxt('data.csv', delimiter=',', usecols=(6,), unpack=True)
# 读取日期
dates = np.loadtxt('data.csv', delimiter=',', usecols=(1,), unpack=True, dtype=bytes).astype(str)

# 将读取的str数据转换为数字
for i in range(dates.size):
dates[i] = date2num(dates[i])

dates = dates.astype(np.int8)
dates.dtype
# dtype('int8')

averages = np.zeros(5)  # 定义一个空数组
# 依次获取不同天的索引,然后根据索引求得close(收盘价)数组中所有的对应值,计算平均值,把平均值加入到averages数组中
for i in range(5):
indices = np.where(dates == i)
prices = np.take(close, indices)
avg = np.mean(prices)
print("Day", i, "prices", prices, "Average", avg)
averages[i] = avg
# Day 0 prices [[ 339.32  351.88  359.18  353.21  355.36]] Average 351.79
# Day 1 prices [[ 345.03  355.2   359.9   338.61  349.31  355.76]] Average 350.635
# Day 2 prices [[ 344.32  358.16  363.13  342.62  352.12  352.47]] Average 352.136666667
# Day 3 prices [[ 343.44  354.54  358.3   342.88  359.56  346.67]] Average 350.898333333
# Day 4 prices [[ 336.1   346.5   356.85  350.56  348.16  360.    351.99]] Average 350.022857143

top = np.max(averages)  # 计算周每日的收盘价平均值的最大值
# 352.1366666666666

np.argmax(averages)  # 获取周每日的收盘价平均值的最大值是哪一天
# 2

bottom = np.min(averages)   # 计算周每日的收盘价平均值的最小值
# 350.02285714285711

np.argmin(averages)  # 获取周每日的收盘价平均值的最小值是哪一天
# 4

9 周数据汇总

以周为单位进行计算

#读取日期
dates = np.loadtxt('data.csv', delimiter=',', usecols=(1,), unpack=True, dtype=bytes).astype(str)
for i in range(dates.size):
dates[i] = date2num(dates[i])
dates = dates.astype(np.int8)
dates.dtype
# dtype('int8')

open, high, low, close=np.loadtxt('data.csv', delimiter=',', usecols=(3, 4, 5, 6), unpack=True)

close = close[:16]  # 因为后面有一周的星期五是放假,所以不纳入计算,再往后数据不齐,也不计算
dates = dates[:16]

first_Monday = np.ravel(np.where(dates == 0))[0]  # 获取第一个星期一
first_Monday
# 1

last_Friday = np.ravel(np.where(dates == 4))[-1]
last_Friday
# 15

# 将这十五天分割为三个星期
week_indices = np.arange(first_Monday, last_Friday + 1)
week_indices
# array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15], dtype=int64)
week_indices = np.split(week_indices, 3)
week_indices
# [array([1, 2, 3, 4, 5], dtype=int64),
#  array([ 6,  7,  8,  9, 10], dtype=int64),
#  array([11, 12, 13, 14, 15], dtype=int64)]

# 计算每个星期的最高价,最低价,星期一开盘价,星期五收盘价
def summarizer(a, o , h, l, c):
Monday_open = o[a[0]]
week_high = np.max(np.take(h, a))
week_low = np.min(np.take(l, a))
Friday_close = c[a[-1]]

return ("APPL", Monday_open, week_high, week_low, Friday_close)

weeksummary = np.apply_along_axis(summarizer, 1, week_indices, open, high, low, close)
weeksummary
# array([['APPL', '335.8', '346.7', '334.3', '346.5'],
#        ['APPL', '347.8', '360.0', '347.6', '356.8'],
#        ['APPL', '356.7', '364.9', '349.5', '350.5']],
#       dtype='<U5')

# 保存结果,第一个参数为文件名,第二个参数为保存的数组,第三个参数为分隔符,fmt为保存的数据格式
np.savetxt("weeksummary.csv", weeksummary, delimiter=",", fmt="%s")

10 真实波动幅度均值

# 读取最大值,最小值,收盘价
h, l, c = np.loadtxt('data.csv', delimiter=',', usecols=(4, 5, 6), unpack=True)

N = 20
h = h[-N:]
l = l[-N:]

len(h)
# 20

pre_close = c[-N -1: -1]  # 倒数第二十一的数值到倒数第一的数值
len(pre_close)
# 20
pre_close
# array([ 354.54,  356.85,  359.18,  359.9 ,  363.13,  358.3 ,  350.56,
#         338.61,  342.62,  342.88,  348.16,  353.21,  349.31,  352.12,
#         359.56,  360.  ,  355.36,  355.76,  352.47,  346.67])

# 获取当日最高价和最低价的差价,当日最高价和前一日收盘价差价,前一日收盘价和当日最低价的差价,求三者中的最大值
truerange = np.maximum(h - l, h - pre_close, pre_close - l)
truerange
# array([  4.26,   2.77,   2.42,   5.  ,   3.75,   9.98,   7.68,   6.03,
#          6.78,   5.55,   6.89,   8.04,   5.95,   7.67,   2.54,  10.36,
#          5.15,   4.16,   4.87,   7.32])

# 计算股票的ATR
atr = np.zeros(N)
atr[0] = np.mean(truerange)

for i in range(1, N):
atr[i] = (N-1) * atr[i - 1] + truerange[i]
atr[i] /= N

print("ATR", atr)
# ATR [ 5.8585      5.704075    5.53987125  5.51287769  5.4247338   5.65249711
#   5.75387226  5.76767864  5.81829471  5.80487998  5.85913598  5.96817918
#   5.96727022  6.05240671  5.87678637  6.10094705  6.0533997   5.95872972
#   5.90429323  5.97507857]

11 简单移动平均线

from matplotlib.pyplot import plot
from matplotlib.pyplot import show

N = 5
weights = np.ones(N) / N
weights
# array([ 0.2,  0.2,  0.2,  0.2,  0.2])

c = np.loadtxt('data.csv', delimiter=',', usecols=(6,), unpack=True)
sma = np.convolve(weights, c)[N-1: -N+1]  # 计算移动平均值, convolve(权重, 数组)
sma
# array([ 341.642,  343.722,  346.234,  348.268,  351.036,  353.256,
#         355.326,  356.786,  357.726,  358.72 ,  359.472,  358.214,
#         354.1  ,  350.644,  346.594,  344.566,  345.096,  347.236,
#         349.136,  352.472,  354.84 ,  355.27 ,  356.56 ,  356.63 ,
#         354.052,  352.45 ])

t = np.arange(N -1, len(c))
# array([ 4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
#        21, 22, 23, 24, 25, 26, 27, 28, 29])

plot(t, c[N-1:], lw=1.0)
plot(t, sma, lw=2.0)
show()


蓝色的线是原本的数值变化,橘色的线是简单移动平均线

12 指数移动平均线

x = np.arange(5)
np.exp(x)  # 计算指数
np.linspace(-1, 0, 5)
# array([-1.  , -0.75, -0.5 , -0.25,  0.  ])

N = 5
weights = np.exp(np.linspace(-1., 0., N))
weights /= weights.sum()  # 计算权重
weights
# array([ 0.11405072,  0.14644403,  0.18803785,  0.24144538,  0.31002201])

c = np.loadtxt('data.csv', delimiter=',', usecols=(6,), unpack=True)
ema = np.convolve(weights, c)[N-1:-N+1]
t = np.arange(N - 1, len(c))
plot(t, c[N-1:], lw=1.0)
plot(t, sma, lw=2.0)
plot(t, ema, lw=2.0)
show()


绿色的是指数加权平均线,比起橘色的简单移动平均线稍微往前靠

13 布林带

用于刻画价格的波动区域,分为中规(简单移动平均线),上轨(比中轨高出2倍标准差),下轨(比中轨低2倍标准差)

N = 5
weights = np.ones(N) / N
weights
# array([ 0.2,  0.2,  0.2,  0.2,  0.2])

c = np.loadtxt('data.csv', delimiter=',', usecols=(6,), unpack=True)
sma = np.convolve(weights, c)[N-1:-N+1]
deviation = []
C = len(c)

for i in range(N -1, C):
if i + N < C:
dev = c[i: i+N]
else:
dev = c[-N:]

averages = np.zeros(N)
averages.fill(sma[i - N -1])
dev = dev - averages
dev = dev ** 2
dev = np.sqrt(np.mean(dev))
deviation.append(dev)

deviation = 2 * np.array(deviation)
print(len(deviation), len(sma))
upperBB = sma + deviation
lowerBB = sma - deviation

c_slice = c[N-1:]
between_bands = np.where((c_slice < upperBB) & (c_slice > lowerBB))

print(lowerBB[between_bands])
print(c[between_bands])
print(upperBB[between_bands])
between_bands = len(np.ravel(between_bands))
print("Ratio between bands", float(between_bands)/len(c_slice))

t = np.arange(N - 1, C)
plot(t, c_slice, lw=1.0)
plot(t, sma, lw=2.0)
plot(t, upperBB, lw=3.0)
plot(t, lowerBB, lw=4.0)
show()

结果如下:

26 26
[ 329.23044409  335.70890572  318.53386282  321.90858271  327.74175968
331.5628136   337.94259734  343.84172744  339.99900409  336.58687297
333.15550418  328.64879207  323.61483771  327.25667796  334.30323599
335.79295948  326.55905786  324.27329493  325.47601386  332.85867025
341.63882551  348.75558399  348.48014357  348.01342992  343.56371701
341.85163786]
[ 336.1   339.32  345.03  344.32  343.44  346.5   351.88  355.2   358.16
354.54  356.85  359.18  359.9   363.13  358.3   350.56  338.61  342.62
342.88  348.16  353.21  349.31  352.12  359.56  360.    355.36]
[ 354.05355591  351.73509428  373.93413718  374.62741729  374.33024032
374.9491864   372.70940266  369.73027256  375.45299591  380.85312703
385.78849582  387.77920793  384.58516229  374.03132204  358.88476401
353.33904052  363.63294214  370.19870507  372.79598614  372.08532975
368.04117449  361.78441601  364.63985643  365.24657008  364.54028299
363.04836214]
Ratio between bands 1.0

阅读更多
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: