人工智能学习笔记——可视化库matplotlib
2018-03-26 23:09
686 查看
源数据(前12)
数据预处理(DATE格式转换):
import pandas as pd

# Load the unemployment table, convert the DATE strings to pandas datetimes,
# and preview the first twelve rows.
unrate = pd.read_csv("UNRATE.csv")
parsed_dates = pd.to_datetime(unrate["DATE"])
unrate["DATE"] = parsed_dates
preview = unrate.head(12)
print(preview)
DATE VALUE
0 1948-01-01 3.4
1 1948-02-01 3.8
2 1948-03-01 4.0
3 1948-04-01 3.9
4 1948-05-01 3.5
5 1948-06-01 3.6
6 1948-07-01 3.6
7 1948-08-01 3.9
8 1948-09-01 3.8
9 1948-10-01 3.7
10 1948-11-01 3.8
11 1948-12-01 4.0
绘制折线图:
import matplotlib.pyplot as plt
# Draw an empty figure first to show the default axes.
plt.plot()
plt.show()

# Slice the first twelve rows WITHOUT rebinding `unrate`. The original chained
# assignment (`x = unrate = unrate[0:12]`) truncated the whole DataFrame, which
# left later slices such as unrate[12:24] empty.
first_twelve = unrate[0:12]
plt.plot(first_twelve["DATE"], first_twelve["VALUE"])
plt.show()
x坐标标注旋转
# Rotate the x-axis tick labels by 45 degrees.
plt.xticks(rotation = 45)
坐标轴与标题标注
# Label the x axis, the y axis, and give the current axes a title.
plt.xlabel("Month")
plt.ylabel("Unemployment Rate")
plt.title("Monthly Unemployment Trend,1948")
子图操作:
# `np` is used below but was never imported in these notes.
import numpy as np

# New figure laid out as a 4x3 grid; axes are added only in cells 1, 2 and 6.
fig = plt.figure()
ax1 = fig.add_subplot(4, 3, 1)
ax2 = fig.add_subplot(4, 3, 2)
ax3 = fig.add_subplot(4, 3, 6)

# New figure with an explicit size and two axes stacked in a 2x1 grid.
fig = plt.figure(figsize=(6, 6))
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)
ax1.plot(np.arange(5), np.random.randint(1, 5, 5))
ax2.plot(np.arange(10), np.arange(10) * 3)
plt.show()
同一坐标系下绘制多条线:
# The plots below read a MONTH column that the notes never created; derive it
# from the datetime DATE column.
unrate['MONTH'] = unrate['DATE'].dt.month

# Two consecutive 12-row slices drawn on the same axes in different colours.
fig = plt.figure(figsize=(6, 3))
plt.plot(unrate[0:12]['MONTH'], unrate[0:12]['VALUE'], c='red')
plt.plot(unrate[12:24]['MONTH'], unrate[12:24]['VALUE'], c='blue')
plt.show()

# Five consecutive 12-row slices, one colour each. The loop body is indented
# again (the pasted version had lost its indentation and would not parse).
fig = plt.figure(figsize=(10, 6))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i, color in enumerate(colors):
    subset = unrate[i * 12:(i + 1) * 12]
    plt.plot(subset['MONTH'], subset['VALUE'], c=color)
plt.show()
曲线标签:
# The plot reads a MONTH column that the notes never created; derive it from
# the datetime DATE column (re-assigning it is harmless if it already exists).
unrate['MONTH'] = unrate['DATE'].dt.month

fig = plt.figure(figsize=(5, 3))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i, color in enumerate(colors):
    subset = unrate[i * 12:(i + 1) * 12]
    # Label each 12-row slice with a year string, counting up from 1948.
    label = str(1948 + i)
    plt.plot(subset['MONTH'], subset['VALUE'], c=color, label=label)
# Draw the legend once, after all lines exist; 'best' lets matplotlib choose
# the position. help(plt.legend) lists the accepted `loc` values.
plt.legend(loc='best')
plt.show()
loc= best
upper right
upper left
lower left
lower right
right
center left
center right
lower center
upper center
center
完整折线图:
# The plot reads a MONTH column that the notes never created; derive it from
# the datetime DATE column (re-assigning it is harmless if it already exists).
unrate['MONTH'] = unrate['DATE'].dt.month

fig = plt.figure(figsize=(5, 3))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i, color in enumerate(colors):
    subset = unrate[i * 12:(i + 1) * 12]
    # Label each 12-row slice with a year string, counting up from 1948.
    label = str(1948 + i)
    plt.plot(subset['MONTH'], subset['VALUE'], c=color, label=label)

# Legend, axis labels and title are set once, after the loop.
plt.legend(loc='upper left')
plt.xlabel('Month, Integer')
plt.ylabel('Unemployment Rate, Percent')
plt.title('Monthly Unemployment Trends, 1948-1952')
plt.show()
绘制条形图:
import pandas as pd
from numpy import arange

# Load the review scores and keep the film title plus the rating columns.
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]
print(norm_reviews[:1])

# Axes.bar() needs two things: the x position of each bar and its height.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
# Ratings of the first row. DataFrame.ix no longer exists in current pandas;
# .loc performs the same label-based lookup here.
bar_heights = norm_reviews.loc[0, num_cols].values
print(bar_heights)
bar_positions = arange(5) + 0.75
print(bar_positions)

fig, ax = plt.subplots()
ax.bar(bar_positions, bar_heights, 0.5)  # third argument is the bar width
plt.show()
[4.3 3.55 3.9 4.5 5.0]
[0.75 1.75 2.75 3.75 4.75]
横向条形图:
import matplotlib.pyplot as plt
from numpy import arange

num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
# Ratings of the first row. DataFrame.ix no longer exists in current pandas;
# .loc performs the same label-based lookup here.
bar_widths = norm_reviews.loc[0, num_cols].values
bar_positions = arange(5) + 0.75
tick_positions = range(1, 6)

fig, ax = plt.subplots()
# barh() takes the y position of each bar, its length, and its thickness.
ax.barh(bar_positions, bar_widths, 0.5)
# Put one tick per bar and name it after the rating column.
ax.set_yticks(tick_positions)
ax.set_yticklabels(num_cols)
ax.set_ylabel('Rating Source')
ax.set_xlabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()
散点图:
# Scatter plot with one point per row: Fandango rating on x,
# Rotten Tomatoes user rating on y.
fig, ax = plt.subplots()
fandango_scores = norm_reviews['Fandango_Ratingvalue']
rt_scores = norm_reviews['RT_user_norm']
ax.scatter(fandango_scores, rt_scores)
ax.set_xlabel('Fandango')
ax.set_ylabel('Rotten Tomatoes')
plt.show()
直方图(柱形图):
import pandas as pd
import matplotlib.pyplot as plt

# Reload the scores, keeping the film title and four rating columns, and
# preview the first five rows.
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
reviews = pd.read_csv('fandango_scores.csv')
norm_reviews = reviews[cols]
first_rows = norm_reviews[:5]
print(first_rows)
FILM RT_user_norm Metacritic_user_nom \
0 Avengers: Age of Ultron (2015) 4.3 3.55
1 Cinderella (2015) 4.0 3.75
2 Ant-Man (2015) 4.5 4.05
3 Do You Believe? (2015) 4.2 2.35
4 Hot Tub Time Machine 2 (2015) 1.4 1.70
IMDB_norm Fandango_Ratingvalue
0 3.90 4.5
1 3.55 4.5
2 3.90 4.5
3 2.70 4.5
4 2.55 3.0
fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts()
# Order each frequency table by rating value (its index), then print both.
fandango_distribution = fandango_distribution.sort_index()
imdb_distribution = norm_reviews['IMDB_norm'].value_counts()
imdb_distribution = imdb_distribution.sort_index()
print(fandango_distribution)
print(imdb_distribution)

# Create fresh axes for the histogram. This line was missing, so `ax` would
# have been whatever axes an earlier snippet left behind.
fig, ax = plt.subplots()
# ax.hist(norm_reviews['Fandango_Ratingvalue'])           # default binning
# ax.hist(norm_reviews['Fandango_Ratingvalue'], bins=20)  # 20 bins
ax.hist(norm_reviews['Fandango_Ratingvalue'], range=(4, 5), bins=20)  # 20 bins over 4..5
plt.show()

# Tall figure with four stacked axes; only the first one is drawn on here.
fig = plt.figure(figsize=(5, 20))
ax1 = fig.add_subplot(4, 1, 1)
ax2 = fig.add_subplot(4, 1, 2)
ax3 = fig.add_subplot(4, 1, 3)
ax4 = fig.add_subplot(4, 1, 4)
ax1.hist(norm_reviews['Fandango_Ratingvalue'], bins=20, range=(0, 5))
ax1.set_title('Distribution of Fandango Ratings')
ax1.set_ylim(0, 50)  # y-axis range
plt.show()
箱型图:
# Box plot of a single rating column.
fig, ax = plt.subplots()
rt_values = norm_reviews['RT_user_norm'].values
ax.boxplot(rt_values)
ax.set_xticklabels(['Rotten Tomatoes'])
ax.set_ylim(0, 5)
plt.show()

# One box per rating column on shared axes, with the column names as
# vertical tick labels.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
fig, ax = plt.subplots()
all_values = norm_reviews[num_cols].values
ax.boxplot(all_values)
ax.set_xticklabels(num_cols, rotation=90)
ax.set_ylim(0, 5)
plt.show()
去除坐标轴刻度线:
# Create the axes once; the duplicated plt.subplots() call opened an extra,
# empty figure.
fig, ax = plt.subplots()
# Hide the tick marks on all four sides. Use booleans: current Matplotlib
# releases no longer interpret the legacy "off" string as False.
ax.tick_params(bottom=False, top=False, left=False, right=False)
plt.show()
去边框:
# Create the axes once; the duplicated plt.subplots() call opened an extra,
# empty figure.
fig, ax = plt.subplots()
# Hide every border line (spine) of the axes. The loop body is indented again;
# the pasted version had lost its indentation and would not parse.
for spine in ax.spines.values():
    spine.set_visible(False)
plt.show()
RGB颜色通道:
# Dark blue as an (r, g, b) tuple: each 0-255 channel scaled into 0..1.
cb_dark_blue = tuple(channel / 255 for channel in (0, 107, 164))
线宽:
# Draw the line in cb_dark_blue with a thick stroke (linewidth=10).
# NOTE(review): women_degrees, major_cats and sp are not defined anywhere in
# these notes; presumably they come from a separate example. Confirm before
# running.
ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c=cb_dark_blue, label='Women', linewidth=10)
曲线标注:
# Write text labels on the axes at the given (x, y) positions.
ax.text(2005, 87, 'Men')
ax.text(2002, 8, 'Women')
数据预处理(DATE格式转换):
import pandas as pd

# Load the unemployment table, convert the DATE strings to pandas datetimes,
# and preview the first twelve rows.
unrate = pd.read_csv("UNRATE.csv")
parsed_dates = pd.to_datetime(unrate["DATE"])
unrate["DATE"] = parsed_dates
preview = unrate.head(12)
print(preview)
DATE VALUE
0 1948-01-01 3.4
1 1948-02-01 3.8
2 1948-03-01 4.0
3 1948-04-01 3.9
4 1948-05-01 3.5
5 1948-06-01 3.6
6 1948-07-01 3.6
7 1948-08-01 3.9
8 1948-09-01 3.8
9 1948-10-01 3.7
10 1948-11-01 3.8
11 1948-12-01 4.0
绘制折线图:
import matplotlib.pyplot as plt
# Draw an empty figure first to show the default axes.
plt.plot()
plt.show()

# Slice the first twelve rows WITHOUT rebinding `unrate`. The original chained
# assignment (`x = unrate = unrate[0:12]`) truncated the whole DataFrame, which
# left later slices such as unrate[12:24] empty.
first_twelve = unrate[0:12]
plt.plot(first_twelve["DATE"], first_twelve["VALUE"])
plt.show()
x坐标标注旋转
# Rotate the x-axis tick labels by 45 degrees.
plt.xticks(rotation = 45)
坐标轴与标题标注
# Label the x axis, the y axis, and give the current axes a title.
plt.xlabel("Month")
plt.ylabel("Unemployment Rate")
plt.title("Monthly Unemployment Trend,1948")
子图操作:
# `np` is used below but was never imported in these notes.
import numpy as np

# New figure laid out as a 4x3 grid; axes are added only in cells 1, 2 and 6.
fig = plt.figure()
ax1 = fig.add_subplot(4, 3, 1)
ax2 = fig.add_subplot(4, 3, 2)
ax3 = fig.add_subplot(4, 3, 6)

# New figure with an explicit size and two axes stacked in a 2x1 grid.
fig = plt.figure(figsize=(6, 6))
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)
ax1.plot(np.arange(5), np.random.randint(1, 5, 5))
ax2.plot(np.arange(10), np.arange(10) * 3)
plt.show()
同一坐标系下绘制多条线:
# The plots below read a MONTH column that the notes never created; derive it
# from the datetime DATE column.
unrate['MONTH'] = unrate['DATE'].dt.month

# Two consecutive 12-row slices drawn on the same axes in different colours.
fig = plt.figure(figsize=(6, 3))
plt.plot(unrate[0:12]['MONTH'], unrate[0:12]['VALUE'], c='red')
plt.plot(unrate[12:24]['MONTH'], unrate[12:24]['VALUE'], c='blue')
plt.show()

# Five consecutive 12-row slices, one colour each. The loop body is indented
# again (the pasted version had lost its indentation and would not parse).
fig = plt.figure(figsize=(10, 6))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i, color in enumerate(colors):
    subset = unrate[i * 12:(i + 1) * 12]
    plt.plot(subset['MONTH'], subset['VALUE'], c=color)
plt.show()
曲线标签:
# The plot reads a MONTH column that the notes never created; derive it from
# the datetime DATE column (re-assigning it is harmless if it already exists).
unrate['MONTH'] = unrate['DATE'].dt.month

fig = plt.figure(figsize=(5, 3))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i, color in enumerate(colors):
    subset = unrate[i * 12:(i + 1) * 12]
    # Label each 12-row slice with a year string, counting up from 1948.
    label = str(1948 + i)
    plt.plot(subset['MONTH'], subset['VALUE'], c=color, label=label)
# Draw the legend once, after all lines exist; 'best' lets matplotlib choose
# the position. help(plt.legend) lists the accepted `loc` values.
plt.legend(loc='best')
plt.show()
loc= best
upper right
upper left
lower left
lower right
right
center left
center right
lower center
upper center
center
完整折线图:
# The plot reads a MONTH column that the notes never created; derive it from
# the datetime DATE column (re-assigning it is harmless if it already exists).
unrate['MONTH'] = unrate['DATE'].dt.month

fig = plt.figure(figsize=(5, 3))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i, color in enumerate(colors):
    subset = unrate[i * 12:(i + 1) * 12]
    # Label each 12-row slice with a year string, counting up from 1948.
    label = str(1948 + i)
    plt.plot(subset['MONTH'], subset['VALUE'], c=color, label=label)

# Legend, axis labels and title are set once, after the loop.
plt.legend(loc='upper left')
plt.xlabel('Month, Integer')
plt.ylabel('Unemployment Rate, Percent')
plt.title('Monthly Unemployment Trends, 1948-1952')
plt.show()
绘制条形图:
import pandas as pd

# Load the review scores, keep the film title plus the rating columns, and
# preview the first row.
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
reviews = pd.read_csv('fandango_scores.csv')
norm_reviews = reviews[cols]
first_row = norm_reviews[:1]
print(first_row)
FILM RT_user_norm Metacritic_user_nom \
0 Avengers: Age of Ultron (2015) 4.3 3.55
IMDB_norm Fandango_Ratingvalue Fandango_Stars
0 3.9 4.5 5.0
import matplotlib.pyplot as plt
from numpy import arange

# Axes.bar() needs two things: the x position of each bar and its height.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
# Ratings of the first row. DataFrame.ix no longer exists in current pandas;
# .loc performs the same label-based lookup here.
bar_heights = norm_reviews.loc[0, num_cols].values
print(bar_heights)
bar_positions = arange(5) + 0.75
print(bar_positions)

fig, ax = plt.subplots()
ax.bar(bar_positions, bar_heights, 0.5)  # third argument is the bar width
plt.show()
[4.3 3.55 3.9 4.5 5.0]
[0.75 1.75 2.75 3.75 4.75]
横向条形图:
import matplotlib.pyplot as plt
from numpy import arange

num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
# Ratings of the first row. DataFrame.ix no longer exists in current pandas;
# .loc performs the same label-based lookup here.
bar_widths = norm_reviews.loc[0, num_cols].values
bar_positions = arange(5) + 0.75
tick_positions = range(1, 6)

fig, ax = plt.subplots()
# barh() takes the y position of each bar, its length, and its thickness.
ax.barh(bar_positions, bar_widths, 0.5)
# Put one tick per bar and name it after the rating column.
ax.set_yticks(tick_positions)
ax.set_yticklabels(num_cols)
ax.set_ylabel('Rating Source')
ax.set_xlabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()
散点图:
# Scatter plot with one point per row: Fandango rating on x,
# Rotten Tomatoes user rating on y.
fig, ax = plt.subplots()
fandango_scores = norm_reviews['Fandango_Ratingvalue']
rt_scores = norm_reviews['RT_user_norm']
ax.scatter(fandango_scores, rt_scores)
ax.set_xlabel('Fandango')
ax.set_ylabel('Rotten Tomatoes')
plt.show()
直方图(柱形图):
import pandas as pd
import matplotlib.pyplot as plt

# Reload the scores, keeping the film title and four rating columns, and
# preview the first five rows.
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
reviews = pd.read_csv('fandango_scores.csv')
norm_reviews = reviews[cols]
first_rows = norm_reviews[:5]
print(first_rows)
FILM RT_user_norm Metacritic_user_nom \
0 Avengers: Age of Ultron (2015) 4.3 3.55
1 Cinderella (2015) 4.0 3.75
2 Ant-Man (2015) 4.5 4.05
3 Do You Believe? (2015) 4.2 2.35
4 Hot Tub Time Machine 2 (2015) 1.4 1.70
IMDB_norm Fandango_Ratingvalue
0 3.90 4.5
1 3.55 4.5
2 3.90 4.5
3 2.70 4.5
4 2.55 3.0
fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts()
# Order each frequency table by rating value (its index), then print both.
fandango_distribution = fandango_distribution.sort_index()
imdb_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()
print(fandango_distribution)
print(imdb_distribution)
2.7 2 2.8 2 2.9 5 3.0 4 3.1 3 3.2 5 3.3 4 3.4 9 3.5 9 3.6 8 3.7 9 3.8 5 3.9 12 4.0 7 4.1 16 4.2 12 4.3 11 4.4 7 4.5 9 4.6 4 4.8 3 Name: Fandango_Ratingvalue, dtype: int64
2.00 1 2.10 1 2.15 1 2.20 1 2.30 2 2.45 2 2.50 1 2.55 1 2.60 2 2.70 4 2.75 5 2.80 2 2.85 1 2.90 1 2.95 3 3.00 2 3.05 4 3.10 1 3.15 9 3.20 6 3.25 4 3.30 9 3.35 7 3.40 1 3.45 7 3.50 4 3.55 7 3.60 10 3.65 5 3.70 8 3.75 6 3.80 3 3.85 4 3.90 9 3.95 2 4.00 1 4.05 1 4.10 4 4.15 1 4.20 2 4.30 1 Name: IMDB_norm, dtype: int64
fig, ax = plt.subplots()
# Alternatives kept for reference:
#   ax.hist(norm_reviews['Fandango_Ratingvalue'])           -> default binning
#   ax.hist(norm_reviews['Fandango_Ratingvalue'], bins=20)  -> 20 bins
# Histogram of the Fandango ratings: 20 bins over the range 4..5.
fandango_ratings = norm_reviews['Fandango_Ratingvalue']
ax.hist(fandango_ratings, bins=20, range=(4, 5))
plt.show()

# Tall figure with four stacked axes; only the first one is drawn on here.
fig = plt.figure(figsize=(5, 20))
ax1 = fig.add_subplot(4, 1, 1)
ax2 = fig.add_subplot(4, 1, 2)
ax3 = fig.add_subplot(4, 1, 3)
ax4 = fig.add_subplot(4, 1, 4)
ax1.hist(norm_reviews['Fandango_Ratingvalue'], range=(0, 5), bins=20)
ax1.set_title('Distribution of Fandango Ratings')
ax1.set_ylim(0, 50)  # y-axis range
plt.show()
箱型图:
# Box plot of a single rating column.
fig, ax = plt.subplots()
rt_values = norm_reviews['RT_user_norm'].values
ax.boxplot(rt_values)
ax.set_xticklabels(['Rotten Tomatoes'])
ax.set_ylim(0, 5)
plt.show()

# One box per rating column on shared axes, with the column names as
# vertical tick labels.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
fig, ax = plt.subplots()
all_values = norm_reviews[num_cols].values
ax.boxplot(all_values)
ax.set_xticklabels(num_cols, rotation=90)
ax.set_ylim(0, 5)
plt.show()
去除坐标轴刻度线:
# Create the axes once; the duplicated plt.subplots() call opened an extra,
# empty figure.
fig, ax = plt.subplots()
# Hide the tick marks on all four sides. Use booleans: current Matplotlib
# releases no longer interpret the legacy "off" string as False.
ax.tick_params(bottom=False, top=False, left=False, right=False)
plt.show()
去边框:
# Create the axes once; the duplicated plt.subplots() call opened an extra,
# empty figure.
fig, ax = plt.subplots()
# Hide every border line (spine) of the axes. The loop body is indented again;
# the pasted version had lost its indentation and would not parse.
for spine in ax.spines.values():
    spine.set_visible(False)
plt.show()
RGB颜色通道:
# Dark blue as an (r, g, b) tuple: each 0-255 channel scaled into 0..1.
cb_dark_blue = tuple(channel / 255 for channel in (0, 107, 164))
线宽:
# Draw the line in cb_dark_blue with a thick stroke (linewidth=10).
# NOTE(review): women_degrees, major_cats and sp are not defined anywhere in
# these notes; presumably they come from a separate example. Confirm before
# running.
ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c=cb_dark_blue, label='Women', linewidth=10)
曲线标注:
# Write text labels on the axes at the given (x, y) positions.
ax.text(2005, 87, 'Men')
ax.text(2002, 8, 'Women')
相关文章推荐
- 【Python学习笔记 】11.可视化库Matplotlib(上)
- 【Python学习笔记 】12.可视化库Matplotlib(下)
- Matplotlib学习笔记之数据可视化
- python可视化-matplotlib学习2
- matplotlib学习笔记--Legend
- python Matplotlib 学习笔记(2)
- python学习笔记(1)之如何在python3.x下安装PIL、numpy以及matplotlib
- 阿齐兹的PyCV学习笔记——PIL、Matplotlib
- 深度学习之Python的可视化包 – Matplotlib
- matplotlib figure函数学习笔记
- matplotlib Axes.scatter 函数学习笔记
- 基于matplotlib的数据可视化 - 笔记
- TF之NN:matplotlib动态演示深度学习之tensorflow将神经网络系统自动学习并优化修正并且将输出结果可视化—Jason niu
- Matplotlib 学习笔记
- Python学习笔记(八)-(2)matplotlib作图之legend
- Python数据可视化-matplotlib学习1
- Python可视化学习(2):Matplotlib快速绘图基础
- 雾山的Python学习笔记---Matplotlib的安装
- matplotlib 学习笔记(一)
- python 数据可视化 matplotlib学习三:生成随机漫步数据并使用matplotlib呈现