您的位置:首页 > 编程语言 > Python开发

Python数据分析与机器学习-Matplotlib

2017-12-27 21:57 471 查看
源码下载位置:
http://download.csdn.net/download/adam_zs/10176798
import pandas as pd
import matplotlib.pyplot as plt

# Percentage of bachelor's degrees awarded to women, one column per major.
women_degrees = pd.read_csv('percent-bachelors-degrees-women-usa.csv')

major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
# Line colours as RGB tuples; each 0-255 channel is scaled to the 0-1 range.
cb_dark_blue = (0 / 255, 107 / 255, 164 / 255)
cb_orange = (255 / 255, 128 / 255, 14 / 255)

fig = plt.figure(figsize=(12, 12))

# One subplot per major, laid out on a 2x2 grid.
for sp, major in enumerate(major_cats):
    ax = fig.add_subplot(2, 2, sp + 1)
    # The color for each line is assigned here.
    # The men's share is derived as 100 minus the women's share.
    ax.plot(women_degrees['Year'], women_degrees[major], c=cb_dark_blue, label='Women')
    ax.plot(women_degrees['Year'], 100 - women_degrees[major], c=cb_orange, label='Men')
    # Hide the four border lines of the subplot.
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major)
    # Booleans are required here: the legacy "off" strings are truthy, so
    # current Matplotlib would keep drawing the tick marks.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

# pyplot.legend() attaches the legend to the last subplot that was created.
plt.legend(loc='upper right')
plt.show()

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# unrate.csv: unemployment-rate observations keyed by DATE.
# NOTE(review): presumably one row per month (later code plots 12 rows per year) - confirm.
unrate = pd.read_csv('unrate.csv')
# Parse the DATE strings into datetime values so the .dt accessor can be used.
unrate = unrate.assign(DATE=pd.to_datetime(unrate['DATE']))

# 折线图plot
# first_twelve = unrate[0:12]
# plt.plot(first_twelve['DATE'], first_twelve['VALUE'])
# plt.xticks(rotation=45) #xticks指定角度
# plt.xlabel('Month')
# plt.ylabel('Unemployment Rate')
# plt.title('Monthly Unemployment Trends, 1948')
# plt.show()

# 一个区域添加子图
# add_subplot(first,second,index) first means number of Row,second means number of Column.
# #figure得到画图区间
# fig = plt.figure()  # 添加子图,first:行数, second:列数, index:该图的位置索引
# ax1 = fig.add_subplot(3, 2, 1) #add_subplot添加子图
# ax2 = fig.add_subplot(3, 2, 2)
# ax6 = fig.add_subplot(3, 2, 6)
# plt.show()

# fig = plt.figure()
# ax1 = fig.add_subplot(2, 1, 1)
# ax2 = fig.add_subplot(2, 1, 2)
# # np.random.randint(low, high, size) 返回 [low, high) 区间内的随机整数;size=5 时 result: [2 2 1 2 1],size=(3, 2) 时 result: [[1 4][1 4][3 3]]
# ax1.plot(np.random.randint(1, 5, size=5), np.arange(5))
# ax2.plot(np.arange(10) * 3, np.arange(10))
# plt.show()

# Month number taken from each DATE; used as the x coordinate of the plots below.
unrate['MONTH'] = unrate['DATE'].dt.month

# Draw several lines on the same figure
# fig = plt.figure(figsize=(6, 3))  # size of the drawing area (width, height), in inches
# plt.plot(unrate['MONTH'][:12], unrate['VALUE'][:12], c="red")  # c: line colour
# plt.plot(unrate['MONTH'][12:24], unrate['VALUE'][12:24], c="blue")
# plt.show()

# fig = plt.figure(figsize=(10, 6))
# colors = ['red', 'blue', 'green', 'orange', 'black']
# for i in range(5):
#     start_index = i * 12
#     end_index = (i + 1) * 12
#     subset = unrate[start_index:end_index]
#     plt.plot(subset['MONTH'], subset['VALUE'], c=colors[i])
# plt.show()

# Add a label to every line
# fig = plt.figure(figsize=(10, 6))
# colors = ['red', 'blue', 'green', 'orange', 'black']
# for i in range(5):
#     start_index = i * 12
#     end_index = (i + 1) * 12
#     subset = unrate[start_index:end_index]
#     label = str(1948 + i)
#     plt.plot(subset['MONTH'], subset['VALUE'], c=colors[i], label=label)  # label: legend text for the line
# plt.legend()  # show the legend
# plt.show()

# Five consecutive 12-row slices, one coloured and labelled line per slice.
fig = plt.figure(figsize=(10, 6))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i, color in enumerate(colors):
    start_index = i * 12
    end_index = (i + 1) * 12
    subset = unrate[start_index:end_index]
    label = str(1948 + i)
    plt.plot(subset['MONTH'], subset['VALUE'], c=color, label=label)
# help() prints the documentation itself and returns None, so it is not
# wrapped in print() (that would emit a stray "None").
help(plt.legend)
plt.legend(loc='upper left')  # loc='best' lets Matplotlib pick a suitable spot automatically
plt.xlabel('Month, Integer')
plt.ylabel('Unemployment Rate, Percent')
plt.title('Monthly Unemployment Trends, 1948-1952')
plt.show()

import pandas as pd

# Raise pandas' display limits so printed frames are not truncated.
# 'display.height' is intentionally not set: it is no longer a registered
# pandas option, so set_option() raises OptionError for it.
pd.set_option('display.max_rows', 9999)
pd.set_option('display.max_columns', 9999)
pd.set_option('display.width', 9999)

reviews = pd.read_csv('fandango_scores.csv')
# Keep the film title plus the rating columns used by the plots below.
# 'Metacritic_user_nom' is spelled exactly as the column is looked up in the data.
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]
# print(norm_reviews[:1])

import matplotlib.pyplot as plt
from numpy import arange

# The Axes.bar() method has 2 required parameters, x (named `left` in older Matplotlib releases) and height.
# We use the x parameter to specify the x coordinates of the bars (their centers by default, or their left sides with align='edge').
# We use the height parameter to specify the height of each bar.

'''
plt.subplots() 是一个返回元组的函数,元组中包含图形(Figure)对象和坐标轴(Axes)对象。
因此,使用 fig, ax = plt.subplots() 时,就是把这个元组解包到变量 fig 和 ax 中。
如果你想修改图形级别的属性,或者之后把图形保存为图像文件(例如 fig.savefig('yourfilename.png')),fig 会非常有用。
你当然不必使用返回的图形对象,但很多人都会用到它,所以经常能看到这种写法。
此外,所有坐标轴对象(具有绘图方法的对象)都有一个父图形对象。
'''

# 柱形图 bar
# #num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
# # print(norm_reviews.head())
# # print(norm_reviews.ix[4, num_cols])  # 显示索引4的num_cols列结果
# bar_heights = norm_reviews.ix[2, num_cols].values #柱得到高度
# bar_positions = arange(5) + 0.75  # 柱离原点的位置
# fig, ax = plt.subplots() #ax对图进行操作
# ax.bar(bar_positions, bar_heights, 0.5) #.bar柱状图 0.5柱的宽度
# plt.show()

# num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
# bar_heights = norm_reviews.ix[0, num_cols].values
# bar_positions = arange(5) + 0.75
# tick_positions = range(1, 6)
# fig, ax = plt.subplots()
# ax.bar(bar_positions, bar_heights, 0.5)
# ax.set_xticks(tick_positions)  # x轴标签的位置
# ax.set_xticklabels(num_cols, rotation=45)
# ax.set_xlabel('Rating Source')
# ax.set_ylabel('Average Rating')
# ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
# plt.show()

# num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
# bar_widths = norm_reviews.ix[0, num_cols].values
# bar_positions = arange(5) + 0.75
# tick_positions = range(1, 6)
# fig, ax = plt.subplots()
# ax.barh(bar_positions, bar_widths, 0.5) #.barh得到横着的图
# ax.set_yticks(tick_positions)
# ax.set_yticklabels(num_cols)
# ax.set_ylabel('Rating Source')
# ax.set_xlabel('Average Rating')
# ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
# plt.show()

# 散点图scatter
# fig, ax = plt.subplots()
# ax.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])
# ax.set_xlabel('Fandango')
# ax.set_ylabel('Rotten Tomatoes')
# plt.show()

# Two stacked scatter plots of the same pair of rating columns, with the
# axes swapped between the top and the bottom panel.
fig = plt.figure(figsize=(5, 10))
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)

fandango_scores = norm_reviews['Fandango_Ratingvalue']
rt_scores = norm_reviews['RT_user_norm']

# (axes, x data, x label, y data, y label) for each panel.
panels = (
    (ax1, fandango_scores, 'Fandango', rt_scores, 'Rotten Tomatoes'),
    (ax2, rt_scores, 'Rotten Tomatoes', fandango_scores, 'Fandango'),
)
for panel, x_values, x_label, y_values, y_label in panels:
    panel.scatter(x_values, y_values)
    panel.set_xlabel(x_label)
    panel.set_ylabel(y_label)

plt.show()

import pandas as pd
import matplotlib.pyplot as plt

# Raise pandas' display limits so printed frames are not truncated.
# 'display.height' is intentionally not set: it is no longer a registered
# pandas option, so set_option() raises OptionError for it.
pd.set_option('display.max_rows', 9999)
pd.set_option('display.max_columns', 9999)
pd.set_option('display.width', 9999)

reviews = pd.read_csv('fandango_scores.csv')
# Keep the film title plus the four rating columns used below.
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
norm_reviews = reviews[cols]

# value_counts 针对每个值统计数量
# fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts()
# fandango_distribution = fandango_distribution.sort_index()
#
# imdb_distribution = norm_reviews['IMDB_norm'].value_counts()
# imdb_distribution = imdb_distribution.sort_index()
#
# print(fandango_distribution)
# print(imdb_distribution)

# hist函数,给定一堆数据,统计数据在某一值的个数。
# range 设置显示的范围,范围之外的将被舍弃(指定横轴)
# bins : 显示柱状图的个数
# fig, ax = plt.subplots()
# print(norm_reviews['Fandango_Ratingvalue'])
# # ax.hist(norm_reviews['Fandango_Ratingvalue']) #默认bins=10
# # ax.hist(norm_reviews['Fandango_Ratingvalue'],bins=20)
# ax.hist(norm_reviews['Fandango_Ratingvalue'], range=(4, 5), bins=20)
# plt.show()

# fig = plt.figure(figsize=(5, 20))
# ax1 = fig.add_subplot(4, 1, 1)
# ax2 = fig.add_subplot(4, 1, 2)
# ax3 = fig.add_subplot(4, 1, 3)
# ax4 = fig.add_subplot(4, 1, 4)
# ax1.hist(norm_reviews['Fandango_Ratingvalue'], bins=20, range=(0, 5))
# ax1.set_title('Distribution of Fandango Ratings')
# ax1.set_ylim(0, 50) #set_ylim设置y轴的区间
#
# ax2.hist(norm_reviews['RT_user_norm'], 20, range=(0, 5))
# ax2.set_title('Distribution of Rotten Tomatoes Ratings')
# ax2.set_ylim(0, 50)
#
# ax3.hist(norm_reviews['Metacritic_user_nom'], 20, range=(0, 5))
# ax3.set_title('Distribution of Metacritic Ratings')
# ax3.set_ylim(0, 50)
#
# ax4.hist(norm_reviews['IMDB_norm'], 20, range=(0, 5))
# ax4.set_title('Distribution of IMDB Ratings')
# ax4.set_ylim(0, 50)
#
# plt.show()

# 盒图(4分图)
# fig, ax = plt.subplots()
# print(norm_reviews['RT_user_norm'])
# ax.boxplot(norm_reviews['RT_user_norm'])
# ax.set_xticklabels(['Rotten Tomatoes'])
# ax.set_ylim(0, 5)
# plt.show()

# Box plot with one box per rating column, all on a shared 0-5 y axis.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
rating_matrix = norm_reviews[num_cols].values  # 2-D array, one column per rating source

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.boxplot(rating_matrix)
# Label each box with its column name, tilted so the long names do not overlap.
ax.set_xticklabels(num_cols, rotation=45)
ax.set_ylim(0, 5)
plt.show()

import pandas as pd
import matplotlib.pyplot as plt

# Share of degrees per field: the columns hold the women's percentage,
# and the plots below derive the men's percentage as 100 minus that value.
degrees_csv = 'percent-bachelors-degrees-women-usa.csv'
women_degrees = pd.read_csv(degrees_csv)

# plt.plot(women_degrees['Year'], women_degrees['Biology'])
# plt.show()

# plt.plot(women_degrees['Year'], women_degrees['Biology'], c='blue', label='Women')
# plt.plot(women_degrees['Year'], 100 - women_degrees['Biology'], c='green', label='Men')
# plt.legend(loc='upper right')
# plt.title('Percentage of Biology Degrees Awarded By Gender')
# plt.show()

# fig, ax = plt.subplots()
# ax.plot(women_degrees['Year'], women_degrees['Biology'], label='Women')
# ax.plot(women_degrees['Year'], 100 - women_degrees['Biology'], label='Men')
# #图像的刻度、标注等部分作不显示设置
# ax.tick_params(bottom="off", top="off", left="off", right="off")
# ax.set_title('Percentage of Biology Degrees Awarded By Gender')
# ax.legend(loc="upper right")
#
# plt.show()

# fig, ax = plt.subplots()
# ax.plot(women_degrees['Year'], women_degrees['Biology'], c='blue', label='Women')
# ax.plot(women_degrees['Year'], 100 - women_degrees['Biology'], c='green', label='Men')
# ax.tick_params(bottom="off", top="off", left="off", right="off")
#
# for key, spine in ax.spines.items():
#     spine.set_visible(False)
# ax.legend(loc='upper right')
# plt.show()

major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
fig = plt.figure(figsize=(12, 12))

# One subplot per major on a 2x2 grid: the women's share in blue and the
# men's share (100 minus the women's share) in green.
for sp, major in enumerate(major_cats):
    ax = fig.add_subplot(2, 2, sp + 1)
    ax.plot(women_degrees['Year'], women_degrees[major], c='blue', label='Women')
    ax.plot(women_degrees['Year'], 100 - women_degrees[major], c='green', label='Men')
    # No further per-subplot styling in this basic version.

# Calling pyplot.legend() here will add the legend to the last subplot that was created.
plt.legend(loc='upper right')
plt.show()

major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
fig = plt.figure(figsize=(12, 12))

# Same 2x2 grid of per-major plots, with spines and tick marks removed and
# identical axis limits on every subplot.
for sp, major in enumerate(major_cats):
    ax = fig.add_subplot(2, 2, sp + 1)
    ax.plot(women_degrees['Year'], women_degrees[major], c='blue', label='Women')
    ax.plot(women_degrees['Year'], 100 - women_degrees[major], c='green', label='Men')
    # Hide the four border lines of the subplot.
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major)
    # Booleans are required here: the legacy "off" strings are truthy, so
    # current Matplotlib would keep drawing the tick marks.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

# Calling pyplot.legend() here will add the legend to the last subplot that was created.
plt.legend(loc='upper right')
plt.show()

import pandas as pd
import matplotlib.pyplot as plt

# Columns of the degrees table used by the 2x2 examples in this section.
major_cats = [
    'Biology',
    'Computer Science',
    'Engineering',
    'Math and Statistics',
]
# Per-field percentages read from the CSV; plotted against the 'Year' column.
women_degrees = pd.read_csv('percent-bachelors-degrees-women-usa.csv')

# plt.plot(women_degrees['Year'], women_degrees['Biology'])
# plt.show()

# plt.plot(women_degrees['Year'], women_degrees['Biology'], c='blue', label='Women')
# plt.plot(women_degrees['Year'], 100 - women_degrees['Biology'], c='green', label='Men')
# plt.legend(loc='upper right')
# plt.title('Percentage of Biology Degrees Awarded By Gender')
# plt.show()

# fig, ax = plt.subplots()
# ax.plot(women_degrees['Year'], women_degrees['Biology'], label='Women')
# ax.plot(women_degrees['Year'], 100 - women_degrees['Biology'], label='Men')
# #图像的刻度、标注等部分作不显示设置
# ax.tick_params(bottom="off", top="off", left="off", right="off")
# ax.set_title('Percentage of Biology Degrees Awarded By Gender')
# ax.legend(loc="upper right")
#
# plt.show()

# fig, ax = plt.subplots()
# ax.plot(women_degrees['Year'], women_degrees['Biology'], c='blue', label='Women')
# ax.plot(women_degrees['Year'], 100 - women_degrees['Biology'], c='green', label='Men')
# ax.tick_params(bottom="off", top="off", left="off", right="off")
#
# for key, spine in ax.spines.items():
#     spine.set_visible(False)
# ax.legend(loc='upper right')
# plt.show()

# fig = plt.figure(figsize=(12, 12))
#
# for sp in range(0, 4):
#     ax = fig.add_subplot(2, 2, sp + 1)
#     ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c='blue', label='Women')
#     ax.plot(women_degrees['Year'], 100 - women_degrees[major_cats[sp]], c='green', label='Men')
#
# # Calling pyplot.legend() here will add the legend to the last subplot that was created.
# plt.legend(loc='upper right')
# plt.show()

# major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
# fig = plt.figure(figsize=(12, 12))
#
# for sp in range(0, 4):
#     ax = fig.add_subplot(2, 2, sp + 1)
#     ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c='blue', label='Women')
#     ax.plot(women_degrees['Year'], 100 - women_degrees[major_cats[sp]], c='green', label='Men')
#     for key, spine in ax.spines.items():
#         spine.set_visible(False)
#     ax.set_xlim(1968, 2011)
#     ax.set_ylim(0, 100)
#     ax.set_title(major_cats[sp])
#     ax.tick_params(bottom="off", top="off", left="off", right="off")
#
# # Calling pyplot.legend() here will add the legend to the last subplot that was created.
# plt.legend(loc='upper right')
# plt.show()

# Setting Line Width
# Line colours as RGB tuples; each 0-255 channel is divided by 255 to scale it to 0-1.
cb_dark_blue = tuple(channel / 255 for channel in (0, 107, 164))
cb_orange = tuple(channel / 255 for channel in (255, 128, 14))

#
# fig = plt.figure(figsize=(12, 12))
#
# for sp in range(0, 4):
#     ax = fig.add_subplot(2, 2, sp + 1)
#     # Set the line width when specifying how each line should look.
#     ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c=cb_dark_blue, label='Women', linewidth=10)
#     ax.plot(women_degrees['Year'], 100 - women_degrees[major_cats[sp]], c=cb_orange, label='Men', linewidth=10)
#     for key, spine in ax.spines.items():
#         spine.set_visible(False)
#     ax.set_xlim(1968, 2011)
#     ax.set_ylim(0, 100)
#     ax.set_title(major_cats[sp])
#     ax.tick_params(bottom="off", top="off", left="off", right="off")
#
# plt.legend(loc='upper right')
# plt.show()

# Column names for the six-panel examples, in left-to-right plotting order.
stem_cats = [
    'Engineering',
    'Computer Science',
    'Psychology',
    'Biology',
    'Physical Sciences',
    'Math and Statistics',
]
# fig = plt.figure(figsize=(18, 3))
#
# for sp in range(0, 6):
#     ax = fig.add_subplot(1, 6, sp + 1)
#     ax.plot(women_degrees['Year'], women_degrees[stem_cats[sp]], c=cb_dark_blue, label='Women', linewidth=3)
#     ax.plot(women_degrees['Year'], 100 - women_degrees[stem_cats[sp]], c=cb_orange, label='Men', linewidth=3)
#     for key, spine in ax.spines.items():
#         spine.set_visible(False)
#     ax.set_xlim(1968, 2011)
#     ax.set_ylim(0, 100)
#     ax.set_title(stem_cats[sp])
#     ax.tick_params(bottom="off", top="off", left="off", right="off")
#
# plt.legend(loc='upper right')
# plt.show()

# fig = plt.figure(figsize=(18, 3))
#
# for sp in range(0, 6):
#     ax = fig.add_subplot(1, 6, sp + 1)
#     ax.plot(women_degrees['Year'], women_degrees[stem_cats[sp]], c=cb_dark_blue, label='Women', linewidth=3)
#     ax.plot(women_degrees['Year'], 100 - women_degrees[stem_cats[sp]], c=cb_orange, label='Men', linewidth=3)
#     for key, spine in ax.spines.items():
#         spine.set_visible(False)
#     ax.set_xlim(1968, 2011)
#     ax.set_ylim(0, 100)
#     ax.set_title(stem_cats[sp])
#     ax.tick_params(bottom="off", top="off", left="off", right="off")
# plt.legend(loc='upper right')
# plt.show()
# fig = plt.figure(figsize=(18, 3))
#
# for sp in range(0, 6):
#     ax = fig.add_subplot(1, 6, sp + 1)
#     ax.plot(women_degrees['Year'], women_degrees[stem_cats[sp]], c=cb_dark_blue, label='Women', linewidth=3)
#     ax.plot(women_degrees['Year'], 100 - women_degrees[stem_cats[sp]], c=cb_orange, label='Men', linewidth=3)
#     for key, spine in ax.spines.items():
#         spine.set_visible(False)
#     ax.set_xlim(1968, 2011)
#     ax.set_ylim(0, 100)
#     ax.set_title(stem_cats[sp])
#     ax.tick_params(bottom="off", top="off", left="off", right="off")
#
#     if sp == 0:
#         ax.text(2005, 87, 'Men') #在指定位置添加文字
#         ax.text(2002, 8, 'Women')
#     elif sp == 5:
#         ax.text(2005, 62, 'Men')
#         ax.text(2001, 35, 'Women')
# plt.show()
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息