Python数据分析与机器学习-Matplotlib
2017-12-27 21:57
471 查看
源码下载位置:
http://download.csdn.net/download/adam_zs/10176798
import pandas as pd
import matplotlib.pyplot as plt
# Plot, for four majors, the percentage of bachelor's degrees awarded to women
# and to men, one subplot per major on a 2x2 grid.
women_degrees = pd.read_csv('percent-bachelors-degrees-women-usa.csv')
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']

# Line colours as RGB triples scaled from 0-255 values to the 0-1 range.
cb_dark_blue = (0 / 255, 107 / 255, 164 / 255)
cb_orange = (255 / 255, 128 / 255, 14 / 255)

fig = plt.figure(figsize=(12, 12))

for sp, major in enumerate(major_cats):
    ax = fig.add_subplot(2, 2, sp + 1)
    # The color for each line is assigned here.
    ax.plot(women_degrees['Year'], women_degrees[major], c=cb_dark_blue, label='Women')
    # The men's line is drawn as 100 minus the women's percentage.
    ax.plot(women_degrees['Year'], 100 - women_degrees[major], c=cb_orange, label='Men')
    # Hide the box drawn around the plotting area.
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major)
    # Pass booleans: the string "off" is truthy and does not hide the ticks
    # on current Matplotlib releases.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

# Called after the loop, so the legend lands on the last subplot created.
plt.legend(loc='upper right')
plt.show()
http://download.csdn.net/download/adam_zs/10176798
import pandas as pd
import matplotlib.pyplot as plt
# Plot, for four majors, the percentage of bachelor's degrees awarded to women
# and to men, one subplot per major on a 2x2 grid.
women_degrees = pd.read_csv('percent-bachelors-degrees-women-usa.csv')
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']

# Line colours as RGB triples scaled from 0-255 values to the 0-1 range.
cb_dark_blue = (0 / 255, 107 / 255, 164 / 255)
cb_orange = (255 / 255, 128 / 255, 14 / 255)

fig = plt.figure(figsize=(12, 12))

for sp, major in enumerate(major_cats):
    ax = fig.add_subplot(2, 2, sp + 1)
    # The color for each line is assigned here.
    ax.plot(women_degrees['Year'], women_degrees[major], c=cb_dark_blue, label='Women')
    # The men's line is drawn as 100 minus the women's percentage.
    ax.plot(women_degrees['Year'], 100 - women_degrees[major], c=cb_orange, label='Men')
    # Hide the box drawn around the plotting area.
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major)
    # Pass booleans: the string "off" is truthy and does not hide the ticks
    # on current Matplotlib releases.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

# Called after the loop, so the legend lands on the last subplot created.
plt.legend(loc='upper right')
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# unrate.csv holds the unemployment rate; the plots below treat every
# 12 consecutive rows as one year of monthly values starting in 1948.
unrate = pd.read_csv('unrate.csv')
unrate['DATE'] = pd.to_datetime(unrate['DATE'])

# --- Line chart with plot() ---
# first_twelve = unrate[0:12]
# plt.plot(first_twelve['DATE'], first_twelve['VALUE'])
# plt.xticks(rotation=45)  # xticks sets the rotation angle of the tick labels
# plt.xlabel('Month')
# plt.ylabel('Unemployment Rate')
# plt.title('Monthly Unemployment Trends, 1948')
# plt.show()

# --- Adding subplots to one figure ---
# add_subplot(first, second, index): first is the number of rows,
# second is the number of columns.
# # figure() returns the drawing area
# fig = plt.figure()
# Add subplots. first: rows, second: columns, index: position of this subplot
# ax1 = fig.add_subplot(3, 2, 1)  # add_subplot adds a subplot
# ax2 = fig.add_subplot(3, 2, 2)
# ax6 = fig.add_subplot(3, 2, 6)
# plt.show()

# fig = plt.figure()
# ax1 = fig.add_subplot(2, 1, 1)
# ax2 = fig.add_subplot(2, 1, 2)
# # np.random.randint(low, high, size) draws from [low, high);
# # size=5 gives e.g. [2 2 1 2 1], size=(3, 2) gives e.g. [[1 4][1 4][3 3]]
# ax1.plot(np.random.randint(1, 5, size=5), np.arange(5))
# ax2.plot(np.arange(10) * 3, np.arange(10))
# plt.show()

unrate['MONTH'] = unrate['DATE'].dt.month

# --- Several lines on the same chart ---
# fig = plt.figure(figsize=(6, 3))  # figure size (width, height) in inches
# plt.plot(unrate['MONTH'][:12], unrate['VALUE'][:12], c="red")  # c: colour
# plt.plot(unrate['MONTH'][12:24], unrate['VALUE'][12:24], c="blue")
# plt.show()

# fig = plt.figure(figsize=(10, 6))
# colors = ['red', 'blue', 'green', 'orange', 'black']
# for i in range(5):
#     start_index = i * 12
#     end_index = (i + 1) * 12
#     subset = unrate[start_index:end_index]
#     plt.plot(subset['MONTH'], subset['VALUE'], c=colors[i])
# plt.show()

# --- Labelling each line ---
# fig = plt.figure(figsize=(10, 6))
# colors = ['red', 'blue', 'green', 'orange', 'black']
# for i in range(5):
#     start_index = i * 12
#     end_index = (i + 1) * 12
#     subset = unrate[start_index:end_index]
#     label = str(1948 + i)
#     plt.plot(subset['MONTH'], subset['VALUE'], c=colors[i], label=label)  # label: legend entry
# plt.legend()  # show the legend
# plt.show()

# --- Final version: one coloured, labelled line per year, 1948-1952 ---
fig = plt.figure(figsize=(10, 6))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i, color in enumerate(colors):
    # Rows i*12 .. i*12+11 are plotted as the year 1948 + i.
    subset = unrate[i * 12:(i + 1) * 12]
    label = str(1948 + i)
    plt.plot(subset['MONTH'], subset['VALUE'], c=color, label=label)

# help() prints the documentation itself and returns None, so it is not
# wrapped in print() (which would add a stray "None" line).
help(plt.legend)
plt.legend(loc='upper left')  # loc='best' lets Matplotlib pick a suitable spot
plt.xlabel('Month, Integer')
plt.ylabel('Unemployment Rate, Percent')
plt.title('Monthly Unemployment Trends, 1948-1952')
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
from numpy import arange

# Widen pandas' console output so printed frames are not truncated.
# 'display.height' is not set: it is not a recognised option in current
# pandas and set_option() raises OptionError for it.
pd.set_option('display.max_rows', 9999)
pd.set_option('display.max_columns', 9999)
pd.set_option('display.width', 9999)

reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]
# print(norm_reviews[:1])

# The Axes.bar() method has 2 required parameters: the bar positions along
# the x axis and the bar heights.
# NOTE(review): older Matplotlib releases named the first parameter `left`
# (x coordinate of the left side of each bar) -- confirm against the
# installed version.
#
# plt.subplots() is a function that returns a tuple containing a figure
# object and an axes object, so `fig, ax = plt.subplots()` unpacks that tuple
# into fig and ax. Having fig is useful when you want to change figure-level
# attributes or save the figure as an image file later (for example with
# fig.savefig('yourfilename.png')). You do not have to use the returned
# figure object, but that is why it shows up so often. Also, every axes
# object (the object with the plotting methods) has a parent figure object.

# --- Bar chart with bar() ---
# (The examples below use .loc; the .ix indexer they were first written with
# has been removed from pandas. The frame keeps read_csv's default integer
# index, so the row labels equal the row positions.)
# # num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
# # print(norm_reviews.head())
# # print(norm_reviews.loc[4, num_cols])  # show the num_cols columns of row 4
# bar_heights = norm_reviews.loc[2, num_cols].values  # bar heights
# bar_positions = arange(5) + 0.75  # distance of each bar from the origin
# fig, ax = plt.subplots()  # ax is used to operate on the chart
# ax.bar(bar_positions, bar_heights, 0.5)  # .bar draws a bar chart; 0.5 is the bar width
# plt.show()

# num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
# bar_heights = norm_reviews.loc[0, num_cols].values
# bar_positions = arange(5) + 0.75
# tick_positions = range(1, 6)
# fig, ax = plt.subplots()
# ax.bar(bar_positions, bar_heights, 0.5)
# ax.set_xticks(tick_positions)  # positions of the x-axis labels
# ax.set_xticklabels(num_cols, rotation=45)
# ax.set_xlabel('Rating Source')
# ax.set_ylabel('Average Rating')
# ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
# plt.show()

# num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
# bar_widths = norm_reviews.loc[0, num_cols].values
# bar_positions = arange(5) + 0.75
# tick_positions = range(1, 6)
# fig, ax = plt.subplots()
# ax.barh(bar_positions, bar_widths, 0.5)  # .barh draws horizontal bars
# ax.set_yticks(tick_positions)
# ax.set_yticklabels(num_cols)
# ax.set_ylabel('Rating Source')
# ax.set_xlabel('Average Rating')
# ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
# plt.show()

# --- Scatter plot with scatter() ---
# fig, ax = plt.subplots()
# ax.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])
# ax.set_xlabel('Fandango')
# ax.set_ylabel('Rotten Tomatoes')
# plt.show()

# Two stacked scatter plots of the same pair of columns with the axes swapped.
fig = plt.figure(figsize=(5, 10))
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)

ax1.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])
ax1.set_xlabel('Fandango')
ax1.set_ylabel('Rotten Tomatoes')

ax2.scatter(norm_reviews['RT_user_norm'], norm_reviews['Fandango_Ratingvalue'])
ax2.set_xlabel('Rotten Tomatoes')
ax2.set_ylabel('Fandango')

plt.show()
import pandas as pd
import matplotlib.pyplot as plt

# Widen pandas' console output so printed frames are not truncated.
# 'display.height' is not set: it is not a recognised option in current
# pandas and set_option() raises OptionError for it.
pd.set_option('display.max_rows', 9999)
pd.set_option('display.max_columns', 9999)
pd.set_option('display.width', 9999)

reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
norm_reviews = reviews[cols]

# value_counts() counts how many times each value occurs
# fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts()
# fandango_distribution = fandango_distribution.sort_index()
#
# imdb_distribution = norm_reviews['IMDB_norm'].value_counts()
# imdb_distribution = imdb_distribution.sort_index()
#
# print(fandango_distribution)
# print(imdb_distribution)

# --- Histogram with hist() ---
# hist() takes a collection of values and counts how many fall in each bin.
# range: the value range to display along the x axis; values outside it are dropped
# bins: the number of bars to draw
# fig, ax = plt.subplots()
# print(norm_reviews['Fandango_Ratingvalue'])
# # ax.hist(norm_reviews['Fandango_Ratingvalue'])  # bins defaults to 10
# # ax.hist(norm_reviews['Fandango_Ratingvalue'], bins=20)
# ax.hist(norm_reviews['Fandango_Ratingvalue'], range=(4, 5), bins=20)
# plt.show()

# fig = plt.figure(figsize=(5, 20))
# ax1 = fig.add_subplot(4, 1, 1)
# ax2 = fig.add_subplot(4, 1, 2)
# ax3 = fig.add_subplot(4, 1, 3)
# ax4 = fig.add_subplot(4, 1, 4)
# ax1.hist(norm_reviews['Fandango_Ratingvalue'], bins=20, range=(0, 5))
# ax1.set_title('Distribution of Fandango Ratings')
# ax1.set_ylim(0, 50)  # set_ylim sets the y-axis interval
#
# ax2.hist(norm_reviews['RT_user_norm'], 20, range=(0, 5))
# ax2.set_title('Distribution of Rotten Tomatoes Ratings')
# ax2.set_ylim(0, 50)
#
# ax3.hist(norm_reviews['Metacritic_user_nom'], 20, range=(0, 5))
# ax3.set_title('Distribution of Metacritic Ratings')
# ax3.set_ylim(0, 50)
#
# ax4.hist(norm_reviews['IMDB_norm'], 20, range=(0, 5))
# ax4.set_title('Distribution of IMDB Ratings')
# ax4.set_ylim(0, 50)
#
# plt.show()

# --- Box plot (quartile plot) ---
# fig, ax = plt.subplots()
# print(norm_reviews['RT_user_norm'])
# ax.boxplot(norm_reviews['RT_user_norm'])
# ax.set_xticklabels(['Rotten Tomatoes'])
# ax.set_ylim(0, 5)
# plt.show()

# One box per rating column, drawn side by side on a shared 0-5 y axis.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
fig, ax = plt.subplots()
ax.boxplot(norm_reviews[num_cols].values)
ax.set_xticklabels(num_cols, rotation=45)
ax.set_ylim(0, 5)
plt.show()
import pandas as pd
import matplotlib.pyplot as plt

# Share of men and women across different fields of study.
women_degrees = pd.read_csv('percent-bachelors-degrees-women-usa.csv')

# plt.plot(women_degrees['Year'], women_degrees['Biology'])
# plt.show()

# plt.plot(women_degrees['Year'], women_degrees['Biology'], c='blue', label='Women')
# plt.plot(women_degrees['Year'], 100 - women_degrees['Biology'], c='green', label='Men')
# plt.legend(loc='upper right')
# plt.title('Percentage of Biology Degrees Awarded By Gender')
# plt.show()

# fig, ax = plt.subplots()
# ax.plot(women_degrees['Year'], women_degrees['Biology'], label='Women')
# ax.plot(women_degrees['Year'], 100 - women_degrees['Biology'], label='Men')
# # hide the tick marks of the chart
# ax.tick_params(bottom=False, top=False, left=False, right=False)
# ax.set_title('Percentage of Biology Degrees Awarded By Gender')
# ax.legend(loc="upper right")
#
# plt.show()

# fig, ax = plt.subplots()
# ax.plot(women_degrees['Year'], women_degrees['Biology'], c='blue', label='Women')
# ax.plot(women_degrees['Year'], 100 - women_degrees['Biology'], c='green', label='Men')
# ax.tick_params(bottom=False, top=False, left=False, right=False)
#
# for key, spine in ax.spines.items():
#     spine.set_visible(False)
# ax.legend(loc='upper right')
# plt.show()

# --- First figure: a plain 2x2 grid, one subplot per major ---
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
fig = plt.figure(figsize=(12, 12))

for sp, major in enumerate(major_cats):
    ax = fig.add_subplot(2, 2, sp + 1)
    ax.plot(women_degrees['Year'], women_degrees[major], c='blue', label='Women')
    # The men's line is drawn as 100 minus the women's percentage.
    ax.plot(women_degrees['Year'], 100 - women_degrees[major], c='green', label='Men')
    # Add your code here.

# Calling pyplot.legend() here will add the legend to the last subplot that was created.
plt.legend(loc='upper right')
plt.show()

# --- Second figure: same grid with spines and ticks hidden, fixed axis
# limits and a title per subplot ---
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
fig = plt.figure(figsize=(12, 12))

for sp, major in enumerate(major_cats):
    ax = fig.add_subplot(2, 2, sp + 1)
    ax.plot(women_degrees['Year'], women_degrees[major], c='blue', label='Women')
    ax.plot(women_degrees['Year'], 100 - women_degrees[major], c='green', label='Men')
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major)
    # Pass booleans: the string "off" is truthy and does not hide the ticks
    # on current Matplotlib releases.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

# Calling pyplot.legend() here will add the legend to the last subplot that was created.
plt.legend(loc='upper right')
plt.show()
import pandas as pd
import matplotlib.pyplot as plt

# Only the data loading and the constant definitions below are live code;
# every plotting example in this script is kept commented out.
# In the commented examples tick_params() is given booleans, because the
# string "off" is truthy and does not hide the ticks on current Matplotlib.
women_degrees = pd.read_csv('percent-bachelors-degrees-women-usa.csv')
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']

# plt.plot(women_degrees['Year'], women_degrees['Biology'])
# plt.show()

# plt.plot(women_degrees['Year'], women_degrees['Biology'], c='blue', label='Women')
# plt.plot(women_degrees['Year'], 100 - women_degrees['Biology'], c='green', label='Men')
# plt.legend(loc='upper right')
# plt.title('Percentage of Biology Degrees Awarded By Gender')
# plt.show()

# fig, ax = plt.subplots()
# ax.plot(women_degrees['Year'], women_degrees['Biology'], label='Women')
# ax.plot(women_degrees['Year'], 100 - women_degrees['Biology'], label='Men')
# # hide the tick marks of the chart
# ax.tick_params(bottom=False, top=False, left=False, right=False)
# ax.set_title('Percentage of Biology Degrees Awarded By Gender')
# ax.legend(loc="upper right")
#
# plt.show()

# fig, ax = plt.subplots()
# ax.plot(women_degrees['Year'], women_degrees['Biology'], c='blue', label='Women')
# ax.plot(women_degrees['Year'], 100 - women_degrees['Biology'], c='green', label='Men')
# ax.tick_params(bottom=False, top=False, left=False, right=False)
#
# for key, spine in ax.spines.items():
#     spine.set_visible(False)
# ax.legend(loc='upper right')
# plt.show()

# fig = plt.figure(figsize=(12, 12))
#
# for sp in range(0, 4):
#     ax = fig.add_subplot(2, 2, sp + 1)
#     ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c='blue', label='Women')
#     ax.plot(women_degrees['Year'], 100 - women_degrees[major_cats[sp]], c='green', label='Men')
#
# # Calling pyplot.legend() here will add the legend to the last subplot that was created.
# plt.legend(loc='upper right')
# plt.show()

# major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
# fig = plt.figure(figsize=(12, 12))
#
# for sp in range(0, 4):
#     ax = fig.add_subplot(2, 2, sp + 1)
#     ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c='blue', label='Women')
#     ax.plot(women_degrees['Year'], 100 - women_degrees[major_cats[sp]], c='green', label='Men')
#     for key, spine in ax.spines.items():
#         spine.set_visible(False)
#     ax.set_xlim(1968, 2011)
#     ax.set_ylim(0, 100)
#     ax.set_title(major_cats[sp])
#     ax.tick_params(bottom=False, top=False, left=False, right=False)
#
# # Calling pyplot.legend() here will add the legend to the last subplot that was created.
# plt.legend(loc='upper right')
# plt.show()

# Setting Line Width
# Line colours as RGB triples scaled from 0-255 values to the 0-1 range.
cb_dark_blue = (0 / 255, 107 / 255, 164 / 255)
cb_orange = (255 / 255, 128 / 255, 14 / 255)

# fig = plt.figure(figsize=(12, 12))
#
# for sp in range(0, 4):
#     ax = fig.add_subplot(2, 2, sp + 1)
#     # Set the line width when specifying how each line should look.
#     ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c=cb_dark_blue, label='Women', linewidth=10)
#     ax.plot(women_degrees['Year'], 100 - women_degrees[major_cats[sp]], c=cb_orange, label='Men', linewidth=10)
#     for key, spine in ax.spines.items():
#         spine.set_visible(False)
#     ax.set_xlim(1968, 2011)
#     ax.set_ylim(0, 100)
#     ax.set_title(major_cats[sp])
#     ax.tick_params(bottom=False, top=False, left=False, right=False)
#
# plt.legend(loc='upper right')
# plt.show()

stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']

# fig = plt.figure(figsize=(18, 3))
#
# for sp in range(0, 6):
#     ax = fig.add_subplot(1, 6, sp + 1)
#     ax.plot(women_degrees['Year'], women_degrees[stem_cats[sp]], c=cb_dark_blue, label='Women', linewidth=3)
#     ax.plot(women_degrees['Year'], 100 - women_degrees[stem_cats[sp]], c=cb_orange, label='Men', linewidth=3)
#     for key, spine in ax.spines.items():
#         spine.set_visible(False)
#     ax.set_xlim(1968, 2011)
#     ax.set_ylim(0, 100)
#     ax.set_title(stem_cats[sp])
#     ax.tick_params(bottom=False, top=False, left=False, right=False)
#
# plt.legend(loc='upper right')
# plt.show()

# fig = plt.figure(figsize=(18, 3))
#
# for sp in range(0, 6):
#     ax = fig.add_subplot(1, 6, sp + 1)
#     ax.plot(women_degrees['Year'], women_degrees[stem_cats[sp]], c=cb_dark_blue, label='Women', linewidth=3)
#     ax.plot(women_degrees['Year'], 100 - women_degrees[stem_cats[sp]], c=cb_orange, label='Men', linewidth=3)
#     for key, spine in ax.spines.items():
#         spine.set_visible(False)
#     ax.set_xlim(1968, 2011)
#     ax.set_ylim(0, 100)
#     ax.set_title(stem_cats[sp])
#     ax.tick_params(bottom=False, top=False, left=False, right=False)
# plt.legend(loc='upper right')
# plt.show()

# fig = plt.figure(figsize=(18, 3))
#
# for sp in range(0, 6):
#     ax = fig.add_subplot(1, 6, sp + 1)
#     ax.plot(women_degrees['Year'], women_degrees[stem_cats[sp]], c=cb_dark_blue, label='Women', linewidth=3)
#     ax.plot(women_degrees['Year'], 100 - women_degrees[stem_cats[sp]], c=cb_orange, label='Men', linewidth=3)
#     for key, spine in ax.spines.items():
#         spine.set_visible(False)
#     ax.set_xlim(1968, 2011)
#     ax.set_ylim(0, 100)
#     ax.set_title(stem_cats[sp])
#     ax.tick_params(bottom=False, top=False, left=False, right=False)
#
#     if sp == 0:
#         ax.text(2005, 87, 'Men')  # add text at the given position
#         ax.text(2002, 8, 'Women')
#     elif sp == 5:
#         ax.text(2005, 62, 'Men')
#         ax.text(2001, 35, 'Women')
# plt.show()
相关文章推荐
- python数据分析基础2_matplotlib
- 从零开始学Python数据分析【12】-- matplotlib(箱线图)
- 从零开始学Python数据分析【11】-- matplotlib(饼图)
- python 机器学习的开发环境搭建(numpy,scipy,matplotlib)
- Python数据分析matplotlib设置多个子图的间距方法
- Python 机器学习的开发环境搭建(numpy,scipy,matplotlib)
- 【Matplotlib】数据可视化实例分析
- 数据分析之Matplotlib绘图-02
- 机器学习-Matplotlib绘制决策树
- python数据分析——matplotlib生成折线图,散点图和直方图
- python-matplotlib-作图分析
- 分析数据:使用Matplotlib创建散点图---路径问题
- 动态可视化 数据可视化之魅D3,Processing,pandas数据分析,科学计算包Numpy,可视化包Matplotlib,Matlab语言可视化的工作,Matlab没有指针和引用是个大问题
- Pandas与Matplotlib在统计分析中的应用
- Python 数据分析之matplotlib
- [python之数据分析] 基础篇1- Numpy,Scipy,Matplotlib 快速入门攻略
- python:selenium+matplotlib,分析某体育论坛中,最受欢迎的nba球队
- 机器学习的相关模块安装:pandas,numpy, scipy,scikit_learn(sklearn), matplotlib在winows系统下的安装问题
- 《机器学习实战》2.2.2分析数据:使用matplotlib创建散点图