您的位置:首页 > 编程语言 > Python开发

第8章 图形化显示地震危机数据(海地)

2017-12-06 22:06 351 查看

第8章 图形化显示地震危机数据(海地)

import pandas as pd
import numpy as np
from pandas import Series,DataFrame
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap

##################################数据处理#################################
data = pd.read_csv(r'E:\pydata-book-2nd-edition\pydata-book-2nd-edition\datasets\haiti\haiti.csv')
data.info()###可以看出CATEGORY列中有数据缺失需要处理
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3593 entries, 0 to 3592
Data columns (total 10 columns):
Serial            3593 non-null int64
INCIDENT TITLE    3593 non-null object
INCIDENT DATE     3593 non-null object
LOCATION          3592 non-null object
DESCRIPTION       3593 non-null object
CATEGORY          3587 non-null object
LATITUDE          3593 non-null float64
LONGITUDE         3593 non-null float64
APPROVED          3593 non-null object
VERIFIED          3593 non-null object
dtypes: float64(2), int64(1), object(7)
memory usage: 280.8+ KB

data.describe()####可以看出有异常地理位置
Out[16]:
Serial     LATITUDE    LONGITUDE
count  3593.000000  3593.000000  3593.000000
mean   2080.277484    18.611495   -72.322680
std    1171.100360     0.738572     3.650776
min       4.000000    18.041313   -74.452757
25%    1074.000000    18.524070   -72.417500
50%    2163.000000    18.539269   -72.335000
75%    3088.000000    18.561820   -72.293570
max    4052.000000    50.226029   114.174287

####对异常数据的处理
data = data[(data.LATITUDE > 18)&(data.LATITUDE < 20)&(data.LONGITUDE > -75)&(data.LONGITUDE < -70)&data.CATEGORY.notnull()]
data.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 3569 entries, 0 to 3592
Data columns (total 10 columns):
Serial            3569 non-null int64
INCIDENT TITLE    3569 non-null object
INCIDENT DATE     3569 non-null object
LOCATION          3568 non-null object
DESCRIPTION       3569 non-null object
CATEGORY          3569 non-null object
LATITUDE          3569 non-null float64
LONGITUDE         3569 non-null float64
APPROVED          3569 non-null object
VERIFIED          3569 non-null object
dtypes: float64(2), int64(1), object(7)
memory usage: 306.7+ KB

###CATEGORY中的存储类型
#2. Urgences logistiques | Vital Lines, 2d. Refuge | Shelter needed, 2a. Penurie d'aliments | Food Shortage,

##对一个字符串提取出','分隔的每个元素
def to_cat_list(catstr):
stripped = (x.strip() for x in catstr.split(','))
return [x for x in stripped if x]

###利用上一个函数对整个Series操作
def get_all_categories(cat_series):
cat_sets = (set(to_cat_list(x)) for x in cat_series)
return sorted(set.union(*cat_sets))

#修剪出编码和后面的英文名称
#'2.Urgences logistiques | Vital Lines' →('2', 'Vital Lines')
def get_english(cat):
code,names = cat.split('.')
if '|' in names:
names = names.split(' | ')[1]
return code,names.strip()

all_cats = get_all_categories(data.CATEGORY)

###制作字典,将编码和英文名称对应
english_mapping = dict(get_english(x) for x in all_cats)
#english_mapping['2a']
#Out[25]: 'Food Shortage'
#english_mapping['6c']
#Out[26]: 'Earthquake and aftershocks'

####抽取出所有的编码
def get_code(seq):
return [x.split('.')[0] for x in seq if x]

all_codes = get_code(all_cats)##所有编码组成的list

###用抽取出的不同编码组成索引
code_index = pd.Index(np.unique(all_codes))
###建立一个空DataFrame,行索引与data一样,列索引是不同的编码
dummy_frame = DataFrame(np.zeros((len(data),len(code_index))),index = data.index,columns = code_index)
#dummy_frame.ix[:,:6].info()
#<class 'pandas.core.frame.DataFrame'>
#Int64Index: 3569 entries, 0 to 3592
#Data columns (total 6 columns):
#1     3569 non-null float64
#1a    3569 non-null float64
#1b    3569 non-null float64
#1c    3569 non-null float64
#1d    3569 non-null float64
#2     3569 non-null float64
#dtypes: float64(6)
#memory usage: 195.2 KB

###对dummy_frame中每一行适当的值设为1
for row,cat in zip(data.index,data.CATEGORY):
codes = get_code(to_cat_list(cat))
dummy_frame.ix[row,codes] = 1
##dummy_frame与data连接,并在列名加前缀
data = data.join(dummy_frame.add_prefix('catagory_'))
#data.ix[:,10:15].info()
#<class 'pandas.core.frame.DataFrame'>
#Int64Index: 3569 entries, 0 to 3592
#Data columns (total 5 columns):
#catagory_1     3569 non-null float64
#catagory_1a    3569 non-null float64
#catagory_1b    3569 non-null float64
#catagory_1c    3569 non-null float64
#catagory_1d    3569 non-null float64
#dtypes: float64(5)
#memory usage: 327.3 KB

##################################画图#################################

###绘制黑白海地地图
def basic_haiti_map(ax = None,lllat=17.25,urlat=20.25,lllon=-75,urlon=-71):
m = Basemap(ax=ax,projection='stere',lon_0=(urlon+lllon)/2,lat_0=(urlat+lllat)/2,
llcrnrlat=lllat,urcrnrlat=urlat,
llcrnrlon=lllon,urcrnrlon=urlon,resolution='f')
m.drawcoastlines()
m.drawstates()
m.drawcountries()
return m

fig,axes = plt.subplots(nrows=2,ncols=2,figsize=(12,10))
fig.subplots_adjust(hspace = 0.05,wspace = 0.05)

to_plot=['2a','1','3c','7a']

lllat = 17.25
urlat = 20.25
lllon = -75
urlon = -71

###在每个subplot内画一张地图
for code,ax in zip(to_plot,axes.flat):
m = basic_haiti_map(ax,lllat=lllat,urlat=urlat,lllon=lllon,urlon=urlon)
cat_data = data[data['catagory_%s' % code] == 1]
###计算地图的投影坐标
######注意此处按书上运行不出来,需要先将Series格式转为array
x,y = m(np.array(cat_data.LONGITUDE),np.array(cat_data.LATITUDE))
###在地图上画点
m.plot(x,y,'k.',alpha=0.5)
ax.set_title('%s: %s' % (code,english_mapping[code]))




############叠加道路信息画出反应太子港地区食物短缺的图
fig = plt.figure()###创建画布
ax = fig.add_subplot(1,1,1)####添加一个图
###设定合适的经纬度
lllat = 18.43;urlat = 18.69;lllon = -72.57;urlon = -72.08
####利用上面创建绘制海地地图的函数画出太子港地区黑白地图
m = basic_haiti_map(ax,lllat = lllat,urlat=urlat,lllon=lllon,urlon=urlon)
#######叠加道路数据
shapefile_path='E:\pydata-book-2nd-edition\pydata-book-2nd-edition\datasets\haiti\PortAuPrince_Roads\PortAuPrince_Roads'
#####注意这里的路径加上文件名但不加后缀
m.readshapefile(shapefile_path,'road')
code = '2a'
##读取出现食物短缺的条目
cat_data = data[data['catagory_%s' % code]==1]
####将条目中的经纬度映射到xy坐标
x,y = m(np.array(cat_data.LONGITUDE),np.array(cat_data.LATITUDE))
##画图并设置标题
m.plot(x,y,'k.',alpha = 0.5)
ax.set_title('%s reported in Port-au-Prince' % english_mapping[code])
##保存图片
plt.savefig(r'E:\pycode\data\food_shortage.png',dpi=400,bbox_inches = 'tight')


内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  图形 python 数据分析