02.26 用 Python 绘制新冠肺炎 Bar Chart Race(条形竞赛图)

1,先取得数据

我找的是这个地址:https://github.com/CSSEGISandData/COVID-19

进入该仓库中的目录:/blob/master/csse_covid_19_data/csse_covid_19_time_series/

下载time_series_19-covid-Confirmed.csv这个文件

这份数据的来源应该是约翰斯·霍普金斯大学(Johns Hopkins University,JHU)的系统科学与工程中心(The Center for Systems Science and Engineering,CSSE)。

2,导入各种需要用到的模块

import os
import sys

import numpy as np
import pandas as pd
from pandas import DataFrame

import matplotlib
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib import cm

# Global matplotlib settings:
#   font.sans-serif    -> SimHei, so Chinese labels are displayed correctly
#   axes.unicode_minus -> False, so the minus sign is displayed correctly
matplotlib.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

3,读入文件

def get_data_filepath():
    """Return the path of the confirmed-cases CSV that sits next to this script.

    Returns:
        str: ``time_series_19-covid-Confirmed.csv`` joined onto the directory
        of this file (resolved with ``os.path.realpath``).
    """
    # Directory that contains this script.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    # os.path.join picks the right separator instead of a hard-coded '/'.
    return os.path.join(script_dir, 'time_series_19-covid-Confirmed.csv')


def get_df_all():
    """Read the whole CSV into a DataFrame.

    Returns:
        pandas.DataFrame: the file parsed as UTF-8 with its first row used
        as the header.
    """
    return pd.read_csv(get_data_filepath(), header=0, encoding="utf-8")


df_all = get_df_all()
print(df_all)


4,加工某一天的数据

def get_df(day):
    """Return per-country totals for one day, leaving out Mainland China.

    Args:
        day: Name of the column of ``df_all`` to aggregate, e.g. ``'2/24/20'``.

    Returns:
        A DataFrame with the columns ``'Country/Region'`` and ``day``: one row
        per country/region holding the summed value, sorted ascending by it.
    """
    # Drop the Mainland China rows so only the other regions are counted.
    # `~` is the boolean-mask negation operator (unary minus is not).
    not_mainland = ~df_all['Country/Region'].isin(['Mainland China'])
    # Keep only the region name and the requested day.
    df = df_all.loc[not_mainland, ['Country/Region', day]]
    # Sum the rows that belong to the same country/region.
    df = df.groupby('Country/Region', as_index=False)[day].sum()
    # Order by the total, smallest first.
    return df.sort_values(by=day, ascending=True)


def draw_barchart(day):
    """Print the aggregated table for ``day`` (no chart is drawn yet)."""
    dff = get_df(day)
    print(dff)


df_all = get_df_all()
draw_barchart('2/24/20')


5,先出图,不过暂时不好看

def draw_barchart(day):
    """Draw the totals of ``day`` as horizontal bars and show the figure.

    Args:
        day: Column name passed on to ``get_df``.
    """
    day_totals = get_df(day)
    regions = day_totals['Country/Region']
    counts = day_totals[day]
    # Horizontal bar chart on the module-level axes.
    ax.barh(regions, counts)
    plt.show()


df_all = get_df_all()
# The figure and axes are created outside the function because the
# drawing function is meant to be called repeatedly.
fig, ax = plt.subplots(figsize=(8, 6))
draw_barchart('2/24/20')

6,加入颜色

def draw_barchart(day):
    """Draw the totals of ``day`` as coloured horizontal bars and show them.

    Each bar takes the colour stored for its region in ``colors_dict``.

    Args:
        day: Column name passed on to ``get_df``.
    """
    day_totals = get_df(day)
    regions = day_totals['Country/Region']
    bar_colors = [colors_dict[region] for region in regions]
    # Horizontal bar chart, one colour per region.
    ax.barh(regions, day_totals[day], color=bar_colors)
    plt.show()


df_all = get_df_all()

# The figure and axes are created outside the function because the
# drawing function is meant to be called repeatedly.
fig, ax = plt.subplots(figsize=(8, 6))

# The grouping and sorting below exist only to build the colour table.
df_temp = df_all.groupby('Country/Region', as_index=False)['2/24/20'].sum()
df_temp = df_temp.sort_values(by='2/24/20', ascending=True)
country_region = df_temp['Country/Region'].unique()
# One colour per region, sampled evenly from the jet colormap.
colors = cm.jet(np.linspace(0.1, 0.9, len(country_region)))
colors_dict = dict(zip(country_region, colors))

draw_barchart('2/24/20')

7,继续美化

def draw_barchart(day):
    """Redraw the shared axes as the styled bar chart for ``day``.

    The axes are cleared first, so the function can be called once per frame
    (for example as a ``FuncAnimation`` callback) without bars and labels of
    earlier calls piling up underneath the new ones.

    Args:
        day: Column name passed on to ``get_df``; it is also printed as the
            large date label in the lower-right corner.
    """
    dff = get_df(day)

    # Start from an empty axes: everything below is drawn from scratch.
    ax.clear()

    bar_colors = [colors_dict[region] for region in dff['Country/Region']]
    # Horizontal bar chart, one colour per region.
    ax.barh(dff['Country/Region'], dff[day], color=bar_colors)

    # Label every bar with "<region> <count>" at the end of the bar.
    for row, (value, name) in enumerate(zip(dff[day], dff['Country/Region'])):
        ax.text(value, row - .25, f'{name} {value}',
                ha='left', size=8, color='#777777')

    # Large date label (axes coordinates, lower-right corner).
    ax.text(1, 0.1, day, transform=ax.transAxes,
            color='#777777', size=46, ha='right')

    # X axis: thousands separators, ticks on top, grey labels; no y ticks.
    ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
    ax.xaxis.set_ticks_position('top')
    ax.tick_params(axis='x', colors='#777777', labelsize=12)
    ax.set_yticks([])
    ax.grid(which='major', axis='x', linestyle='-')
    ax.set_axisbelow(True)

    # Title above the axes and author credit in the lower-right corner.
    ax.text(0, 1.12, '确诊人数', transform=ax.transAxes,
            size=24, weight=600, ha='left')
    ax.text(1, 0, '作者:bjrobin2006@126.com', transform=ax.transAxes,
            ha='right', color='#777777',
            bbox=dict(facecolor='white', alpha=0.8, edgecolor='white'))

    plt.box(False)
    fig.tight_layout()  # adjust the outer margins of the figure
    # NOTE(review): plt.show() also runs when this function is used as an
    # animation frame callback; consider moving it to the caller -- confirm.
    plt.show()


8,生成动画

def save_ImageMagickWriter(ani):
    """Save ``ani`` as ``bar_chart_race/ImageMagickWriter.gif``.

    Args:
        ani: Animation object whose ``save(path, writer=...)`` is called.
    """
    out_path = 'bar_chart_race/ImageMagickWriter.gif'
    # Make sure the output folder exists so saving does not fail on a
    # missing directory.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    # NOTE(review): codec='h264' is passed through unchanged; presumably it
    # has no effect on GIF output -- confirm.
    writer = animation.ImageMagickWriter(codec='h264')
    ani.save(out_path, writer=writer)


def animate():
    """Build the animation (one frame per column of ``df_all``) and save it."""
    # Skip the first four columns; the remaining column names are the frames
    # (presumably the date columns -- confirm against the CSV).
    dr = df_all.columns.values[4:]
    ani = animation.FuncAnimation(fig, draw_barchart, frames=dr,
                                  interval=200, repeat=False)
    save_ImageMagickWriter(ani)


df_all = get_df_all()

# The grouping and sorting below exist only to build the colour table.
df_temp = df_all.groupby('Country/Region', as_index=False)['2/24/20'].sum()
df_temp = df_temp.sort_values(by='2/24/20', ascending=True)

# The figure and axes are created outside the functions because the
# drawing function is called repeatedly.
fig, ax = plt.subplots(figsize=(8, 6))
country_region = df_temp['Country/Region'].unique()
# One colour per region, sampled evenly from the jet colormap.
colors = cm.jet(np.linspace(0.1, 0.9, len(country_region)))
colors_dict = dict(zip(country_region, colors))

animate()

9,得到动图