本文共 2024 字,大约阅读时间需要 6 分钟。
# -*- coding: utf-8 -*-
# Short pandas walkthrough: grouping, pivot tables, time-series resampling,
# time-zone localization, and CSV / Excel round trips.
#
# Columns C and D are filled with random numbers, so every sample output shown
# in the comments below is illustrative only; the values differ on each run.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

df = pd.DataFrame({
    'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
    'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
    'C': np.random.randn(8),
    'D': np.random.randn(8),
})

# --- Grouping ---------------------------------------------------------------
# Summary statistics per group, first keyed by A alone, then by (A, B).
print(df.groupby('A').describe())
print(df.groupby(['A', 'B']).describe())
# Sample output of the (A, B) grouping:
#               C                      ...         D
#           count      mean       std  ...       50%       75%       max
# A   B                                ...
# bar one     1.0  1.635945       NaN  ... -2.127976 -2.127976 -2.127976
#     three   1.0 -1.323161       NaN  ... -1.473265 -1.473265 -1.473265
#     two     1.0 -0.273423       NaN  ... -0.635216 -0.635216 -0.635216
# foo one     2.0  0.413473  0.268983  ...  0.443146  0.558410  0.673674
#     three   1.0 -1.413352       NaN  ... -0.902088 -0.902088 -0.902088
#     two     2.0 -0.081993  1.494454  ... -0.481973 -0.429782 -0.377591

# --- Pivot tables -----------------------------------------------------------
# Rows are the values of A, columns are the values of B, cells aggregate C and D
# (pivot_table's default aggregation is the mean).
print(pd.pivot_table(df, values=['C', 'D'], index='A', columns='B'))
# Sample output:
#             C                             D
# B         one     three       two       one     three       two
# A
# bar  1.635945 -1.323161 -0.273423 -2.127976 -1.473265 -0.635216
# foo  0.413473 -1.413352 -0.081993  0.443146 -0.902088 -0.481973

# --- Time series ------------------------------------------------------------
# Start with 100 points spaced 10 seconds apart (1000 s in total); resampling
# into 5-minute bins leaves 4 rows.
# Lowercase aliases ('10s', '5min') are used because the uppercase 'S' alias is
# deprecated in recent pandas releases; the lowercase forms work in older ones too.
rng = pd.date_range('1/1/2021', periods=100, freq='10s')
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
tp = ts.resample('5min').sum()
print(tp)
# Sample output (the Freq / dtype footer varies with pandas version and platform):
# 2021-01-01 00:00:00    6402
# 2021-01-01 00:05:00    6853
# 2021-01-01 00:10:00    7344
# 2021-01-01 00:15:00    3633
# Freq: 5T, dtype: int32

# --- Time zones -------------------------------------------------------------
# tz_localize attaches a zone to the naive timestamps; it does not shift them.
print(tp.tz_localize('UTC'))
print(tp.tz_localize('US/Eastern'))

# --- Visualization (left disabled, as in the original post) ------------------
# ts = df.cumsum()
# df.plot()
# plt.show()

# --- Data input / output ------------------------------------------------------
# Use the column *label* so that A is moved into the index. Passing the Series
# df['A'] would keep column A as well, and the CSV would then contain two 'A'
# columns (read back as 'A' and 'A.1').
df = df.set_index('A')
df.to_csv('foo.csv')
# Restore the index that to_csv wrote as the first column.
df = pd.read_csv('foo.csv', index_col='A')

# Writing .xlsx requires an Excel engine (for example openpyxl) to be installed.
df.to_excel('foo.xlsx', sheet_name='Sheet1')
t = pd.read_excel('foo.xlsx', sheet_name='Sheet1', index_col='A', na_values=['NA'])
转载地址:http://ghben.baihongyu.com/