本文共 2530 字,大约阅读时间需要 8 分钟。
重新索引 2018/12/14
1.函数:
df.reindex(labels=None, index=None, columns=None, axis=None, method=None,copy=True, level=None, fill_value=nan, limit=None, tolerance=None)
# 重新索引会更改行标签和列标签,数据按标签对齐;原索引中不存在的新标签位置插入缺失值(NaN)
# method 参数:ffill 或 pad 向前填充;bfill 向后填充

df.reindex_like(other, method=None, copy=True, limit=None, tolerance=None)
# 返回与 other 索引相匹配的对象
# 参数:
#   pad/ffill - 向前填充
#   bfill/backfill - 向后填充
#   nearest - 最近索引值填充

df.rename(mapper=None, index=None, columns=None, axis=None,copy=True, inplace=False, level=None)
# 重命名
# 允许基于一些映射(字典或者 Series)或任意函数来重新标记一个轴
# 参数:mapper, index, columns : dict-like or function
# inplace 默认为 False,此时复制底层数据并返回重命名后的新对象;inplace=True 时直接在原对象上重命名
2.实例
df = pd.DataFrame({ 'A': ['Tom','Jim','May'],'B': [1,2,3]},index=[0,2,4])
df.reindex(index=[0,1,2,3,4])
df.reindex(index=[0,1,2,3,4],method='ffill')
df.reindex(index=[0,1,2,3,4],method='bfill')
# 下面三个结果从左到右依次对应上面三条 reindex 语句
#      A    B         A  B         A  B
# 0  Tom  1.0    0  Tom  1    0  Tom  1
# 1  NaN  NaN    1  Tom  1    1  Jim  2
# 2  Jim  2.0    2  Jim  2    2  Jim  2
# 3  NaN  NaN    3  Jim  2    3  May  3
# 4  May  3.0    4  May  3    4  May  3
# 实例2:Padding NAN's
df1 = pd.DataFrame(np.arange(18).reshape(6,3),columns=['col1','col2','col3'])
df2 = pd.DataFrame(-np.arange(6).reshape(2,3),columns=['col1','col2','col3'])

df2.reindex_like(df1,method='ffill')  #向前填充 or pad
#    col1  col2  col3
# 0     0    -1    -2
# 1    -3    -4    -5
# 2    -3    -4    -5
# 3    -3    -4    -5
# 4    -3    -4    -5
# 5    -3    -4    -5

df2.reindex_like(df1,method='bfill')  #向后填充 or backfill
#    col1  col2  col3
# 0   0.0  -1.0  -2.0
# 1  -3.0  -4.0  -5.0
# 2   NaN   NaN   NaN
# 3   NaN   NaN   NaN
# 4   NaN   NaN   NaN
# 5   NaN   NaN   NaN

# 实例:填充限制 - 指定连续匹配的最大计数。
df2.reindex_like(df1)
#    col1  col2  col3
# 0   0.0  -1.0  -2.0
# 1  -3.0  -4.0  -5.0
# 2   NaN   NaN   NaN
# 3   NaN   NaN   NaN
# 4   NaN   NaN   NaN
# 5   NaN   NaN   NaN

df2.reindex_like(df1,method='ffill',limit=1)
#    col1  col2  col3
# 0   0.0  -1.0  -2.0
# 1  -3.0  -4.0  -5.0
# 2  -3.0  -4.0  -5.0
# 3   NaN   NaN   NaN
# 4   NaN   NaN   NaN
# 5   NaN   NaN   NaN

df2.reindex_like(df1,method='ffill',limit=2)
#    col1  col2  col3
# 0   0.0  -1.0  -2.0
# 1  -3.0  -4.0  -5.0
# 2  -3.0  -4.0  -5.0
# 3  -3.0  -4.0  -5.0
# 4   NaN   NaN   NaN
# 5   NaN   NaN   NaN
# 实例3:重命名
df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
df.rename(index='行{}'.format, columns={"A": "a", "B": "c"})  # 更改行名列名
df.rename(str.lower, axis='columns')   # 列名变小写
df.rename({1: 2, 2: 4}, axis='index')  # 更改行名 等价axis=0
# 下面三个结果从左到右依次对应上面三条 rename 语句
#      a  c    #    a  b    #    A  B
# 行0  1  4    # 0  1  4    # 0  1  4
# 行1  2  5    # 1  2  5    # 2  2  5
# 行2  3  6    # 2  3  6    # 4  3  6
# 实例4:reindex的坑
df = pd.DataFrame(np.arange(12).reshape(6, 2), columns=['A', 'B'],index=list('abcdef'))
df.reindex(['b', 'c', 'e'])  # 应该这样用:按标签取行,此处等价 df.iloc[[1, 2, 4]]
df.reindex([1, 2, 4])        # 坑:reindex 按标签而非位置匹配,标签 1、2、4 不在索引中,结果全为 NaN
# 下面两个结果从左到右依次对应上面两条 reindex 语句
#    A  B    #      A    B
# b  2  3    # 1  NaN  NaN
# c  4  5    # 2  NaN  NaN
# e  8  9    # 4  NaN  NaN
转载地址:http://sfqbf.baihongyu.com/