Pandas – 生成、读取、显示和保存数据

用到的函数

函数	作用
pd.read_csv(path, sep, header)	从csv文件中读取
pd.set_option()	pandas设置
pd.DataFrame()	生成DataFrame
df['uid']	截取一列
df[0:10]	按行截取
df.iloc[3,0:5]	按数字索引截取
df.iterrows()	按行遍历
df.items()（旧版本为 df.iteritems()，已在 pandas 2.0 中移除）	按列遍历
df.to_csv(path, index, header)	保存到csv文件

示例代码

import pandas as pd

# pd.read_csv arguments used below:
#   path:   CSV file to read
#   sep:    field delimiter; defaults to ',' (some CSV files use '\t' instead)
#   header: position of the header row; pass header=None when the file has no header
train_df = pd.read_csv(
    'train.csv',
    sep=',',
    header=None,
)
# Assign the column names
train_df.columns = ['label', 'uid', 'aid', 'time']

# Display data
# A value of None lifts the limit, so every row and every column is printed.
for option_name in ('display.max_rows', 'display.max_columns'):
    pd.set_option(option_name, None)

print(train_df)  # show all data
label_column = train_df['label']
print(label_column)  # show only the label column
first_ten_rows = train_df[:10]
print(first_ten_rows)  # show only the first 10 rows

# Build a DataFrame from a dict that maps column name -> list of values
d = dict(col1=[1, 2], col2=[3, 4])
df = pd.DataFrame(d)
print(df)
# Printed output:
#    col1  col2
# 0     1     3
# 1     2     4

# Selecting data
n = 0  # example row position; the original snippet used `n` without defining it
col2 = df['col2']  # select a single column
row5 = df[:5]  # select the first 5 rows
img_name = df.iloc[n, 0]  # value at row n, column 0
# Row n, columns 1 through the last, converted to a NumPy array.
# `as_matrix()` was removed in pandas 1.0; `to_numpy()` is its replacement.
landmarks = df.iloc[n, 1:].to_numpy()

# Iterating over a DataFrame
for index, row in train_df.iterrows():  # row by row
    print(row['uid'])

# Column by column: each step yields the column name and the column itself.
# `DataFrame.iteritems()` was removed in pandas 2.0; `items()` replaces it.
for index, col in train_df.items():
    # Print the column name and the first two values; `.iloc` selects by
    # position, so this does not depend on the index labels being 0 and 1.
    print(index, col.iloc[0], col.iloc[1])

# Add a new column and set every row to 1
train_df['new_col'] = 1


# Save the DataFrame to a CSV file.
# DataFrame.to_csv arguments used below:
#   path:   destination file path
#   index:  whether to write the row index; defaults to True
#   header: whether to write the column names; pass header=False to omit them
#           (the documented type is bool or list of str, so False is used
#           rather than None)
train_df.to_csv('save.csv', index=False, header=False)

import pandas as pd

# pd.read_csv arguments used below:
#   path:   CSV file to read
#   sep:    field delimiter; defaults to ',' (some CSV files use '\t' instead)
#   header: position of the header row; pass header=None when the file has no header
train_df = pd.read_csv(
    'train.csv',
    sep=',',
    header=None,
)
# Assign the column names
train_df.columns = ['label', 'uid', 'aid', 'time']

# Display data
# A value of None lifts the limit, so every row and every column is printed.
for option_name in ('display.max_rows', 'display.max_columns'):
    pd.set_option(option_name, None)

print(train_df)  # show all data
label_column = train_df['label']
print(label_column)  # show only the label column
first_ten_rows = train_df[:10]
print(first_ten_rows)  # show only the first 10 rows

# Build a DataFrame from a dict that maps column name -> list of values
d = dict(col1=[1, 2], col2=[3, 4])
df = pd.DataFrame(d)
print(df)
# Printed output:
#    col1  col2
# 0     1     3
# 1     2     4

# Selecting data
# (Section titles are written as `#` comments: the original title here had lost
# its opening triple-quote, which unbalanced every later string delimiter.)
n = 0  # example row position; the original snippet used `n` without defining it
col2 = df['col2']  # select a single column
row5 = df[:5]  # select the first 5 rows
img_name = df.iloc[n, 0]  # value at row n, column 0
# Row n, columns 1 through the last, converted to a NumPy array.
# `as_matrix()` was removed in pandas 1.0; `to_numpy()` is its replacement.
landmarks = df.iloc[n, 1:].to_numpy()

# Iterating over a DataFrame
for index, row in train_df.iterrows():  # row by row
    print(row['uid'])

# Column by column: each step yields the column name and the column itself.
# `DataFrame.iteritems()` was removed in pandas 2.0; `items()` replaces it.
for index, col in train_df.items():
    # Print the column name and the first two values; `.iloc` selects by
    # position, so this does not depend on the index labels being 0 and 1.
    print(index, col.iloc[0], col.iloc[1])

# Add a new column and set every row to 1
train_df['new_col'] = 1

# Save the DataFrame to a CSV file.
# DataFrame.to_csv arguments used below:
#   path:   destination file path
#   index:  whether to write the row index; defaults to True
#   header: whether to write the column names; pass header=False to omit them
#           (the documented type is bool or list of str, so False is used
#           rather than None)
train_df.to_csv('save.csv', index=False, header=False)