时间处理，时间抽取和虚拟变量

import pandas

fa = pandas.read_csv(
    'D://Python projects//reference data//4.16//data.csv',
    encoding='utf8',
)

# Convert the registration-time strings (slash-separated year/month/day)
# into a datetime column.
fa['时间'] = pandas.to_datetime(fa['注册时间'], format='%Y/%m/%d')

# Render the parsed datetimes back to text in a dash-separated layout.
fa['格式化时间'] = fa['时间'].dt.strftime('%Y-%m-%d')

# Extract individual datetime components, one new column per component.
# Column order matches the order of the pairs below; pandas' `weekday`
# numbers Monday as 0.
for column_name, dt_attribute in (
    ('年', 'year'),
    ('月', 'month'),
    ('日', 'day'),
    ('周', 'weekday'),
    ('时', 'hour'),
    ('分', 'minute'),
    ('秒', 'second'),
):
    fa[column_name] = getattr(fa['时间'].dt, dt_attribute)

# Time-based row extraction
import datetime

import pandas

# Plain load without any date handling: the 'date' column keeps whatever
# dtype read_csv infers for it.
ga = pandas.read_csv('D://Python projects//reference data//4.17//data.csv',
                     encoding='utf8')


def dataparse(dates):
    """Parse a compact ``YYYYMMDD`` string into a ``datetime.datetime``.

    Uses the standard-library ``datetime.datetime.strptime``; the
    ``pandas.datetime`` alias the original relied on is no longer
    available in current pandas releases.  Kept under its original name
    for any code that still calls it.

    Args:
        dates: A single date string such as ``'20160201'``.

    Returns:
        The corresponding ``datetime.datetime`` (time part is midnight).

    Raises:
        ValueError: If ``dates`` does not match ``%Y%m%d``.
    """
    return datetime.datetime.strptime(dates, '%Y%m%d')


def _read_with_parsed_dates(path):
    """Read the CSV at ``path`` and convert its 'date' column to datetimes.

    The column is read as text and converted with ``pandas.to_datetime``
    instead of ``read_csv(date_parser=...)``, because ``date_parser`` is
    deprecated in recent pandas; this form works on old and new versions.
    """
    frame = pandas.read_csv(path, encoding='utf8', dtype={'date': str})
    frame['date'] = pandas.to_datetime(frame['date'], format='%Y%m%d')
    return frame


# Load with the 'date' column parsed and used as a datetime index.
gb = _read_with_parsed_dates(
    'D://Python projects//reference data//4.17//data.csv'
).set_index('date')

# Bounds for the extraction.  datetime.datetime (midnight) is used rather
# than datetime.date: newer pandas releases do not reliably treat plain
# date objects as timestamps when comparing against or looking up
# datetime64 values.
date1 = datetime.datetime(year=2016, month=2, day=1)
date2 = datetime.datetime(year=2016, month=2, day=9)

# Extract by index.  `.loc` is the label-based replacement for the removed
# `.ix` indexer.
# Rows for the whole period between date1 and date2 (both ends included).
gb.loc[date1:date2]
# Rows for exactly these two dates; `.loc` raises KeyError if a listed
# label is absent from the index.
gb.loc[[date1, date2]]


# Extract by an ordinary date column instead of the index.
gc = _read_with_parsed_dates(
    'D://Python projects//reference data//4.17//data.csv'
)

# Inclusive range filter on the 'date' column (between() includes both
# bounds by default, matching the original >= / <= comparison).
gc[gc.date.between(date1, date2)]

# Dummy (indicator) variables
import pandas

ea = pandas.read_csv(
    'D://Python projects//reference data//4.18//data.csv',
    encoding='utf8',
)

# Inspect the distinct values of the education-level column.
ea['Education Level'].drop_duplicates()

# Reference listing of the education levels (a label followed by the value
# as it appears in the column); kept as a bare string literal, which has
# no effect when the script runs.
"""
博士               Doctorate
学士       Bachelor's Degree
硕士         Master's Degree
副学士     Associate's Degree
专业院校           Some College
博士后              Post-Doc
职业院校          Trade School
高中           High School
小学          Grade School
"""

# Ordinal encoding of the education levels: each level is numbered by its
# position in the sequence below, starting at 1.
education_dict = {
    level: rank
    for rank, level in enumerate(
        (
            "Grade School",
            "High School",
            "Trade School",
            "Some College",
            "Associate's Degree",
            "Bachelor's Degree",
            "Master's Degree",
            "Doctorate",
            "Post-Doc",
        ),
        start=1,
    )
}

# Map each education level to its numeric rank; values missing from the
# mapping become NaN.
ea['Education value'] = ea['Education Level'].map(education_dict)

# Inspect the distinct values of the gender column.
ea['Gender'].drop_duplicates()

# One indicator column per gender value: missing values get no column of
# their own (dummy_na=False) and no category is dropped (drop_first=False).
dummy = pandas.get_dummies(
    ea,
    columns=['Gender'],
    prefix=['Gender'],
    prefix_sep="_",
    dummy_na=False,
    drop_first=False,
)

结果为：

时间处理，时间抽取和虚拟变量

相关文章

时间处理，时间抽取和虚拟变量