如何解决python中unhashable错误的问题？

问题描述

我有两个文本数据集，所以我做了清理过程，然后我想根据 name 列将它们分组，但是在运行代码后我得到了错误 unhashable type: 'list'



def cleanDataD(path='data1.csv'):
    
    df = pd.read_csv(path,encoding = "ISO-8859-1")
    
    df['name'] = df['name'].fillna(' ')
    df['name'] = df['name'].apply(lambda x: remove_punct(x) )
    df['name'] = df['name'].apply(lambda x: tokenizer.tokenize(x.lower()) )
    df['name'] = df['name'].apply(lambda x: remove_stopWords(x) )
    df['name_CV'] = df['name'].apply(lambda x: word_lemmatiser(x) )            
    df['name_CV'] = df['name_CV'].apply(lambda x: ['none'] if (len(x)== 0) else x)
    
    df['city'] = df['city'].fillna(' ')
    df['city'] = df['city'].apply(lambda x: remove_punct(x) )
    df['city'] = df['city'].apply(lambda x: tokenizer.tokenize(x.lower()) )
    df['city'] = df['city'].apply(lambda x: remove_stopWords(x) )
    df['city_CV']  = df['city'].apply(lambda x: word_lemmatiser(x) )   
    df['city_CV'] = df['city_CV'].apply(lambda x: ['none'] if (len(x)== 0) else x)
    
    df = df.fillna(0)
    return df 

def cleanDataH(path='data2.csv'):
    
    df = pd.read_csv(path,encoding = "utf_8")
    
    df['name'] = df['name'].fillna(' ')
    df['name'] = df['name'].apply(lambda x: remove_punct(x) )
    df['name'] = df['name'].apply(lambda x: tokenizer.tokenize(x.lower()) )
    df['name'] = df['name'].apply(lambda x: remove_stopWords(x) )
    df['name_CV'] = df['name'].apply(lambda x: word_lemmatiser(x) )            
    df['name_CV'] = df['name_CV'].apply(lambda x: ['none'] if (len(x)== 0) else x)
    
    df['city'] = df['city'].fillna(' ')
    df['city'] = df['city'].apply(lambda x: remove_punct(x) )
    df['city'] = df['city'].apply(lambda x: tokenizer.tokenize(x.lower()) )
    df['city'] = df['city'].apply(lambda x: remove_stopWords(x) )
    df['city_CV']  = df['city'].apply(lambda x: word_lemmatiser(x) )   
    df['city_CV'] = df['city_CV'].apply(lambda x: ['none'] if (len(x)== 0) else x)
    
    df = df.fillna(0)
    return df

df_D = cleanDataD(path='data1.csv')
df_H = cleanDataH(path='data2.csv')

indexer =rl.Index()
indexer.block('name')
ff = indexer.index(df_D,df_H)

TypeError                                 Traceback (most recent call last)
<ipython-input-35-c9ee905d6674> in <module>
----> 1 ff = indexer.index(df_H,df_D)

TypeError: unhashable type: 'list'

如何修复这个错误？

解决方法

暂无找到可以解决该程序问题的有效方法，小编努力寻找整理中！

如果你已经找到好的解决方法，欢迎将解决方案带上本链接一起发送给小编。

小编邮箱:dio#foxmail.com (将#修改为@）

pandas python record-linkage