问题描述
我的问题是“我想将召回代码转换为精确代码”。 (回忆部分)这是我从 kaggle 得到的所有评估模型代码。
#Top-N accuracy metrics consts
EVAL_RANDOM_SAMPLE_NON_INteraCTED_ITEMS = 100
class ModelEvaluator:
def get_not_interacted_items_sample(self,person_id,sample_size,seed=42):
interacted_items = get_items_interacted(person_id,interactions_full_indexed_df)
all_items = set(articles_df['contentId'])
non_interacted_items = all_items - interacted_items
random.seed(seed)
non_interacted_items_sample = random.sample(non_interacted_items,sample_size)
return set(non_interacted_items_sample)
def _verify_hit_top_n(self,item_id,recommended_items,topn):
try:
index = next(i for i,c in enumerate(recommended_items) if c == item_id)
except:
index = -1
hit = int(index in range(0,topn))
return hit,index
def evaluate_model_for_user(self,model,person_id):
#Getting the items in test set
interacted_values_testset = interactions_test_indexed_df.loc[person_id]
if type(interacted_values_testset['contentId']) == pd.Series:
person_interacted_items_testset = set(interacted_values_testset['contentId'])
else:
person_interacted_items_testset = set([int(interacted_values_testset['contentId'])])
interacted_items_count_testset = len(person_interacted_items_testset)
#Getting a ranked recommendation list from a model for a given user
person_recs_df = model.recommend_items(person_id,items_to_ignore=get_items_interacted(person_id,interactions_train_indexed_df),topn=10000000000)
hits_at_5_count = 0
hits_at_10_count = 0
#For each item the user has interacted in test set
for item_id in person_interacted_items_testset:
#Getting a random sample (100) items the user has not interacted
#(to represent items that are assumed to be no relevant to the user)
non_interacted_items_sample = self.get_not_interacted_items_sample(person_id,sample_size=EVAL_RANDOM_SAMPLE_NON_INteraCTED_ITEMS,seed=item_id%(2**32))
#Combining the current interacted item with the 100 random items
items_to_filter_recs = non_interacted_items_sample.union(set([item_id]))
#Filtering only recommendations that are either the interacted item or from a random sample of 100 non-interacted items
valid_recs_df = person_recs_df[person_recs_df['contentId'].isin(items_to_filter_recs)]
valid_recs = valid_recs_df['contentId'].values
#Verifying if the current interacted item is among the Top-N recommended items
hit_at_5,index_at_5 = self._verify_hit_top_n(item_id,valid_recs,5)
hits_at_5_count += hit_at_5
hit_at_10,index_at_10 = self._verify_hit_top_n(item_id,10)
hits_at_10_count += hit_at_10
#Recall is the rate of the interacted items that are ranked among the Top-N recommended items,#when mixed with a set of non-relevant items
recall_at_5 = hits_at_5_count / float(interacted_items_count_testset)
recall_at_10 = hits_at_10_count / float(interacted_items_count_testset)
person_metrics = {'hits@5_count':hits_at_5_count,'hits@10_count':hits_at_10_count,'interacted_count': interacted_items_count_testset,'recall@5': recall_at_5,'recall@10': recall_at_10}
return person_metrics
def evaluate_model(self,model):
#print('Running evaluation for users')
people_metrics = []
for idx,person_id in enumerate(list(interactions_test_indexed_df.index.unique().values)):
#if idx % 100 == 0 and idx > 0:
# print('%d users processed' % idx)
person_metrics = self.evaluate_model_for_user(model,person_id)
person_metrics['_person_id'] = person_id
people_metrics.append(person_metrics)
print('%d users processed' % idx)
detailed_results_df = pd.DataFrame(people_metrics) \
.sort_values('interacted_count',ascending=False)
global_recall_at_5 = detailed_results_df['hits@5_count'].sum() / float(detailed_results_df['interacted_count'].sum())
global_recall_at_10 = detailed_results_df['hits@10_count'].sum() / float(detailed_results_df['interacted_count'].sum())
global_metrics = {'modelName': model.get_model_name(),'recall@5': global_recall_at_5,'recall@10': global_recall_at_10}
return global_metrics,detailed_results_df
model_evaluator = ModelEvaluator()
(精准部分)那我试试这个。结果是一个全局精度值(整体)超过 1.00
#Top-N accuracy metrics consts
EVAL_RANDOM_SAMPLE_NON_INteraCTED_ITEMS = 100
class ModelEvaluator:
def get_not_interacted_items_sample(self,10)
hits_at_10_count += hit_at_10
#precision is the rate of the interacted items that are ranked among the Top-N recommended items,#when mixed with a set of non-relevant items
precision_at_5 = hits_at_5_count / 5
precision_at_10 = hits_at_10_count / 10
person_metrics = {'hits@5_count':hits_at_5_count,'precision@5': precision_at_5,'precision@10': precision_at_10}
return person_metrics
def evaluate_model(self,person_id)
person_metrics['_person_id'] = person_id
people_metrics.append(person_metrics)
print('%d users processed' % idx)
detailed_results_df = pd.DataFrame(people_metrics)
global_precision_at_5 = detailed_results_df['hits@5_count'].sum() / 5
global_precision_at_10 = detailed_results_df['hits@10_count'].sum() / 10
global_metrics = {'modelName': model.get_model_name(),'precision@5': global_precision_at_5,'precision@10': global_precision_at_10}
return global_metrics,detailed_results_df
model_evaluator = ModelEvaluator()
我想,问题就在这里(我应该在分隔符处迭代 K: 5 和 10,但我不知道该怎么做)
detailed_results_df = pd.DataFrame(people_metrics)
global_precision_at_5 = detailed_results_df['hits@5_count'].sum() / 5
global_precision_at_10 = detailed_results_df['hits@10_count'].sum() / 10
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)