1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
def base_contribution_score():
    """
    usercf baseline contribution score
    :return: the constant weight 1, whatever the item or the users involved
    """
    constant_score = 1
    return constant_score
def update_one_contribution_score(item_user_click_count):
    """
    usercf contribution score, variant 1: the more users clicked an item,
    the smaller the score that item contributes.
    :param item_user_click_count: number of users who clicked the item
    :return: 1 / log10(1 + item_user_click_count)
    """
    # log10(1 + 0) is 0, so a count of 0 raises ZeroDivisionError below.
    damping = math.log10(1 + item_user_click_count)
    return 1 / damping
def update_two_contribution_score(click_time_one, click_time_two):
    """
    usercf contribution score, variant 2: two clicks on the same item
    contribute less the further apart in time they happened.
    :param click_time_one: click time of the first user on the item
    :param click_time_two: click time of the second user on the same item
    :return: 1 / (1 + gap), where gap is the absolute time difference
             divided by 60 * 60 * 24
    """
    # 60 * 60 * 24 is one day if the click times are in seconds
    # (presumably unix timestamps -- confirm against the caller).
    day_span = 60 * 60 * 24
    scaled_gap = abs(click_time_two - click_time_one) / day_span
    return 1 / (1 + scaled_gap)
def transfer_user_click(user_click):
    """
    invert the user -> items click mapping into item -> users
    :param user_click: dict, key userid, value: [itemid1, itemid2, ...]
    :return: dict, key itemid, value: [userid1, userid2, ...]; a user is
             appended once per occurrence of the item in that user's list
    """
    users_by_item = {}
    for userid, clicked_items in user_click.items():
        for itemid in clicked_items:
            users_by_item.setdefault(itemid, []).append(userid)
    return users_by_item
def cal_user_sim(item_click_by_user, user_click_time):
    """
    get user sim info
    :param item_click_by_user: dict, key itemid, value: [userid1, userid2, ...]
    :param user_click_time: dict, key userid + "_" + itemid, value: click time;
                            a missing key is treated as click time 0
    :return: dict, key userid, value: [(other_userid, sim), ...] sorted by sim
             in descending order; a user who shares no item with any other
             user has no entry at all
    """
    co_appear = {}
    user_click_count = {}
    for itemid, user_list in item_click_by_user.items():
        # Resolve each user's click time on this item once instead of once per pair.
        click_times = [user_click_time.get(user + "_" + itemid, 0) for user in user_list]
        for index_i, user_i in enumerate(user_list):
            user_click_count[user_i] = user_click_count.get(user_i, 0) + 1
            for index_j in range(index_i + 1, len(user_list)):
                user_j = user_list[index_j]
                # The contribution is the same in both directions, so compute it once.
                score = update_two_contribution_score(click_times[index_i], click_times[index_j])
                row_i = co_appear.setdefault(user_i, {})
                row_i[user_j] = row_i.get(user_j, 0) + score
                row_j = co_appear.setdefault(user_j, {})
                row_j[user_i] = row_j.get(user_i, 0) + score

    user_sim_info_sorted = {}
    for user_i, relate_user in co_appear.items():
        sim_to_others = {}
        for user_j, cotime in relate_user.items():
            # Normalise the accumulated contribution by the geometric mean of both click counts.
            sim_to_others[user_j] = cotime / math.sqrt(user_click_count[user_i] * user_click_count[user_j])
        user_sim_info_sorted[user_i] = sorted(sim_to_others.items(),
                                              key=operator.itemgetter(1), reverse=True)
    return user_sim_info_sorted
def cal_recom_result(user_click, user_sim, topk_user=3, item_num=5):
    """
    recom by usercf algo
    :param user_click: dict, key userid, value: [itemid1, itemid2, ...]
    :param user_sim: dict, key userid, value: [(userid1, score), (x, x), ...]
    :param topk_user: how many leading entries of a user's sim list are used
    :param item_num: how many leading items are taken from each similar user
    :return: dict, key userid, value: dict, value_key: itemid, value_value: recom_score;
             every user in user_click gets an entry, possibly empty.
             Items the user already clicked are not filtered out.
    """
    recom_result = {}
    for user in user_click:
        user_recom = recom_result.setdefault(user, {})
        # A user with no similar users may be missing from user_sim; such a
        # user gets an empty result instead of raising KeyError.
        for userid_j, sim_score in user_sim.get(user, [])[:topk_user]:
            if userid_j not in user_click:
                continue
            for itemid_j in user_click[userid_j][:item_num]:
                # setdefault keeps the score of the first similar user that offers the item.
                user_recom.setdefault(itemid_j, sim_score)
    return recom_result
|