1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
| import os import operator import sys
def get_ratings(input_file):
    """Compute the average rating of every item in a ratings file.

    Each data line is split on ``::`` and read as
    ``userid::itemid::rating::timestamp``. The first line of the file is
    always skipped (treated as a header), and lines with fewer than four
    fields are ignored.

    Args:
        input_file: Path to the ratings file.

    Returns:
        A dict mapping item id (str) to its mean rating rounded to three
        decimals. An empty dict is returned when ``input_file`` does not
        exist.

    Raises:
        ValueError: If the rating field of a line is not a valid float.
    """
    if not os.path.exists(input_file):
        return {}

    # itemid -> [sum of ratings, number of ratings]
    totals = {}
    # The context manager guarantees the handle is closed even when a
    # malformed rating raises part-way through the file.
    with open(input_file) as handle:
        next(handle, None)  # skip the header line
        for line in handle:
            fields = line.strip().split("::")
            if len(fields) < 4:
                continue
            itemid, rating = fields[1], float(fields[2])
            entry = totals.setdefault(itemid, [0, 0])
            entry[0] += rating
            entry[1] += 1

    return {
        itemid: round(rating_sum / count, 3)
        for itemid, (rating_sum, count) in totals.items()
    }
def get_item_content(avg_ratings, input_file, topk=100):
    """Load item categories and build a per-category ranking of items.

    Each data line is split on ``::``; the first field is the item id and
    the last field is a ``|``-separated list of categories. The first line
    of the file is always skipped (treated as a header), and lines with
    fewer than three fields are ignored.

    Args:
        avg_ratings: Dict mapping item id to its average rating. Items
            missing from it are ranked with a score of 0.
        input_file: Path to the item description file (read as UTF-8).
        topk: Maximum number of items kept per category. Defaults to 100.

    Returns:
        A tuple ``(item_content, content_sort)``:

        * ``item_content`` maps item id -> {category: weight}, where the
          weight is ``1 / number_of_categories`` rounded to three decimals.
        * ``content_sort`` maps category -> list of at most ``topk`` item
          ids ordered by descending average rating.

        ``({}, {})`` is returned when ``input_file`` does not exist.
    """
    if not os.path.exists(input_file):
        return {}, {}

    item_content = {}
    # The context manager guarantees the handle is closed on every path.
    with open(input_file, 'r', encoding='UTF-8') as handle:
        next(handle, None)  # skip the header line
        for line in handle:
            fields = line.strip().split('::')
            if len(fields) < 3:
                continue
            itemid, content_str = fields[0], fields[-1]
            content_list = content_str.strip().split('|')
            # Every category of an item gets an equal share of weight.
            ratio = round(1 / len(content_list), 3)
            weights = item_content.setdefault(itemid, {})
            for content in content_list:
                weights[content] = ratio

    # category -> {itemid: average rating}
    record = {}
    for itemid, weights in item_content.items():
        score = avg_ratings.get(itemid, 0)
        for content in weights:
            record.setdefault(content, {})[itemid] = score

    content_sort = {}
    for content, scores in record.items():
        # sorted() is stable, so ties keep their file order.
        ranked = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)
        content_sort[content] = [itemid for itemid, _ in ranked[:topk]]

    return item_content, content_sort
def get_time_score(timestamp):
    """Return a recency weight for a rating timestamp.

    The age of the rating is measured in days relative to a fixed reference
    timestamp, scaled down by 100, and mapped through ``1 / (1 + age)``.
    A rating made at the reference time scores 1.0 and older ratings score
    progressively less.

    Timestamps later than the reference are treated as having zero age.
    Without that clamp a timestamp exactly 100 days after the reference
    makes the denominator zero, and anything later gives a negative weight.

    Args:
        timestamp: Rating time in seconds (same epoch as the reference).

    Returns:
        The weight rounded to three decimals, in the range (0, 1].
    """
    fix_time_stamp = 1476086345  # reference "now" in seconds
    total_sec = 24 * 60 * 60  # seconds per day
    delta = (fix_time_stamp - timestamp) / total_sec / 100
    # Clamp so ratings newer than the reference cannot zero or flip the
    # denominator.
    delta = max(delta, 0)
    return round(1 / (1 + delta), 3)
def get_up(item_content, input_file):
    """Build a user profile of preferred categories from a ratings file.

    Each data line is split on ``::`` and read as
    ``userid::itemid::rating::timestamp``. The first line of the file is
    always skipped (treated as a header), and lines with fewer than four
    fields are ignored. Only ratings of at least 4.0 on items present in
    ``item_content`` contribute. Each contributes
    ``rating * get_time_score(timestamp) * category_weight`` to every
    category of the item.

    For each user the two highest-scoring categories are kept and their
    scores are normalised so that they sum to 1 (before rounding).

    Args:
        item_content: Dict mapping item id -> {category: weight}.
        input_file: Path to the ratings file.

    Returns:
        A dict mapping user id -> list of ``(category, ratio)`` tuples
        ordered by descending score, with ``ratio`` rounded to three
        decimals. An empty dict is returned when ``input_file`` does not
        exist.

    Raises:
        ValueError: If a rating or timestamp field cannot be parsed.
    """
    if not os.path.exists(input_file):
        return {}

    score_thr = 4.0  # minimum rating counted as a positive signal
    topk = 2  # number of categories kept per user

    # userid -> {category: accumulated score}
    record = {}
    # The context manager guarantees the handle is closed even when a
    # malformed field raises part-way through the file.
    with open(input_file) as handle:
        next(handle, None)  # skip the header line
        for line in handle:
            fields = line.strip().split('::')
            if len(fields) < 4:
                continue
            userid, itemid = fields[0], fields[1]
            rating, timestamp = float(fields[2]), int(fields[3])
            if rating < score_thr or itemid not in item_content:
                continue
            time_score = get_time_score(timestamp)
            user_scores = record.setdefault(userid, {})
            for cate, weight in item_content[itemid].items():
                if cate not in user_scores:
                    user_scores[cate] = 0
                user_scores[cate] += rating * time_score * weight

    up = {}
    for userid, user_scores in record.items():
        top = sorted(
            user_scores.items(), key=operator.itemgetter(1), reverse=True
        )[:topk]
        total_score = sum(score for _, score in top)
        # All kept scores can be zero (e.g. weights that rounded to 0);
        # report zero ratios instead of dividing by zero.
        up[userid] = [
            (cate, round(score / total_score, 3) if total_score else 0.0)
            for cate, score in top
        ]
    return up
def recommend(item_sort, up, userid, topk=10):
    """Recommend items to one user based on their category profile.

    For every ``(category, ratio)`` entry in the user's profile, the first
    ``int(topk * ratio) + 1`` items of that category's ranking are taken.
    The per-category picks are concatenated in profile order. Categories
    missing from ``item_sort`` are skipped.

    Args:
        item_sort: Dict mapping category -> ranked list of item ids.
        up: Dict mapping user id -> list of ``(category, ratio)`` entries.
        userid: Id of the user to recommend for.
        topk: Base number of recommendations to distribute across the
            user's categories.

    Returns:
        ``{userid: [item ids]}``, or an empty dict when ``userid`` has no
        profile in ``up``.
    """
    if userid not in up:
        return {}

    picks = []
    for preference in up[userid]:
        cate, ratio = preference[0], preference[1]
        # "+ 1" means every category in the profile yields at least one item.
        quota = int(topk * ratio) + 1
        if cate in item_sort:
            picks += item_sort[cate][:quota]
    return {userid: picks}
if __name__ == "__main__":
    # Demo pipeline: average ratings -> item categories and per-category
    # rankings -> user profiles -> recommendations for user "10".
    ratings_path = "ratings.txt"
    movies_path = "movies.txt"

    avg_ratings = get_ratings(ratings_path)
    item_content, content_sort = get_item_content(avg_ratings, movies_path)
    up = get_up(item_content, ratings_path)

    recommendations = recommend(content_sort, up, "10")
    print(recommendations)
|