当前位置 首页 电影 《HyperProjection舞“排球少年!!顶端的风景”幕后纪录》垃圾场决战上

剧情简介

# NOTE(review): this fragment originally opened with `")`, the tail of a
# statement whose beginning is not part of this view; it is not reproduced.

# 2. Train the word vectors.
# The whole data set is used for training, not only the training split.
all_words = data.get_all_words()
# NOTE(review): `size=` / `iter=` are the pre-4.0 gensim keyword names
# (gensim 4 renamed them to `vector_size=` / `epochs=`) -- confirm the
# installed version before touching them.
# NOTE(review): the corpus is a single "sentence" (`[all_words]`) and
# `min_count` is left at its default, so rare words may be missing from the
# vocabulary and make the lookup below raise KeyError -- verify.
w2v = Word2Vec(
    [all_words],
    sg=1,
    size=vector_size,
    negative=5,
    iter=5,
    window=5,
)
# One embedding per entry of all_words, in the same order.
vector = [w2v.wv[w] for w in all_words]

# 3. Drop the movies that have no subset, then check that the derived
# collections still line up with the filtered movie list.
print("电影大集合的数量", len(all_movies))
all_movies = data.remove_no_subset(all_movies)
print("删除子集后的电影大集合的数量", len(all_movies))
# Indexing by position (rather than zip) keeps the original IndexError when
# all_words is shorter than all_movies.
if any(all_words[i] not in all_movies for i in range(len(all_movies))):
    # NOTE(review): on the first mismatch every collection is reset to None.
    # Step 6 iterates `vector`, so it will fail with a TypeError afterwards --
    # confirm whether an explicit error is wanted here instead.
    all_movies = None
    all_words = None
    all_movies_to_clean = None
    vector = None

# 4. Turn the words of the review into word vectors.
review_words = pd.read_csv("stage3Datasets/" + predictionId + ".csv")
review_words = review_words["review_words"].values
review_vectors = []
for w in review_words:
    print(w)
    try:
        review_vectors.append(w2v.wv[w])
    except (KeyError, TypeError):
        # KeyError: word is not in the trained vocabulary.
        # TypeError: cell is not a usable key (e.g. NaN from an empty CSV cell).
        # Such entries are skipped on purpose; anything else (including
        # KeyboardInterrupt) is no longer swallowed.
        continue

# 5. Average the review's word vectors into one vector for the whole review.
# An empty review_vectors list raises ZeroDivisionError here (0 / 0).
review_sum = sum(review_vectors)
review_vector = review_sum / len(review_vectors)

# 6. Score every candidate vector against the review vector and rank them.
# cosine_similarity (assumed to be scikit-learn's pairwise implementation)
# treats each ROW as one sample, so both operands must have shape (1, dim).
# The previous reshape(-1, 1) turned every vector into `dim` one-feature
# samples and produced a dim x dim matrix of +/-1 per candidate instead of a
# single similarity score.
review_row = review_vector.reshape(1, -1)
sim = np.array(
    [cosine_similarity(review_row, v.reshape(1, -1)) for v in vector]
)
print("#" * 80)
print(sim)
print(type(sim))
print("#" * 80)
print(sim.shape)
# Flatten to one score per candidate, then sort indices by descending score.
sim = sim.reshape(-1, 1)
target_index = np.argsort(sim, axis=0)[::-1]

Copyright © 2025