move data to data dir
This commit is contained in:
parent
6aed792d44
commit
04c9ce7f6e
Can't render this file because it is too large.
|
Can't render this file because it is too large.
|
Can't render this file because it is too large.
|
2
main.py
2
main.py
@ -40,7 +40,7 @@ def compare_games(title_1: str, title_2: str, df: pd.DataFrame, show_graph: bool
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
df = pd.read_pickle('games_processed_vectorized.csv')
|
||||
df = pd.read_pickle('data/games_processed_vectorized.csv')
|
||||
|
||||
while True:
|
||||
title_1 = input("Enter title 1: ")
|
||||
|
@ -34,7 +34,7 @@ def replace_with_vector(row, w2v):
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
df = pd.read_csv('games.csv')
|
||||
df = pd.read_csv('data/games.csv')
|
||||
|
||||
df['positive_percentage'] = df.apply(
|
||||
lambda row: calculate_positive_percentage(row.positive_ratings, row.negative_ratings), axis=1)
|
||||
@ -46,16 +46,16 @@ if __name__ == '__main__':
|
||||
df['all_categorical'] = df['categories'] + ';' + df['genres'] + ';' + df['steamspy_tags']
|
||||
df['all_categorical'] = df['all_categorical'].map(lambda row: row.strip().replace(' ', ';').lower())
|
||||
df['all_categorical'] = df['all_categorical'].apply(lambda row: replace(row))
|
||||
df.to_csv('games_processed.csv', index=False, encoding='utf-8')
|
||||
df.to_csv('data/games_processed.csv', index=False, encoding='utf-8')
|
||||
try:
|
||||
w2v = gensim.models.KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin',
|
||||
binary=True)
|
||||
df2 = pd.read_csv('games_processed.csv')
|
||||
df2 = pd.read_csv('data/games_processed.csv')
|
||||
df2['temp'] = df2['categories'] + ';' + df2['genres'] + ';' + df2['steamspy_tags']
|
||||
df2['temp'] = df2['temp'].map(lambda row: row.strip().replace(' ', ';').lower())
|
||||
df2['all_categorical_vector'] = df2['temp'].apply(lambda row: replace_with_vector(row, w2v))
|
||||
df2.drop('temp', inplace=True, axis=1)
|
||||
df2.to_pickle('games_processed_vectorized.csv')
|
||||
df2.to_pickle('data/games_processed_vectorized.csv')
|
||||
except:
|
||||
print('A local copy of GoogleNews-vectors-negative300.bin was not found. The file can be downloaded from '
|
||||
'https://www.kaggle.com/datasets/leadbest/googlenewsvectorsnegative300. Finishing without vectorization')
|
||||
|
Loading…
Reference in New Issue
Block a user