move data to data dir

2023-01-27 18:43:32 +01:00 · 2023-01-27 18:43:32 +01:00 · 04c9ce7f6e
commit 04c9ce7f6e
parent 6aed792d44
5 changed files with 5 additions and 5 deletions
--- a/data/games.csv
+++ b/data/games.csv
--- a/data/games_processed.csv
+++ b/data/games_processed.csv
--- a/data/games_processed_vectorized.csv
+++ b/data/games_processed_vectorized.csv
--- a/main.py
+++ b/main.py
@@ -40,7 +40,7 @@ def compare_games(title_1: str, title_2: str, df: pd.DataFrame, show_graph: bool

 if __name__ == '__main__':

-    df = pd.read_pickle('games_processed_vectorized.csv')
+    df = pd.read_pickle('data/games_processed_vectorized.csv')

    while True:
        title_1 = input("Enter title 1: ")
--- a/process_dataset.py
+++ b/process_dataset.py
@@ -34,7 +34,7 @@ def replace_with_vector(row, w2v):


 if __name__ == '__main__':
-    df = pd.read_csv('games.csv')
+    df = pd.read_csv('data/games.csv')

    df['positive_percentage'] = df.apply(
        lambda row: calculate_positive_percentage(row.positive_ratings, row.negative_ratings), axis=1)
@@ -46,16 +46,16 @@ if __name__ == '__main__':
    df['all_categorical'] = df['categories'] + ';' + df['genres'] + ';' + df['steamspy_tags']
    df['all_categorical'] = df['all_categorical'].map(lambda row: row.strip().replace(' ', ';').lower())
    df['all_categorical'] = df['all_categorical'].apply(lambda row: replace(row))
-    df.to_csv('games_processed.csv', index=False, encoding='utf-8')
+    df.to_csv('data/games_processed.csv', index=False, encoding='utf-8')
    try:
        w2v = gensim.models.KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin',
                                                              binary=True)
-        df2 = pd.read_csv('games_processed.csv')
+        df2 = pd.read_csv('data/games_processed.csv')
        df2['temp'] = df2['categories'] + ';' + df2['genres'] + ';' + df2['steamspy_tags']
        df2['temp'] = df2['temp'].map(lambda row: row.strip().replace(' ', ';').lower())
        df2['all_categorical_vector'] = df2['temp'].apply(lambda row: replace_with_vector(row, w2v))
        df2.drop('temp', inplace=True, axis=1)
-        df2.to_pickle('games_processed_vectorized.csv')
+        df2.to_pickle('data/games_processed_vectorized.csv')
    except:
        print('A local copy of GoogleNews-vectors-negative300.bin was not found. The file can be downloaded from '
              'https://www.kaggle.com/datasets/leadbest/googlenewsvectorsnegative300. Finishing without vectorization')