zajęcia_2 dataset

2022-03-19 12:27:30 +01:00 · 2022-03-19 12:27:30 +01:00 · d9491fd0e1
commit d9491fd0e1
parent 0e772419b8
3 changed files with 19124 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,152 @@
+# Created by .ignore support plugin (hsz.mobi)
+### Python template
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# IPython Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# dotenv
+.env
+
+# virtualenv
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
+### VirtualEnv template
+# Virtualenv
+# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
+.Python
+[Bb]in
+[Ii]nclude
+[Ll]ib
+[Ll]ib64
+[Ll]ocal
+[Ss]cripts
+pyvenv.cfg
+.venv
+pip-selfcheck.json
+### JetBrains template
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff:
+.idea/workspace.xml
+.idea/tasks.xml
+.idea/dictionaries
+.idea/vcs.xml
+.idea/jsLibraryMappings.xml
+
+# Sensitive or high-churn files:
+.idea/dataSources.ids
+.idea/dataSources.xml
+.idea/dataSources.local.xml
+.idea/sqlDataSources.xml
+.idea/dynamic.xml
+.idea/uiDesigner.xml
+
+# Gradle:
+.idea/gradle.xml
+.idea/libraries
+
+# Mongo Explorer plugin:
+.idea/mongoSettings.xml
+
+.idea/
+
+## File-based project format:
+*.iws
+
+## Plugin-specific files:
+
+# IntelliJ
+/out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+
--- a/2/games.csv
+++ b/2/games.csv
--- a/2/main.py
+++ b/2/main.py
@ -0,0 +1,171 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import sklearn.model_selection
+import numpy as np
+
+def delete_stadia(games):
+    """Drop, in place, every row whose "platform" value is " Stadia".
+
+    The comparison is exact, including the leading space in " Stadia".
+    Rows are selected with a boolean mask rather than by looking up the
+    labels 0..len-1, so the function also works when the index is not a
+    gap-free 0-based range (for example after other rows were dropped).
+
+    Args:
+        games: DataFrame with a "platform" column; modified in place.
+
+    Returns:
+        None.
+
+    Raises:
+        KeyError: if ``games`` has no "platform" column.
+    """
+    stadia_labels = games.index[games["platform"] == " Stadia"]
+    games.drop(stadia_labels, inplace=True)
+
+def delete_tbd(games):
+    """Drop, in place, every row whose "user_review" value is "tbd".
+
+    Rows are selected with a boolean mask rather than by looking up the
+    labels 0..len-1. The label lookup silently skipped the rows with the
+    highest labels whenever earlier rows had already been dropped, because
+    the table length is then smaller than the largest remaining label.
+
+    Args:
+        games: DataFrame with a "user_review" column; modified in place.
+
+    Returns:
+        None.
+
+    Raises:
+        KeyError: if ``games`` has no "user_review" column.
+    """
+    tbd_labels = games.index[games["user_review"] == "tbd"]
+    games.drop(tbd_labels, inplace=True)
+
+def user_review_to_numeric(games):
+    """Convert the "user_review" column from strings to numbers, in place.
+
+    The conversion is done by ``pd.to_numeric`` with its default settings.
+
+    Args:
+        games: DataFrame with a "user_review" column; modified in place.
+
+    Returns:
+        None.
+    """
+    numeric_reviews = pd.to_numeric(games["user_review"])
+    games["user_review"] = numeric_reviews
+
+def normalization(games):
+    """Rescale the score columns in place.
+
+    "meta_score" is divided by 100.0 and "user_review" by 10.0.
+
+    Args:
+        games: DataFrame with numeric "meta_score" and "user_review"
+            columns; modified in place.
+
+    Returns:
+        None.
+    """
+    for column, divisor in (("meta_score", 100.0), ("user_review", 10.0)):
+        games[column] = games[column].div(divisor)
+
+def preparation_meta_plot(games):
+    """Return how often each distinct "meta_score" occurs.
+
+    The counts are ordered by ascending score, i.e. they line up with
+    ``games["meta_score"].value_counts().keys().sort_values()``.
+    The frequency table is built once and sorted by its index instead of
+    being recomputed for every distinct score.
+
+    Args:
+        games: DataFrame with a "meta_score" column.
+
+    Returns:
+        list of int: one count per distinct score.
+    """
+    return games["meta_score"].value_counts().sort_index().tolist()
+
+def preparation_users_plot(games):
+    """Return how often each distinct "user_review" value occurs.
+
+    The counts are ordered by ascending score, i.e. they line up with
+    ``games["user_review"].value_counts().keys().sort_values()``.
+    The frequency table is built once and sorted by its index instead of
+    being recomputed for every distinct score.
+
+    Args:
+        games: DataFrame with a "user_review" column.
+
+    Returns:
+        list of int: one count per distinct score.
+    """
+    return games["user_review"].value_counts().sort_index().tolist()
+
+def create_plot(games):
+    """Plot the score distributions of both rating sources and show them.
+
+    For "meta_score" and "user_review" the distinct scores (ascending) are
+    drawn against their number of occurrences, as two lines in one figure.
+
+    Args:
+        games: DataFrame with "meta_score" and "user_review" columns.
+
+    Returns:
+        None. The figure is displayed with ``plt.show()``.
+    """
+    meta_counts = preparation_meta_plot(games)
+    user_counts = preparation_users_plot(games)
+    meta_scores = games["meta_score"].value_counts().keys().sort_values()
+    user_scores = games["user_review"].value_counts().keys().sort_values()
+
+    plt.figure(figsize=(10, 5))
+    curves = (
+        (meta_scores, meta_counts, "maroon", "metacritic"),
+        (user_scores, user_counts, "blue", "użytkownicy"),
+    )
+    for scores, counts, colour, legend_label in curves:
+        plt.plot(scores, counts, color=colour, label=legend_label)
+
+    plt.xlabel("Ocena")
+    plt.ylabel("Liczba ocen")
+    plt.title("Oceny metacritic i użytkowników")
+    plt.legend()
+    plt.show()
+
+def create_train_test(games, test_size=5810, random_state=1):
+    """Split the data into train and test subsets.
+
+    Args:
+        games: the data set to split.
+        test_size: forwarded to ``train_test_split``. The default of 5810
+            is the value this script has always used; pass another value
+            for data sets of a different size.
+        random_state: seed forwarded to ``train_test_split`` so the split
+            is reproducible; defaults to 1.
+
+    Returns:
+        tuple: ``(games_train, games_test)``.
+    """
+    games_train, games_test = sklearn.model_selection.train_test_split(
+        games, test_size=test_size, random_state=random_state
+    )
+    return games_train, games_test
+
+def get_platform_score_metacritic(games, platform):
+    """Return the "meta_score" values of all rows for one platform.
+
+    Rows are selected with a boolean mask rather than by looking up the
+    labels 0..len-1. After rows have been dropped from ``games`` the label
+    lookup never reached the rows with the highest labels, so their scores
+    were missing from the result.
+
+    Args:
+        games: DataFrame with "platform" and "meta_score" columns.
+        platform: exact platform string to match (the script passes
+            values with a leading space, e.g. " Xbox 360").
+
+    Returns:
+        list: the matching scores in row order; empty if nothing matches.
+    """
+    return games.loc[games["platform"] == platform, "meta_score"].tolist()
+
+def get_platform_score_users(games, platform):
+    """Return the "user_review" values of all rows for one platform.
+
+    Rows are selected with a boolean mask rather than by looking up the
+    labels 0..len-1. After rows have been dropped from ``games`` the label
+    lookup never reached the rows with the highest labels, so their scores
+    were missing from the result.
+
+    Args:
+        games: DataFrame with "platform" and "user_review" columns.
+        platform: exact platform string to match (the script passes
+            values with a leading space, e.g. " Xbox 360").
+
+    Returns:
+        list: the matching scores in row order; empty if nothing matches.
+    """
+    return games.loc[games["platform"] == platform, "user_review"].tolist()
+
+def platform_information(games, platform):
+    """Print summary statistics of both score columns for one platform.
+
+    Prints the number of rows for the platform and then, for the
+    metacritic scores and for the user scores, the mean, minimum, maximum,
+    standard deviation (``np.std``) and median.
+
+    The message templates contain no space before the platform name; the
+    script passes platform strings that start with a space.
+
+    Args:
+        games: DataFrame with "platform", "meta_score" and "user_review"
+            columns.
+        platform: exact platform string to report on.
+
+    Returns:
+        None.
+    """
+    meta_scores = get_platform_score_metacritic(games, platform)
+    user_scores = get_platform_score_users(games, platform)
+
+    print("\n")
+    print("Wielkość zbioru dla%s: %.0f" % (platform, len(meta_scores)))
+
+    # min()/max() raise ValueError on an empty list, so for a platform
+    # without rows stop after reporting a size of 0.
+    if not meta_scores:
+        return
+
+    # The two sections differ only in the source name inside each message.
+    sections = (("metacritic", meta_scores), ("użytkowników", user_scores))
+    for source, scores in sections:
+        print("\n")
+        print("Średnia ocen %s dla%s: %.2f" % (source, platform, np.mean(scores)))
+        print("Minimalna ocena %s dla%s: %.2f" % (source, platform, min(scores)))
+        print("Maksymalna ocena %s dla%s: %.2f" % (source, platform, max(scores)))
+        print("Odchylenie standardowe w ocenach %s dla%s: %.2f" % (source, platform, np.std(scores)))
+        print("Mediana ocen %s dla%s: %.2f" % (source, platform, np.median(scores)))
+
+def dataset_information(games):
+    """Print summary statistics for the whole data set and plot it.
+
+    Prints the number of non-missing "meta_score" values and then, for
+    "meta_score" and for "user_review", the mean, minimum, maximum,
+    standard deviation and median. Finally draws the score distribution
+    with ``create_plot``.
+
+    The values are read from the Series reductions (``count``, ``min``,
+    ``max``, ``std``), which are the same quantities ``describe()``
+    reports. This avoids recomputing ``describe()`` for every line and
+    avoids indexing a string-labelled Series with the position ``[0]``.
+
+    Args:
+        games: DataFrame with numeric "meta_score" and "user_review"
+            columns.
+
+    Returns:
+        None.
+    """
+    meta_scores = games["meta_score"]
+    user_scores = games["user_review"]
+
+    print("Wielkość zbioru: %.0f" % meta_scores.count())
+
+    # The two sections differ only in the source name inside each message.
+    sections = (("metacritic", meta_scores), ("użytkowników", user_scores))
+    for source, scores in sections:
+        print("\n")
+        print("Średnia ocen %s: %.2f" % (source, scores.mean()))
+        print("Minimalna ocena %s: %.2f" % (source, scores.min()))
+        print("Maksymalna ocena %s: %.2f" % (source, scores.max()))
+        print("Odchylenie standardowe w ocenach %s: %.2f" % (source, scores.std()))
+        print("Mediana ocen %s: %.2f" % (source, scores.median()))
+
+    # plot
+    create_plot(games)
+
+
+
+
+
+# Load the raw data set.
+games = pd.read_csv("games.csv")
+
+# Cleaning pipeline, applied in place and in this order:
+#   1. drop the " Stadia" rows (5 rows according to the original note),
+#   2. drop the rows whose "user_review" is "tbd",
+#   3. convert "user_review" to a numeric column,
+#   4. normalise "meta_score" and "user_review".
+for _cleaning_step in (delete_stadia, delete_tbd, user_review_to_numeric, normalization):
+    _cleaning_step(games)
+
+# Print the summary of the whole data set.
+dataset_information(games)
+
+# Print the summary of a single platform.
+platform_information(games, " Xbox 360")
+
+# Split into train and test subsets.
+games_train, games_test = create_train_test(games)