zajęcia_2 dataset
This commit is contained in:
parent
0e772419b8
commit
d9491fd0e1
152
.gitignore
vendored
Normal file
152
.gitignore
vendored
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
# Created by .ignore support plugin (hsz.mobi)
|
||||||
|
### Python template
|
||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
env/
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*,cover
|
||||||
|
.hypothesis/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
target/
|
||||||
|
|
||||||
|
# IPython Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
.python-version
|
||||||
|
|
||||||
|
# celery beat schedule file
|
||||||
|
celerybeat-schedule
|
||||||
|
|
||||||
|
# dotenv
|
||||||
|
.env
|
||||||
|
|
||||||
|
# virtualenv
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
### VirtualEnv template
|
||||||
|
# Virtualenv
|
||||||
|
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
|
||||||
|
.Python
|
||||||
|
[Bb]in
|
||||||
|
[Ii]nclude
|
||||||
|
[Ll]ib
|
||||||
|
[Ll]ib64
|
||||||
|
[Ll]ocal
|
||||||
|
[Ss]cripts
|
||||||
|
pyvenv.cfg
|
||||||
|
.venv
|
||||||
|
pip-selfcheck.json
|
||||||
|
### JetBrains template
|
||||||
|
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
|
||||||
|
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||||
|
|
||||||
|
# User-specific stuff:
|
||||||
|
.idea/workspace.xml
|
||||||
|
.idea/tasks.xml
|
||||||
|
.idea/dictionaries
|
||||||
|
.idea/vcs.xml
|
||||||
|
.idea/jsLibraryMappings.xml
|
||||||
|
|
||||||
|
# Sensitive or high-churn files:
|
||||||
|
.idea/dataSources.ids
|
||||||
|
.idea/dataSources.xml
|
||||||
|
.idea/dataSources.local.xml
|
||||||
|
.idea/sqlDataSources.xml
|
||||||
|
.idea/dynamic.xml
|
||||||
|
.idea/uiDesigner.xml
|
||||||
|
|
||||||
|
# Gradle:
|
||||||
|
.idea/gradle.xml
|
||||||
|
.idea/libraries
|
||||||
|
|
||||||
|
# Mongo Explorer plugin:
|
||||||
|
.idea/mongoSettings.xml
|
||||||
|
|
||||||
|
.idea/
|
||||||
|
|
||||||
|
## File-based project format:
|
||||||
|
*.iws
|
||||||
|
|
||||||
|
## Plugin-specific files:
|
||||||
|
|
||||||
|
# IntelliJ
|
||||||
|
/out/
|
||||||
|
|
||||||
|
# mpeltonen/sbt-idea plugin
|
||||||
|
.idea_modules/
|
||||||
|
|
||||||
|
# JIRA plugin
|
||||||
|
atlassian-ide-plugin.xml
|
||||||
|
|
||||||
|
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||||
|
com_crashlytics_export_strings.xml
|
||||||
|
crashlytics.properties
|
||||||
|
crashlytics-build.properties
|
||||||
|
fabric.properties
|
||||||
|
|
18801
Zajęcia 2/games.csv
Normal file
18801
Zajęcia 2/games.csv
Normal file
File diff suppressed because one or more lines are too long
171
Zajęcia 2/main.py
Normal file
171
Zajęcia 2/main.py
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import sklearn.model_selection
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def delete_stadia(games):
    """Remove, in place, every row whose platform is " Stadia".

    Args:
        games: DataFrame with a "platform" column. It is modified in place.

    Returns:
        None.
    """
    # Rows are selected with a boolean mask rather than by looping over
    # range(len(games)), so the result does not depend on the index labels
    # being exactly 0..n-1. The literal keeps the leading space used by the
    # original comparison.
    stadia_rows = games.index[games["platform"] == " Stadia"]
    games.drop(stadia_rows, inplace=True)
|
||||||
|
|
||||||
|
def delete_tbd(games):
    """Remove, in place, every row whose "user_review" value is "tbd".

    Args:
        games: DataFrame with a "user_review" column. It is modified in
            place.

    Returns:
        None.
    """
    # A boolean mask inspects every row. The previous range(len(games)) loop
    # looked rows up by label and swallowed the resulting KeyError, so once
    # earlier rows had been dropped (the script calls delete_stadia first)
    # the rows with the highest labels were never checked.
    tbd_rows = games.index[games["user_review"] == "tbd"]
    games.drop(tbd_rows, inplace=True)
|
||||||
|
|
||||||
|
def user_review_to_numeric(games):
    """Convert the "user_review" column from strings to numbers, in place.

    Args:
        games: DataFrame with a "user_review" column. It is modified in
            place.

    Returns:
        None.
    """
    numeric_reviews = pd.to_numeric(games["user_review"])
    games["user_review"] = numeric_reviews
|
||||||
|
|
||||||
|
def normalization(games):
    """Rescale the "meta_score" and "user_review" columns, in place.

    "meta_score" is divided by 100.0 and "user_review" by 10.0.

    Args:
        games: DataFrame with numeric "meta_score" and "user_review"
            columns. It is modified in place.

    Returns:
        None.
    """
    # NOTE(review): presumably this maps a 0-100 and a 0-10 scale onto 0-1;
    # confirm against the dataset.
    for column, divisor in (("meta_score", 100.0), ("user_review", 10.0)):
        games[column] = games[column] / divisor
|
||||||
|
|
||||||
|
def preparation_meta_plot(games):
    """Count how often each distinct "meta_score" value occurs.

    Args:
        games: DataFrame with a "meta_score" column.

    Returns:
        A list of occurrence counts, ordered by ascending score value, i.e.
        aligned with the sorted distinct scores.
    """
    # value_counts() is computed once and ordered by score; the previous
    # version recomputed it for every distinct score.
    return games["meta_score"].value_counts().sort_index().tolist()
|
||||||
|
|
||||||
|
def preparation_users_plot(games):
    """Count how often each distinct "user_review" value occurs.

    Args:
        games: DataFrame with a "user_review" column.

    Returns:
        A list of occurrence counts, ordered by ascending review value, i.e.
        aligned with the sorted distinct reviews.
    """
    # value_counts() is computed once and ordered by review value; the
    # previous version recomputed it for every distinct value.
    return games["user_review"].value_counts().sort_index().tolist()
|
||||||
|
|
||||||
|
def create_plot(games):
    """Draw the distribution of metacritic scores and user reviews.

    One line is plotted per source: the x axis holds the distinct score
    values in ascending order and the y axis the number of games with that
    score. The figure is displayed with plt.show().

    Args:
        games: DataFrame with "meta_score" and "user_review" columns.

    Returns:
        None.
    """
    meta_scores = games["meta_score"].value_counts().index.sort_values()
    meta_counts = preparation_meta_plot(games)
    user_scores = games["user_review"].value_counts().index.sort_values()
    user_counts = preparation_users_plot(games)

    plt.figure(figsize=(10, 5))
    plt.plot(meta_scores, meta_counts, color="maroon", label="metacritic")
    plt.plot(user_scores, user_counts, color="blue", label="użytkownicy")

    plt.xlabel("Ocena")
    plt.ylabel("Liczba ocen")
    plt.title("Oceny metacritic i użytkowników")
    plt.legend()
    plt.show()
|
||||||
|
|
||||||
|
def create_train_test(games, test_size=5810, random_state=1):
    """Split the dataset into train and test subsets.

    Args:
        games: DataFrame to split.
        test_size: Forwarded to scikit-learn's train_test_split. An int is
            an absolute number of test rows, a float is a fraction of the
            dataset. Defaults to 5810, the value that was previously
            hard-coded.
        random_state: Seed forwarded to train_test_split so that the split
            is reproducible. Defaults to 1, the previously hard-coded value.

    Returns:
        A (games_train, games_test) tuple.
    """
    games_train, games_test = sklearn.model_selection.train_test_split(
        games, test_size=test_size, random_state=random_state
    )
    return games_train, games_test
|
||||||
|
|
||||||
|
def get_platform_score_metacritic(games, platform):
    """Collect the metacritic scores of all games on one platform.

    Args:
        games: DataFrame with "platform" and "meta_score" columns.
        platform: Platform name, compared for exact equality with the
            "platform" column (the script passes names with a leading
            space, e.g. " Xbox 360").

    Returns:
        A list with the "meta_score" of every matching row, in row order.
        Empty when no row matches.
    """
    # A boolean mask covers every row. The previous range(len(games)) loop
    # looked rows up by label and swallowed KeyError, so after rows had been
    # dropped it skipped every row whose label was >= len(games).
    on_platform = games["platform"] == platform
    return games.loc[on_platform, "meta_score"].tolist()
|
||||||
|
|
||||||
|
def get_platform_score_users(games, platform):
    """Collect the user reviews of all games on one platform.

    Args:
        games: DataFrame with "platform" and "user_review" columns.
        platform: Platform name, compared for exact equality with the
            "platform" column (the script passes names with a leading
            space, e.g. " Xbox 360").

    Returns:
        A list with the "user_review" of every matching row, in row order.
        Empty when no row matches.
    """
    # A boolean mask covers every row. The previous range(len(games)) loop
    # looked rows up by label and swallowed KeyError, so after rows had been
    # dropped it skipped every row whose label was >= len(games).
    on_platform = games["platform"] == platform
    return games.loc[on_platform, "user_review"].tolist()
|
||||||
|
|
||||||
|
def platform_information(games, platform):
    """Print summary statistics for the games of a single platform.

    Prints the number of matching games and then, for the metacritic scores
    and for the user reviews: mean, minimum, maximum, standard deviation and
    median.

    Args:
        games: DataFrame with "platform", "meta_score" and "user_review"
            columns.
        platform: Platform name as stored in the "platform" column. The
            messages are built as "dla%s" with no separating space, so the
            name is expected to carry its own leading space (the script
            passes " Xbox 360").

    Returns:
        None.
    """
    # Local names no longer shadow the builtin `list`.
    meta_scores = get_platform_score_metacritic(games, platform)
    user_scores = get_platform_score_users(games, platform)

    print("\n")

    print("Wielkość zbioru dla%s: %.0f" % (platform, len(meta_scores)))

    # With no matching rows min()/max() would raise ValueError and the
    # numpy reductions would only yield NaN, so stop after the size line.
    if not meta_scores:
        return

    print("\n")

    # np.std uses its default ddof=0, i.e. the population standard deviation.
    print("Średnia ocen metacritic dla%s: %.2f" % (platform, np.mean(meta_scores)))
    print("Minimalna ocena metacritic dla%s: %.2f" % (platform, min(meta_scores)))
    print("Maksymalna ocena metacritic dla%s: %.2f" % (platform, max(meta_scores)))
    print("Odchylenie standardowe w ocenach metacritic dla%s: %.2f" % (platform, np.std(meta_scores)))
    print("Mediana ocen metacritic dla%s: %.2f" % (platform, np.median(meta_scores)))

    print("\n")

    print("Średnia ocen użytkowników dla%s: %.2f" % (platform, np.mean(user_scores)))
    print("Minimalna ocena użytkowników dla%s: %.2f" % (platform, min(user_scores)))
    print("Maksymalna ocena użytkowników dla%s: %.2f" % (platform, max(user_scores)))
    print("Odchylenie standardowe w ocenach użytkowników dla%s: %.2f" % (platform, np.std(user_scores)))
    print("Mediana ocen użytkowników dla%s: %.2f" % (platform, np.median(user_scores)))
|
||||||
|
|
||||||
|
def dataset_information(games):
    """Print summary statistics for the whole dataset and plot the scores.

    Prints the number of non-null metacritic scores and then, for the
    "meta_score" and "user_review" columns: mean, minimum, maximum, standard
    deviation and median. Finally draws the score distribution through
    create_plot().

    Args:
        games: DataFrame with numeric "meta_score" and "user_review"
            columns.

    Returns:
        None.
    """
    meta = games["meta_score"]
    users = games["user_review"]

    # Direct reductions replace describe().loc[[...]][0]: that form re-ran
    # describe() for every value and relied on positional integer access to
    # a label-indexed Series. Series.std() keeps describe()'s ddof=1.
    print("Wielkość zbioru: %.0f" % meta.count())

    print("\n")

    print("Średnia ocen metacritic: %.2f" % meta.mean())
    print("Minimalna ocena metacritic: %.2f" % meta.min())
    print("Maksymalna ocena metacritic: %.2f" % meta.max())
    print("Odchylenie standardowe w ocenach metacritic: %.2f" % meta.std())
    print("Mediana ocen metacritic: %.2f" % meta.median())

    print("\n")

    print("Średnia ocen użytkowników: %.2f" % users.mean())
    print("Minimalna ocena użytkowników: %.2f" % users.min())
    print("Maksymalna ocena użytkowników: %.2f" % users.max())
    print("Odchylenie standardowe w ocenach użytkowników: %.2f" % users.std())
    print("Mediana ocen użytkowników: %.2f" % users.median())

    # Plot.
    create_plot(games)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Load the data.
games = pd.read_csv("games.csv")

# Remove the 5 "Stadia" results.
delete_stadia(games)

# Remove the results that contain "tbd" in the "user_review" column.
delete_tbd(games)

# Convert the "user_review" column to a numeric type.
user_review_to_numeric(games)

# Normalize the "meta_score" and "user_review" columns.
normalization(games)

# Print information about the dataset.
dataset_information(games)

# Print information about a single platform.
platform_information(games, " Xbox 360")

# Split into train and test subsets.
games_train, games_test = create_train_test(games)
|
Loading…
Reference in New Issue
Block a user