changing dataExtraction file, changing Json files to csv

This commit is contained in:
unknown 2023-11-21 17:33:42 +01:00
parent 5423eefd8b
commit 9bf299c778
2 changed files with 3874 additions and 7 deletions

File diff suppressed because it is too large Load Diff

View File

@ -3,20 +3,125 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"vscode": { "outputs": [],
"languageId": "plaintext" "source": []
} },
}, {
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"sdas" "# librarys:\n",
"\n",
"import json\n",
"import csv\n",
"import os\n",
"\n",
"import pandas as pd\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# data extraction function \n",
"#\n",
"# input - file with events\n",
"# output - file with only goals events\n",
"\n",
"\n",
"# unfinished\n",
"def goals_extraction(file):\n",
" return file"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"ename": "UnsupportedOperation",
"evalue": "not readable",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mUnsupportedOperation\u001b[0m Traceback (most recent call last)",
"\u001b[1;32mc:\\Users\\MSi\\Desktop\\data_repo\\fantastyczne_gole\\notebooks\\dataExtraction.ipynb Cell 4\u001b[0m line \u001b[0;36m3\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/MSi/Desktop/data_repo/fantastyczne_gole/notebooks/dataExtraction.ipynb#W2sZmlsZQ%3D%3D?line=28'>29</a>\u001b[0m count \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/MSi/Desktop/data_repo/fantastyczne_gole/notebooks/dataExtraction.ipynb#W2sZmlsZQ%3D%3D?line=29'>30</a>\u001b[0m csv_writer\u001b[39m.\u001b[39mwriterow(data\u001b[39m.\u001b[39mvalues())\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/Users/MSi/Desktop/data_repo/fantastyczne_gole/notebooks/dataExtraction.ipynb#W2sZmlsZQ%3D%3D?line=31'>32</a>\u001b[0m df_temp \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39;49mread_csv(csvFile, index_col\u001b[39m=\u001b[39;49m\u001b[39mNone\u001b[39;49;00m, header\u001b[39m=\u001b[39;49m\u001b[39m0\u001b[39;49m)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/MSi/Desktop/data_repo/fantastyczne_gole/notebooks/dataExtraction.ipynb#W2sZmlsZQ%3D%3D?line=32'>33</a>\u001b[0m csvFile\u001b[39m.\u001b[39mclose()\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/MSi/Desktop/data_repo/fantastyczne_gole/notebooks/dataExtraction.ipynb#W2sZmlsZQ%3D%3D?line=34'>35</a>\u001b[0m df_temp\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\io\\parsers\\readers.py:912\u001b[0m, in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[0;32m 899\u001b[0m kwds_defaults \u001b[39m=\u001b[39m _refine_defaults_read(\n\u001b[0;32m 900\u001b[0m dialect,\n\u001b[0;32m 901\u001b[0m delimiter,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 908\u001b[0m dtype_backend\u001b[39m=\u001b[39mdtype_backend,\n\u001b[0;32m 909\u001b[0m )\n\u001b[0;32m 910\u001b[0m kwds\u001b[39m.\u001b[39mupdate(kwds_defaults)\n\u001b[1;32m--> 912\u001b[0m \u001b[39mreturn\u001b[39;00m _read(filepath_or_buffer, kwds)\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\io\\parsers\\readers.py:577\u001b[0m, in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 574\u001b[0m _validate_names(kwds\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mnames\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m))\n\u001b[0;32m 576\u001b[0m \u001b[39m# Create the parser.\u001b[39;00m\n\u001b[1;32m--> 577\u001b[0m parser \u001b[39m=\u001b[39m TextFileReader(filepath_or_buffer, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwds)\n\u001b[0;32m 579\u001b[0m \u001b[39mif\u001b[39;00m chunksize \u001b[39mor\u001b[39;00m iterator:\n\u001b[0;32m 580\u001b[0m \u001b[39mreturn\u001b[39;00m parser\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\io\\parsers\\readers.py:1407\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 1404\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39moptions[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m kwds[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[0;32m 1406\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles: IOHandles \u001b[39m|\u001b[39m \u001b[39mNone\u001b[39;00m \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m-> 1407\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_engine \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_make_engine(f, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mengine)\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\io\\parsers\\readers.py:1679\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[1;34m(self, f, engine)\u001b[0m\n\u001b[0;32m 1676\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(msg)\n\u001b[0;32m 1678\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m-> 1679\u001b[0m \u001b[39mreturn\u001b[39;00m mapping[engine](f, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39moptions)\n\u001b[0;32m 1680\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m:\n\u001b[0;32m 1681\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\io\\parsers\\c_parser_wrapper.py:93\u001b[0m, in \u001b[0;36mCParserWrapper.__init__\u001b[1;34m(self, src, **kwds)\u001b[0m\n\u001b[0;32m 90\u001b[0m \u001b[39mif\u001b[39;00m kwds[\u001b[39m\"\u001b[39m\u001b[39mdtype_backend\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mpyarrow\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[0;32m 91\u001b[0m \u001b[39m# Fail here loudly instead of in cython after reading\u001b[39;00m\n\u001b[0;32m 92\u001b[0m import_optional_dependency(\u001b[39m\"\u001b[39m\u001b[39mpyarrow\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m---> 93\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_reader \u001b[39m=\u001b[39m parsers\u001b[39m.\u001b[39mTextReader(src, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwds)\n\u001b[0;32m 95\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39munnamed_cols \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_reader\u001b[39m.\u001b[39munnamed_cols\n\u001b[0;32m 97\u001b[0m \u001b[39m# error: Cannot determine type of 'names'\u001b[39;00m\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\_libs\\parsers.pyx:548\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__\u001b[1;34m()\u001b[0m\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\_libs\\parsers.pyx:637\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._get_header\u001b[1;34m()\u001b[0m\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\_libs\\parsers.pyx:848\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._tokenize_rows\u001b[1;34m()\u001b[0m\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\_libs\\parsers.pyx:859\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._check_tokenize_status\u001b[1;34m()\u001b[0m\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\_libs\\parsers.pyx:2014\u001b[0m, in \u001b[0;36mpandas._libs.parsers.raise_parser_error\u001b[1;34m()\u001b[0m\n",
"\u001b[1;31mUnsupportedOperation\u001b[0m: not readable"
]
}
],
"source": [
"# os path\n",
"\n",
"\n",
"\n",
"#path = Path('data','la_liga_events','7298.json')\n",
"\n",
"\n",
"def openGameFile(name):\n",
" path = os.path.join(\"..\",\"data\",\"la_liga_events\",name)\n",
" f = open(path)\n",
" data = json.load(f)\n",
" return data\n",
"\n",
"name = \"7298.json\"\n",
"\n",
"jsonFile = openGameFile(name)\n",
"\n",
"csvFilePath = os.path.join(\"..\",\"data\",\"la_liga_events_csv\",\"test.csv\")\n",
"csvFile = open(csvFilePath, 'w', newline='')\n",
"\n",
"csv_writer = csv.writer(csvFile)\n",
"\n",
"count = 0\n",
"\n",
"for data in jsonFile:\n",
" if count == 0:\n",
" header = data.keys()\n",
" csv_writer.writerow(header)\n",
" count += 1\n",
" csv_writer.writerow(data.values())\n",
" \n",
"df_temp = pd.read_csv(csvFile, index_col=None, header=0)\n",
"csvFile.close()\n",
"\n",
"df_temp"
] ]
} }
], ],
"metadata": { "metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": { "language_info": {
"name": "python" "codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
} }
}, },
"nbformat": 4, "nbformat": 4,