changing dataExtraction file, changing Json files to csv
This commit is contained in:
parent
5423eefd8b
commit
9bf299c778
3762
data/la_liga_events_csv/test.csv
Normal file
3762
data/la_liga_events_csv/test.csv
Normal file
File diff suppressed because it is too large
Load Diff
@ -3,20 +3,125 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {},
|
||||||
"vscode": {
|
"outputs": [],
|
||||||
"languageId": "plaintext"
|
"source": []
|
||||||
}
|
},
|
||||||
},
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"sdas"
|
"# librarys:\n",
|
||||||
|
"\n",
|
||||||
|
"import json\n",
|
||||||
|
"import csv\n",
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 16,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# data extraction function \n",
|
||||||
|
"#\n",
|
||||||
|
"# input - file with events\n",
|
||||||
|
"# output - file with only goals events\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"# unfinished\n",
|
||||||
|
"def goals_extraction(file):\n",
|
||||||
|
" return file"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 13,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"ename": "UnsupportedOperation",
|
||||||
|
"evalue": "not readable",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||||
|
"\u001b[1;31mUnsupportedOperation\u001b[0m Traceback (most recent call last)",
|
||||||
|
"\u001b[1;32mc:\\Users\\MSi\\Desktop\\data_repo\\fantastyczne_gole\\notebooks\\dataExtraction.ipynb Cell 4\u001b[0m line \u001b[0;36m3\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/MSi/Desktop/data_repo/fantastyczne_gole/notebooks/dataExtraction.ipynb#W2sZmlsZQ%3D%3D?line=28'>29</a>\u001b[0m count \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/MSi/Desktop/data_repo/fantastyczne_gole/notebooks/dataExtraction.ipynb#W2sZmlsZQ%3D%3D?line=29'>30</a>\u001b[0m csv_writer\u001b[39m.\u001b[39mwriterow(data\u001b[39m.\u001b[39mvalues())\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/Users/MSi/Desktop/data_repo/fantastyczne_gole/notebooks/dataExtraction.ipynb#W2sZmlsZQ%3D%3D?line=31'>32</a>\u001b[0m df_temp \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39;49mread_csv(csvFile, index_col\u001b[39m=\u001b[39;49m\u001b[39mNone\u001b[39;49;00m, header\u001b[39m=\u001b[39;49m\u001b[39m0\u001b[39;49m)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/MSi/Desktop/data_repo/fantastyczne_gole/notebooks/dataExtraction.ipynb#W2sZmlsZQ%3D%3D?line=32'>33</a>\u001b[0m csvFile\u001b[39m.\u001b[39mclose()\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/MSi/Desktop/data_repo/fantastyczne_gole/notebooks/dataExtraction.ipynb#W2sZmlsZQ%3D%3D?line=34'>35</a>\u001b[0m df_temp\n",
|
||||||
|
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\io\\parsers\\readers.py:912\u001b[0m, in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[0;32m 899\u001b[0m kwds_defaults \u001b[39m=\u001b[39m _refine_defaults_read(\n\u001b[0;32m 900\u001b[0m dialect,\n\u001b[0;32m 901\u001b[0m delimiter,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 908\u001b[0m dtype_backend\u001b[39m=\u001b[39mdtype_backend,\n\u001b[0;32m 909\u001b[0m )\n\u001b[0;32m 910\u001b[0m kwds\u001b[39m.\u001b[39mupdate(kwds_defaults)\n\u001b[1;32m--> 912\u001b[0m \u001b[39mreturn\u001b[39;00m _read(filepath_or_buffer, kwds)\n",
|
||||||
|
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\io\\parsers\\readers.py:577\u001b[0m, in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 574\u001b[0m _validate_names(kwds\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mnames\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m))\n\u001b[0;32m 576\u001b[0m \u001b[39m# Create the parser.\u001b[39;00m\n\u001b[1;32m--> 577\u001b[0m parser \u001b[39m=\u001b[39m TextFileReader(filepath_or_buffer, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwds)\n\u001b[0;32m 579\u001b[0m \u001b[39mif\u001b[39;00m chunksize \u001b[39mor\u001b[39;00m iterator:\n\u001b[0;32m 580\u001b[0m \u001b[39mreturn\u001b[39;00m parser\n",
|
||||||
|
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\io\\parsers\\readers.py:1407\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 1404\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39moptions[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m kwds[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[0;32m 1406\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles: IOHandles \u001b[39m|\u001b[39m \u001b[39mNone\u001b[39;00m \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m-> 1407\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_engine \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_make_engine(f, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mengine)\n",
|
||||||
|
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\io\\parsers\\readers.py:1679\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[1;34m(self, f, engine)\u001b[0m\n\u001b[0;32m 1676\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(msg)\n\u001b[0;32m 1678\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m-> 1679\u001b[0m \u001b[39mreturn\u001b[39;00m mapping[engine](f, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39moptions)\n\u001b[0;32m 1680\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m:\n\u001b[0;32m 1681\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n",
|
||||||
|
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\io\\parsers\\c_parser_wrapper.py:93\u001b[0m, in \u001b[0;36mCParserWrapper.__init__\u001b[1;34m(self, src, **kwds)\u001b[0m\n\u001b[0;32m 90\u001b[0m \u001b[39mif\u001b[39;00m kwds[\u001b[39m\"\u001b[39m\u001b[39mdtype_backend\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mpyarrow\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[0;32m 91\u001b[0m \u001b[39m# Fail here loudly instead of in cython after reading\u001b[39;00m\n\u001b[0;32m 92\u001b[0m import_optional_dependency(\u001b[39m\"\u001b[39m\u001b[39mpyarrow\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m---> 93\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_reader \u001b[39m=\u001b[39m parsers\u001b[39m.\u001b[39mTextReader(src, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwds)\n\u001b[0;32m 95\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39munnamed_cols \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_reader\u001b[39m.\u001b[39munnamed_cols\n\u001b[0;32m 97\u001b[0m \u001b[39m# error: Cannot determine type of 'names'\u001b[39;00m\n",
|
||||||
|
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\_libs\\parsers.pyx:548\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__\u001b[1;34m()\u001b[0m\n",
|
||||||
|
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\_libs\\parsers.pyx:637\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._get_header\u001b[1;34m()\u001b[0m\n",
|
||||||
|
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\_libs\\parsers.pyx:848\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._tokenize_rows\u001b[1;34m()\u001b[0m\n",
|
||||||
|
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\_libs\\parsers.pyx:859\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._check_tokenize_status\u001b[1;34m()\u001b[0m\n",
|
||||||
|
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\pandas\\_libs\\parsers.pyx:2014\u001b[0m, in \u001b[0;36mpandas._libs.parsers.raise_parser_error\u001b[1;34m()\u001b[0m\n",
|
||||||
|
"\u001b[1;31mUnsupportedOperation\u001b[0m: not readable"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# os path\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"#path = Path('data','la_liga_events','7298.json')\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def openGameFile(name):\n",
|
||||||
|
" path = os.path.join(\"..\",\"data\",\"la_liga_events\",name)\n",
|
||||||
|
" f = open(path)\n",
|
||||||
|
" data = json.load(f)\n",
|
||||||
|
" return data\n",
|
||||||
|
"\n",
|
||||||
|
"name = \"7298.json\"\n",
|
||||||
|
"\n",
|
||||||
|
"jsonFile = openGameFile(name)\n",
|
||||||
|
"\n",
|
||||||
|
"csvFilePath = os.path.join(\"..\",\"data\",\"la_liga_events_csv\",\"test.csv\")\n",
|
||||||
|
"csvFile = open(csvFilePath, 'w', newline='')\n",
|
||||||
|
"\n",
|
||||||
|
"csv_writer = csv.writer(csvFile)\n",
|
||||||
|
"\n",
|
||||||
|
"count = 0\n",
|
||||||
|
"\n",
|
||||||
|
"for data in jsonFile:\n",
|
||||||
|
" if count == 0:\n",
|
||||||
|
" header = data.keys()\n",
|
||||||
|
" csv_writer.writerow(header)\n",
|
||||||
|
" count += 1\n",
|
||||||
|
" csv_writer.writerow(data.values())\n",
|
||||||
|
" \n",
|
||||||
|
"df_temp = pd.read_csv(csvFile, index_col=None, header=0)\n",
|
||||||
|
"csvFile.close()\n",
|
||||||
|
"\n",
|
||||||
|
"df_temp"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"name": "python"
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.11"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
Loading…
Reference in New Issue
Block a user