ium_478815/IUM2.ipynb
2022-03-27 20:55:06 +02:00

187 lines
3.4 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "5d8abefd",
"metadata": {},
"outputs": [],
"source": [
"# Use the %pip magic rather than !pip so the packages are installed into the\n",
"# environment of the running kernel, not whichever pip is first on PATH.\n",
"%pip install kaggle\n",
"%pip install pandas\n",
"%pip install seaborn\n",
"%pip install opendatasets --upgrade\n",
"import opendatasets as od\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5911d036",
"metadata": {},
"outputs": [],
"source": [
"# Explicit %cd magic: a bare `cd` only works while IPython automagic is enabled\n",
"# and no Python variable named `cd` exists. Later cells use relative paths\n",
"# (housedata.zip, data.csv, ...) that resolve against this directory.\n",
"%cd ~/.kaggle"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d07bb31",
"metadata": {},
"outputs": [],
"source": [
"!kaggle datasets download -d shree1992/housedata"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9c04f525",
"metadata": {},
"outputs": [],
"source": [
"!unzip -o housedata.zip"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b407a064",
"metadata": {},
"outputs": [],
"source": [
"!wc -l data.csv\n",
"data = pd.read_csv('data.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "17d29b04",
"metadata": {},
"outputs": [],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5fa21ef9",
"metadata": {},
"outputs": [],
"source": [
"# Skip the CSV header (line 1) and shuffle only the data rows, so the header\n",
"# cannot land among the rows and no data row is dropped.\n",
"!tail -n +2 data.csv | shuf > data.csv.shuf"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "baa9b34e",
"metadata": {},
"outputs": [],
"source": [
"# Split the shuffled rows without overlap:\n",
"#   rows 1-460 -> test, rows 461-920 -> dev, rows 921-end -> train.\n",
"# Every split starts with the original header line so pd.read_csv does not\n",
"# consume the first data row as column names.\n",
"!head -n 1 data.csv > data.csv.test\n",
"!head -n 460 data.csv.shuf >> data.csv.test\n",
"!head -n 1 data.csv > data.csv.dev\n",
"!head -n 920 data.csv.shuf | tail -n 460 >> data.csv.dev\n",
"!head -n 1 data.csv > data.csv.train\n",
"!tail -n +921 data.csv.shuf >> data.csv.train"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "30b520c0",
"metadata": {},
"outputs": [],
"source": [
"!rm data.csv.shuf\n",
"!wc -l data.csv*"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b7652c69",
"metadata": {},
"outputs": [],
"source": [
"data_test = pd.read_csv('data.csv.test')\n",
"data_dev = pd.read_csv('data.csv.dev')\n",
"data_train = pd.read_csv('data.csv.train')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4670def",
"metadata": {},
"outputs": [],
"source": [
"data.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b18d32a1",
"metadata": {},
"outputs": [],
"source": [
"data_train.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6b8d6568",
"metadata": {},
"outputs": [],
"source": [
"data_dev.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dd366750",
"metadata": {},
"outputs": [],
"source": [
"data_test.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2801c205",
"metadata": {},
"outputs": [],
"source": [
"# Shell commands need the `!` prefix; without it this line is parsed as Python\n",
"# and raises a SyntaxError. Prints the line numbers of empty lines in data.csv.\n",
"!grep -P \"^$\" -n data.csv"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}