Added new Jenkinsfile and Kaggle download script

This commit is contained in:
AWieczarek 2024-03-24 15:22:43 +01:00
parent 87002376f2
commit 1abd793e3c
2 changed files with 118 additions and 47 deletions

View File

@ -12,57 +12,75 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 1,
"id": "800bc7a7-aa60-4db8-b170-a5a7340520aa",
"metadata": {},
"metadata": {
"ExecuteTime": {
"start_time": "2024-03-24T15:19:23.899243Z",
"end_time": "2024-03-24T15:19:50.743948Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Defaulting to user installation because normal site-packages is not writeable\n",
"Requirement already satisfied: kaggle in /home/students/s464979/.local/lib/python3.9/site-packages (1.6.6)\n",
"Requirement already satisfied: bleach in /usr/local/lib/python3.9/dist-packages (from kaggle) (5.0.1)\n",
"Requirement already satisfied: certifi in /usr/local/lib/python3.9/dist-packages (from kaggle) (2022.9.14)\n",
"Requirement already satisfied: python-dateutil in /usr/local/lib/python3.9/dist-packages (from kaggle) (2.8.2)\n",
"Requirement already satisfied: python-slugify in /home/students/s464979/.local/lib/python3.9/site-packages (from kaggle) (8.0.4)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from kaggle) (2.28.1)\n",
"Requirement already satisfied: six>=1.10 in /usr/lib/python3/dist-packages (from kaggle) (1.16.0)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from kaggle) (4.64.1)\n",
"Requirement already satisfied: urllib3 in /usr/local/lib/python3.9/dist-packages (from kaggle) (1.26.12)\n",
"Requirement already satisfied: webencodings in /usr/local/lib/python3.9/dist-packages (from bleach->kaggle) (0.5.1)\n",
"Requirement already satisfied: text-unidecode>=1.3 in /home/students/s464979/.local/lib/python3.9/site-packages (from python-slugify->kaggle) (1.3)\n",
"Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.9/dist-packages (from requests->kaggle) (2.1.1)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->kaggle) (3.4)\n",
"Collecting kaggle\n",
" Downloading kaggle-1.6.6.tar.gz (84 kB)\n",
" ---------------------------------------- 84.6/84.6 kB 2.4 MB/s eta 0:00:00\n",
" Preparing metadata (setup.py): started\n",
" Preparing metadata (setup.py): finished with status 'done'\n",
"Requirement already satisfied: six>=1.10 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from kaggle) (1.16.0)\n",
"Requirement already satisfied: certifi in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from kaggle) (2022.12.7)\n",
"Requirement already satisfied: python-dateutil in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from kaggle) (2.8.2)\n",
"Requirement already satisfied: requests in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from kaggle) (2.28.1)\n",
"Requirement already satisfied: tqdm in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from kaggle) (4.64.1)\n",
"Requirement already satisfied: python-slugify in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from kaggle) (5.0.2)\n",
"Requirement already satisfied: urllib3 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from kaggle) (1.26.14)\n",
"Requirement already satisfied: bleach in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from kaggle) (4.1.0)\n",
"Requirement already satisfied: webencodings in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from bleach->kaggle) (0.5.1)\n",
"Requirement already satisfied: packaging in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from bleach->kaggle) (22.0)\n",
"Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n",
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from requests->kaggle) (3.4)\n",
"Requirement already satisfied: charset-normalizer<3,>=2 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from requests->kaggle) (2.0.4)\n",
"Requirement already satisfied: colorama in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from tqdm->kaggle) (0.4.6)\n",
"Building wheels for collected packages: kaggle\n",
" Building wheel for kaggle (setup.py): started\n",
" Building wheel for kaggle (setup.py): finished with status 'done'\n",
" Created wheel for kaggle: filename=kaggle-1.6.6-py3-none-any.whl size=111955 sha256=23592736409344e3027e92f5ac103680cd5efb348835a123a68118e729e02b66\n",
" Stored in directory: c:\\users\\adamw\\appdata\\local\\pip\\cache\\wheels\\54\\6e\\ff\\d5ab6af2287a2d0c5b8cea9328fb14940ca253fe60214a99c8\n",
"Successfully built kaggle\n",
"Installing collected packages: kaggle\n",
"Successfully installed kaggle-1.6.6\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Defaulting to user installation because normal site-packages is not writeable\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.9/dist-packages (1.3.5)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.9/dist-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: pytz>=2017.3 in /usr/lib/python3/dist-packages (from pandas) (2021.1)\n",
"Requirement already satisfied: numpy>=1.17.3 in /home/students/s464979/.local/lib/python3.9/site-packages (from pandas) (1.26.4)\n",
"Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas) (1.16.0)\n",
"Requirement already satisfied: pandas in c:\\users\\adamw\\anaconda3\\lib\\site-packages (1.5.3)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from pandas) (2022.7)\n",
"Requirement already satisfied: numpy>=1.21.0 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from pandas) (1.23.5)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Defaulting to user installation because normal site-packages is not writeable\n",
"Requirement already satisfied: numpy in /home/students/s464979/.local/lib/python3.9/site-packages (1.26.4)\n",
"Requirement already satisfied: numpy in c:\\users\\adamw\\anaconda3\\lib\\site-packages (1.23.5)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Defaulting to user installation because normal site-packages is not writeable\n",
"Requirement already satisfied: scikit-learn in /usr/lib/python3/dist-packages (0.23.2)\n",
"Requirement already satisfied: scikit-learn in c:\\users\\adamw\\anaconda3\\lib\\site-packages (1.2.1)\n",
"Requirement already satisfied: numpy>=1.17.3 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from scikit-learn) (1.23.5)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from scikit-learn) (2.2.0)\n",
"Requirement already satisfied: joblib>=1.1.1 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from scikit-learn) (1.1.1)\n",
"Requirement already satisfied: scipy>=1.3.2 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from scikit-learn) (1.10.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Defaulting to user installation because normal site-packages is not writeable\n",
"Requirement already satisfied: seaborn in /usr/local/lib/python3.9/dist-packages (0.12.0)\n",
"Requirement already satisfied: numpy>=1.17 in /home/students/s464979/.local/lib/python3.9/site-packages (from seaborn) (1.26.4)\n",
"Requirement already satisfied: pandas>=0.25 in /usr/local/lib/python3.9/dist-packages (from seaborn) (1.3.5)\n",
"Requirement already satisfied: matplotlib>=3.1 in /usr/local/lib/python3.9/dist-packages (from seaborn) (3.6.2)\n",
"Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.9/dist-packages (from matplotlib>=3.1->seaborn) (1.0.6)\n",
"Requirement already satisfied: cycler>=0.10 in /usr/lib/python3/dist-packages (from matplotlib>=3.1->seaborn) (0.10.0)\n",
"Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib>=3.1->seaborn) (4.38.0)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in /usr/lib/python3/dist-packages (from matplotlib>=3.1->seaborn) (1.3.1)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib>=3.1->seaborn) (21.3)\n",
"Requirement already satisfied: pillow>=6.2.0 in /home/students/s464979/.local/lib/python3.9/site-packages (from matplotlib>=3.1->seaborn) (10.2.0)\n",
"Requirement already satisfied: pyparsing>=2.2.1 in /usr/lib/python3/dist-packages (from matplotlib>=3.1->seaborn) (2.4.7)\n",
"Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.9/dist-packages (from matplotlib>=3.1->seaborn) (2.8.2)\n",
"Requirement already satisfied: pytz>=2017.3 in /usr/lib/python3/dist-packages (from pandas>=0.25->seaborn) (2021.1)\n",
"Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7->matplotlib>=3.1->seaborn) (1.16.0)\n",
"Requirement already satisfied: seaborn in c:\\users\\adamw\\anaconda3\\lib\\site-packages (0.12.2)\n",
"Requirement already satisfied: numpy!=1.24.0,>=1.17 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from seaborn) (1.23.5)\n",
"Requirement already satisfied: matplotlib!=3.6.1,>=3.1 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from seaborn) (3.7.0)\n",
"Requirement already satisfied: pandas>=0.25 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from seaborn) (1.5.3)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (3.0.9)\n",
"Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (1.0.5)\n",
"Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (4.25.0)\n",
"Requirement already satisfied: pillow>=6.2.0 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (9.4.0)\n",
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (2.8.2)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (22.0)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (0.11.0)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (1.4.4)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from pandas>=0.25->seaborn) (2022.7)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.1->seaborn) (1.16.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
@ -77,10 +95,63 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "f132ca66-2325-48e0-8bf8-ff983d8ad1ce",
"metadata": {},
"outputs": [],
"metadata": {
"ExecuteTime": {
"start_time": "2024-03-24T15:21:05.151558Z",
"end_time": "2024-03-24T15:21:12.115431Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading 1-5-million-beer-reviews-from-beer-advocate.zip to C:\\Users\\adamw\\REPOS\\ium_464979\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
" 0%| | 0.00/32.5M [00:00<?, ?B/s]\n",
" 3%|3 | 1.00M/32.5M [00:00<00:21, 1.53MB/s]\n",
" 6%|6 | 2.00M/32.5M [00:00<00:11, 2.78MB/s]\n",
" 9%|9 | 3.00M/32.5M [00:00<00:07, 3.87MB/s]\n",
" 12%|#2 | 4.00M/32.5M [00:01<00:06, 4.72MB/s]\n",
" 15%|#5 | 5.00M/32.5M [00:01<00:05, 5.20MB/s]\n",
" 18%|#8 | 6.00M/32.5M [00:01<00:05, 5.08MB/s]\n",
" 22%|##1 | 7.00M/32.5M [00:01<00:05, 5.19MB/s]\n",
" 25%|##4 | 8.00M/32.5M [00:01<00:04, 5.21MB/s]\n",
" 28%|##7 | 9.00M/32.5M [00:02<00:04, 5.12MB/s]\n",
" 31%|### | 10.0M/32.5M [00:02<00:04, 5.25MB/s]\n",
" 34%|###3 | 11.0M/32.5M [00:02<00:04, 5.50MB/s]\n",
" 37%|###6 | 12.0M/32.5M [00:02<00:03, 6.10MB/s]\n",
" 40%|#### | 13.0M/32.5M [00:02<00:03, 6.57MB/s]\n",
" 43%|####3 | 14.0M/32.5M [00:02<00:03, 6.39MB/s]\n",
" 46%|####6 | 15.0M/32.5M [00:03<00:03, 6.10MB/s]\n",
" 49%|####9 | 16.0M/32.5M [00:03<00:02, 5.83MB/s]\n",
" 52%|#####2 | 17.0M/32.5M [00:03<00:02, 5.85MB/s]\n",
" 55%|#####5 | 18.0M/32.5M [00:03<00:02, 5.87MB/s]\n",
" 59%|#####8 | 19.0M/32.5M [00:03<00:02, 6.00MB/s]\n",
" 62%|######1 | 20.0M/32.5M [00:03<00:01, 6.79MB/s]\n",
" 65%|######4 | 21.0M/32.5M [00:04<00:01, 7.17MB/s]\n",
" 71%|####### | 23.0M/32.5M [00:04<00:01, 8.01MB/s]\n",
" 74%|#######3 | 24.0M/32.5M [00:04<00:01, 7.80MB/s]\n",
" 77%|#######7 | 25.0M/32.5M [00:04<00:01, 7.72MB/s]\n",
" 80%|######## | 26.0M/32.5M [00:04<00:00, 7.58MB/s]\n",
" 83%|########3 | 27.0M/32.5M [00:05<00:01, 5.54MB/s]\n",
" 86%|########6 | 28.0M/32.5M [00:05<00:00, 5.95MB/s]\n",
" 89%|########9 | 29.0M/32.5M [00:05<00:00, 6.66MB/s]\n",
" 95%|#########5| 31.0M/32.5M [00:05<00:00, 7.50MB/s]\n",
"100%|##########| 32.5M/32.5M [00:05<00:00, 8.35MB/s]\n",
"100%|##########| 32.5M/32.5M [00:05<00:00, 6.00MB/s]\n"
]
}
],
"source": [
"!kaggle datasets download -d thedevastator/1-5-million-beer-reviews-from-beer-advocate"
]

View File

@ -2,11 +2,11 @@
pip install kaggle
kaggle datasets download -d $DATASET_ID
kaggle datasets download -d thedevastator/1-5-million-beer-reviews-from-beer-advocate
unzip -o $DATASET_FILE
unzip -o 1-5-million-beer-reviews-from-beer-advocate.zip
shuf $DATASET_FILE > shuffled_dataset.csv
shuf beer_reviews.csv > shuffled_dataset.csv
split -l 80000 shuffled_dataset.csv train.csv
split -l 10000 train.csv dev.csv