Dodanie dockerfile

This commit is contained in:
Anna Nowak 2021-04-11 17:20:04 +02:00
parent 98001262bc
commit cbdebbbd53
7 changed files with 62 additions and 6 deletions

1
.gitignore vendored
View File

@ -59,3 +59,4 @@ docs/source/changelog.md
fifa19*
*.csv
stat.txt
.venv/

10
Dockerfile Normal file
View File

@ -0,0 +1,10 @@
# Image that runs "Zadanie 1.py" to fetch and split the Kaggle FIFA 19 dataset.
# KAGGLE_USERNAME and KAGGLE_KEY must be supplied at run time (docker run -e ...);
# they are deliberately not baked into the image.

# Pinned release instead of :latest: Ubuntu 20.04 provides the python3.8 package
# that this image installs and runs; newer releases behind :latest do not.
FROM ubuntu:20.04

# update + install + cleanup stay in one layer so the apt lists never persist
# in the image and a stale package index is never reused from cache.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        python3-pip \
        python3.8 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Dependencies are installed before the script is copied so that editing the
# script does not invalidate the (slow) dependency layer.
COPY ["requirements.txt", "./"]
RUN pip3 install --no-cache-dir -r requirements.txt

COPY ["Zadanie 1.py", "."]

# Exec form: the interpreter runs as PID 1 and receives signals from `docker stop`.
CMD ["python3.8", "Zadanie 1.py"]

2
Jenkinsfile vendored
View File

@ -42,3 +42,5 @@ pipeline {
}
}
}
//docker run -e KAGGLE_KEY -e KAGGLE_USERNAME -ti ium_434760:latest

View File

@ -1,2 +1,3 @@
# ium_434760
## Uruchomienie
docker run -e KAGGLE_KEY -e KAGGLE_USERNAME -ti adnovac/ium_s434760:latest

View File

@ -45,7 +45,9 @@
{
"cell_type": "code",
"execution_count": 208,
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
@ -78,6 +80,8 @@
"outputs": [],
"source": [
"import zipfile\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"with zipfile.ZipFile('fifa19.zip', 'r') as zip_ref:\n",
" zip_ref.extractall('.')"
@ -136,8 +140,7 @@
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"\n",
"df=pd.read_csv('data.csv')\n",
"train, dev = train_test_split(df, train_size=0.6, test_size=0.4, shuffle=True)\n",

35
Zadanie 1.py Normal file
View File

@ -0,0 +1,35 @@
"""Download the Kaggle FIFA 19 dataset, clean it and split it into CSV files.

Steps performed when run as a script:
  1. Require the KAGGLE_KEY and KAGGLE_USERNAME environment variables.
  2. Download ``fifa19.zip`` with the ``kaggle`` CLI (skipped if already present)
     and extract it into the current directory.
  3. Drop rows without a release clause, scale ``Overall`` into the 0-1 range
     and convert ``Release Clause`` to a plain number.
  4. Write the cleaned ``data.csv`` and a 60/20/20 train/dev/test split to
     ``train.csv``, ``dev.csv`` and ``test.csv``.
"""
import os
import subprocess
import sys
import zipfile

import pandas as pd

# Multipliers for the magnitude suffixes used in the money columns.
_SUFFIX_MULTIPLIERS = {"K": 1000, "M": 1000000}


def parse_release_clause(values):
    """Convert money strings such as "€226.5M" or "€365K" to float amounts.

    Args:
        values: pandas Series of strings without missing values.

    Returns:
        pandas Series of floats: the numeric part multiplied by 1000 for a
        trailing "K" and by 1000000 for a trailing "M". Values without a
        suffix are returned unscaled.
    """
    # NOTE(review): the currency sign is assumed to be "€", as in the published
    # FIFA 19 data; confirm against the dataset actually downloaded.
    cleaned = values.str.replace("€", "", regex=False)
    suffix = cleaned.str.extract(r'[\d\.]+([KM])$', expand=False)
    # A missing suffix maps to NaN; treat it as "no scaling" instead of failing.
    multiplier = suffix.map(_SUFFIX_MULTIPLIERS).fillna(1).astype(float)
    amount = cleaned.str.replace(r'[KM]$', '', regex=True).astype(float)
    return amount * multiplier


def main():
    """Run the download, cleaning and splitting pipeline in the working directory."""
    # Imported here so that parse_release_clause can be used without scikit-learn.
    from sklearn.model_selection import train_test_split

    if os.getenv("KAGGLE_KEY") is None or os.getenv("KAGGLE_USERNAME") is None:
        print("Brak zmiennych środowiskowych KAGGLE_KEY lub KAGGLE_USERNAME")
        # Non-zero status so Docker/CI notice the misconfiguration.
        sys.exit(1)

    if not os.path.isfile('fifa19.zip'):
        # check=True: stop here if the download fails rather than failing later
        # with a confusing "file not found" while opening the archive.
        subprocess.run(
            ['kaggle', 'datasets', 'download', '-d', 'karangadiya/fifa19'],
            check=True,
        )

    with zipfile.ZipFile('fifa19.zip', 'r') as zip_ref:
        zip_ref.extractall('.')

    df = pd.read_csv('data.csv')
    # .copy() so the column assignments below operate on an independent frame.
    df = df[df["Release Clause"].notna()].copy()

    # Only rescale when the column is not already in the 0-1 range.
    if df["Overall"].mean() > 1:
        df["Overall"] = df["Overall"] / 100

    df["Release Clause"] = parse_release_clause(df["Release Clause"])
    df.to_csv('data.csv')

    # 60% train; the remaining 40% is halved into dev and test.
    train, dev = train_test_split(df, train_size=0.6, test_size=0.4, shuffle=True)
    dev, test = train_test_split(dev, train_size=0.5, test_size=0.5, shuffle=False)

    test.to_csv('test.csv')
    dev.to_csv('dev.csv')
    train.to_csv('train.csv')


if __name__ == "__main__":
    main()

4
requirements.txt Normal file
View File

@ -0,0 +1,4 @@
kaggle
pandas
numpy
# "sklearn" on PyPI is a deprecated placeholder; the real package is scikit-learn.
scikit-learn