fix script download

This commit is contained in:
Sheaza 2024-04-02 19:43:25 +02:00
parent 76154a9157
commit 00b57384c2
5 changed files with 26 additions and 3 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
# Local virtual environment directory at the repository root.
/venv/
# Presumably the Kaggle API credentials file; must not be committed — confirm.
/kaggle.json

13
Dockerfile Normal file
View File

@ -0,0 +1,13 @@
# Image that carries the dataset download and statistics scripts.
FROM python:3.11

# Refresh the package index and install the compiler toolchain in ONE layer so
# a cached, stale index is never paired with a newer install step. The apt
# lists are removed in the same layer to keep it small. The former blanket
# `apt-get upgrade` is dropped: pick up OS fixes by rebuilding on a newer base
# image tag instead, which keeps builds reproducible.
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential \
    && rm -rf /var/lib/apt/lists/*

# Keep pip itself current; --no-cache-dir avoids baking the wheel cache into
# the image.
RUN python -m pip install --no-cache-dir --upgrade pip

# Install Python dependencies before the scripts are copied so this layer is
# reused from cache when only the scripts change.
COPY requirements.txt /tmp/requirements.txt
RUN python -m pip install --no-cache-dir -r /tmp/requirements.txt

# Absolute path instead of the relative `./app`; with the base image's default
# working directory of `/` both resolve to /app.
WORKDIR /app
COPY get_dataset.py get_stats.py ./

View File

@ -1,10 +1,15 @@
import opendatasets as od
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import sys
import os
# NOTE(review): this listing is a rendered diff whose +/- markers were lost.
# The next two lines (opendatasets download, then reading the CSV from the
# dataset sub-directory) look like the pre-change version that the kaggle CLI
# lines further down replace — confirm against the real file before assuming
# both download paths are present.
od.download("https://www.kaggle.com/datasets/nikhil7280/student-performance-multiple-linear-regression/code")
data = pd.read_csv("student-performance-multiple-linear-regression/Student_Performance.csv")
# Kaggle credentials come from the first two command-line arguments and are
# placed in the process environment (presumably read by the kaggle CLI below).
# No check is made that both arguments were supplied.
os.environ["KAGGLE_USERNAME"] = sys.argv[1]
os.environ["KAGGLE_KEY"] = sys.argv[2]
# Download and unzip the dataset through the kaggle CLI. The exit status
# returned by os.system is ignored, so a failed download is not detected here.
os.system("kaggle datasets download -d nikhil7280/student-performance-multiple-linear-regression --unzip")
# Load the CSV by relative path, i.e. from the current working directory.
data = pd.read_csv("Student_Performance.csv")
# Print the first rows of the loaded frame.
print(data.head())
# Remove duplicate rows in place.
data.drop_duplicates(inplace=True)
# Recode the "Extracurricular Activities" column: 'Yes' -> 1, 'No' -> 0.
data["Extracurricular Activities"] = data["Extracurricular Activities"].replace({'Yes': 1, 'No': 0})

3
requirements.txt Normal file
View File

@ -0,0 +1,3 @@
# Python dependencies installed into the image by the Dockerfile.
# NOTE(review): versions are unpinned; consider pinning for reproducible builds.
# Provides the `kaggle` command the download script invokes via os.system.
kaggle
# Imported by the script as `pd` for CSV loading and cleaning.
pandas
# Provides the `sklearn` package imported by the script.
scikit-learn