cleanup
This commit is contained in:
parent
3f3d593e77
commit
3aefa5d8cd
@ -15,15 +15,8 @@ RUN python3 -m pip install pandas
|
||||
|
||||
RUN python3 -m pip freeze
|
||||
|
||||
# ENV PATH="/root/.local/bin:${PATH}"
|
||||
|
||||
COPY . .
|
||||
|
||||
# COPY ./figlet-loop.sh ./
|
||||
# COPY ./download.sh ./
|
||||
# COPY ./script.py ./
|
||||
# COPY ./kaggle.json /root/.kaggle/kaggle.json
|
||||
|
||||
ARG KAGGLE_USERNAME=testKAGGLE_USERNAME
|
||||
ARG KAGGLE_KEY=test1KAGGLE_KEY
|
||||
|
||||
|
@ -1,24 +0,0 @@
|
||||
FROM ubuntu:latest
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update && apt-get install -y figlet python3 python3-pip unzip
|
||||
RUN pip3 install kaggle
|
||||
RUN pip3 install pandas
|
||||
|
||||
# ENV PATH="/root/.local/bin:${PATH}"
|
||||
|
||||
COPY . .
|
||||
COPY ./figlet-loop.sh ./
|
||||
COPY ./download.sh ./
|
||||
COPY ./script.py ./
|
||||
# COPY ./kaggle.json /root/.kaggle/kaggle.json
|
||||
|
||||
ARG KAGGLE_USERNAME=testKAGGLE_USERNAME
|
||||
ARG KAGGLE_KEY=test1KAGGLE_KEY
|
||||
|
||||
RUN pip freeze
|
||||
RUN chmod u+x ./script.py
|
||||
|
||||
# RUN ./download.sh 117928
|
||||
# RUN python3 ./script.py
|
22
script.py
22
script.py
@ -35,25 +35,25 @@ def divide_dataset(dataset):
|
||||
os.system('tail -n +2 Car_Prices_Poland_Kaggle.csv | shuf > ./Car_Prices_Poland_Kaggle_shuf.csv')
|
||||
|
||||
len1 = len(dataset) // 6
|
||||
len2 = (len1 * 2) +1
|
||||
len2 = (len1 * 2) + 1
|
||||
|
||||
os.system(f'head -n {len1} Car_Prices_Poland_Kaggle_shuf.csv f > Car_Prices_Poland_Kaggle_dev.csv')
|
||||
os.system(f'head -n {len1} Car_Prices_Poland_Kaggle_shuf.csv| tail -n {len1} > Car_Prices_Poland_Kaggle_test.csv')
|
||||
os.system(f'tail -n +{len2} Car_Prices_Poland_Kaggle_shuf.csv > Car_Prices_Poland_Kaggle_train.csv')
|
||||
os.system(f'head -n {len1} Car_Prices_Poland_Kaggle_shuf.csv > Car_Prices_Poland_Kaggle_dev.csv')
|
||||
os.system(f'head -n {len1} Car_Prices_Poland_Kaggle_shuf.csv | tail -n {len1} > Car_Prices_Poland_Kaggle_test.csv')
|
||||
os.system(f'tail -n +{len2} Car_Prices_Poland_Kaggle_shuf.csv > Car_Prices_Poland_Kaggle_train.csv')
|
||||
|
||||
os.system('rm ./Car_Prices_Poland_Kaggle_shuf.csv')
|
||||
print("Len match: " + str(sum([len1 * 2, len2]) == len(dataset)))
|
||||
os.system('cat Car_Prices_Poland_Kaggle_train.csv | wc -l')
|
||||
os.system('cat Car_Prices_Poland_Kaggle_dev.csv | wc -l')
|
||||
os.system('cat Car_Prices_Poland_Kaggle_test.csv | wc -l')
|
||||
|
||||
|
||||
print('Dataset devided')
|
||||
|
||||
|
||||
def get_statistics(dataset):
|
||||
"""Mean, min, max, median etc."""
|
||||
|
||||
print(f'--------------- Dataset length ---------------')
|
||||
print(f'--------------- Normalized dataset length ---------------')
|
||||
print(len(dataset))
|
||||
|
||||
print(f'---------------Describe dataset---------------')
|
||||
@ -64,6 +64,9 @@ def get_statistics(dataset):
|
||||
def normalize_dataset(dataset):
|
||||
"""Drop unnecessary columns and set numeric values to [0,1] range"""
|
||||
|
||||
print(f'--------------- Initial dataset length ---------------')
|
||||
print(len(dataset))
|
||||
|
||||
# drop columns
|
||||
dataset.drop(columns=["Unnamed: 0", "generation_name"], inplace=True)
|
||||
dataset = dataset.dropna()
|
||||
@ -76,9 +79,6 @@ def normalize_dataset(dataset):
|
||||
return dataset
|
||||
|
||||
|
||||
# install_dependencies()
|
||||
|
||||
|
||||
print(os.system('python3 -m pip freeze'))
|
||||
|
||||
download_dataset()
|
||||
@ -88,7 +88,3 @@ df = pd.DataFrame(cars)
|
||||
df = normalize_dataset(df)
|
||||
divide_dataset(df)
|
||||
get_statistics(df)
|
||||
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user