create stats.txt

This commit is contained in:
Mikołaj Pokrywka 2022-04-03 12:52:57 +02:00
parent 39af11487e
commit a381010f85
3 changed files with 16 additions and 5 deletions

View File

@ -11,6 +11,7 @@ RUN pip3 install -r ./requirements.txt
# Skopiujmy nasz skrypt do katalogu /app w kontenerze
COPY ./process_data.sh ./
COPY ./download_data_and_process.py ./
COPY ./stats.py ./
# Domyślne polecenie, które zostanie uruchomione w kontenerze po jego starcie
CMD python -u ./download_data_and_process.py

View File

@ -12,4 +12,4 @@ head -n $CUTOFF data_not_cutted.csv > data.csv
sed -n '1,2500p' data.csv > data_test.csv
sed -n '2501,5000p' data.csv > data_dev.csv
tail -n +5001 data.csv > data_train.csv
rm data.csv real-or-fake-fake-jobposting-prediction.zip column_titles.csv data_not_shuf.csv data_not_cutted.csv
rm data.csv real-or-fake-fake-jobposting-prediction.zip data_not_shuf.csv data_not_cutted.csv

View File

@ -1,11 +1,21 @@
import subprocess
import pandas as pd
import numpy as np
import os
data=pd.read_csv('data_train.csv')
data_2=pd.read_csv('data_dev.csv')
data_3=pd.read_csv('data_test.csv')
data = pd.concat([data, data_2, data_3], axis=0)
path = ''
# Parts are concatenated in exactly this order. column_titles.csv goes first,
# presumably so that its contents become the header row of the combined CSV
# (verify against the script that produces it).
all_files = ['column_titles.csv', 'data_train.csv', 'data_dev.csv', 'data_test.csv']
# Rebuild data.csv from the parts. Context managers guarantee that every
# handle is closed even if a read or write raises part-way through.
with open("data.csv", "w") as data_file:
    for name in all_files:
        with open(name, "r") as f:
            data_file.write(f.read())
# Load the freshly combined file into a single DataFrame.
data = pd.read_csv('data.csv')
data = data.replace(np.nan, '', regex=True)
print("="*20)