Create stats.txt
This commit is contained in:
parent
39af11487e
commit
a381010f85
@ -11,6 +11,7 @@ RUN pip3 install -r ./requirements.txt
|
||||
# Skopiujmy nasz skrypt do katalogu /app w kontenerze
|
||||
COPY ./process_data.sh ./
|
||||
COPY ./download_data_and_process.py ./
|
||||
COPY ./stats.py ./
|
||||
|
||||
# Domyślne polecenie, które zostanie uruchomione w kontenerze po jego starcie
|
||||
CMD python -u ./download_data_and_process.py
|
@ -12,4 +12,4 @@ head -n $CUTOFF data_not_cutted.csv > data.csv
|
||||
sed -n '1,2500p' data.csv > data_test.csv
|
||||
sed -n '2501,5000p' data.csv > data_dev.csv
|
||||
tail -n +5001 data.csv > data_train.csv
|
||||
rm data.csv real-or-fake-fake-jobposting-prediction.zip column_titles.csv data_not_shuf.csv data_not_cutted.csv
|
||||
rm data.csv real-or-fake-fake-jobposting-prediction.zip data_not_shuf.csv data_not_cutted.csv
|
18
stats.py
18
stats.py
@ -1,11 +1,21 @@
|
||||
import subprocess
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
data=pd.read_csv('data_train.csv')
|
||||
data_2=pd.read_csv('data_dev.csv')
|
||||
data_3=pd.read_csv('data_test.csv')
|
||||
data = pd.concat([data, data_2, data_3], axis=0)
|
||||
|
||||
path = ''
|
||||
|
||||
all_files = ['column_titles.csv', 'data_train.csv', 'data_dev.csv', 'data_test.csv']
|
||||
|
||||
data_file = open("data.csv", "w")
|
||||
for name in all_files:
|
||||
f = open(name, "r")
|
||||
data_file.write(f.read())
|
||||
f.close()
|
||||
|
||||
data_file.close()
|
||||
data=pd.read_csv('data.csv')
|
||||
data = data.replace(np.nan, '', regex=True)
|
||||
|
||||
print("="*20)
|
||||
|
Loading…
Reference in New Issue
Block a user