Create stats.txt
This commit is contained in:
parent
39af11487e
commit
a381010f85
@ -11,6 +11,7 @@ RUN pip3 install -r ./requirements.txt
|
|||||||
# Skopiujmy nasz skrypt do katalogu /app w kontenerze
|
# Skopiujmy nasz skrypt do katalogu /app w kontenerze
|
||||||
COPY ./process_data.sh ./
|
COPY ./process_data.sh ./
|
||||||
COPY ./download_data_and_process.py ./
|
COPY ./download_data_and_process.py ./
|
||||||
|
COPY ./stats.py ./
|
||||||
|
|
||||||
# Domyślne polecenie, które zostanie uruchomione w kontenerze po jego starcie
|
# Domyślne polecenie, które zostanie uruchomione w kontenerze po jego starcie
|
||||||
CMD python -u ./download_data_and_process.py
|
CMD python -u ./download_data_and_process.py
|
@ -12,4 +12,4 @@ head -n $CUTOFF data_not_cutted.csv > data.csv
|
|||||||
sed -n '1,2500p' data.csv > data_test.csv
|
sed -n '1,2500p' data.csv > data_test.csv
|
||||||
sed -n '2501,5000p' data.csv > data_dev.csv
|
sed -n '2501,5000p' data.csv > data_dev.csv
|
||||||
tail -n +5001 data.csv > data_train.csv
|
tail -n +5001 data.csv > data_train.csv
|
||||||
rm data.csv real-or-fake-fake-jobposting-prediction.zip column_titles.csv data_not_shuf.csv data_not_cutted.csv
|
rm data.csv real-or-fake-fake-jobposting-prediction.zip data_not_shuf.csv data_not_cutted.csv
|
18
stats.py
18
stats.py
@ -1,11 +1,21 @@
|
|||||||
import subprocess
|
import subprocess
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import os
|
||||||
|
|
||||||
data=pd.read_csv('data_train.csv')
|
|
||||||
data_2=pd.read_csv('data_dev.csv')
|
path = ''
|
||||||
data_3=pd.read_csv('data_test.csv')
|
|
||||||
data = pd.concat([data, data_2, data_3], axis=0)
|
all_files = ['column_titles.csv', 'data_train.csv', 'data_dev.csv', 'data_test.csv']
|
||||||
|
|
||||||
|
data_file = open("data.csv", "w")
|
||||||
|
for name in all_files:
|
||||||
|
f = open(name, "r")
|
||||||
|
data_file.write(f.read())
|
||||||
|
f.close()
|
||||||
|
|
||||||
|
data_file.close()
|
||||||
|
data=pd.read_csv('data.csv')
|
||||||
data = data.replace(np.nan, '', regex=True)
|
data = data.replace(np.nan, '', regex=True)
|
||||||
|
|
||||||
print("="*20)
|
print("="*20)
|
||||||
|
Loading…
Reference in New Issue
Block a user