wip
This commit is contained in:
parent
90c97b64d2
commit
3c80804774
1
Jenkinsfile
vendored
1
Jenkinsfile
vendored
@ -28,6 +28,7 @@ pipeline {
|
||||
steps {
|
||||
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
|
||||
sh "./process_data.sh"
|
||||
archiveArtifacts artifacts: "data_test.csv, data_dev.csv, data_train.csv"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
1
column_titles.csv
Normal file
1
column_titles.csv
Normal file
@ -0,0 +1 @@
|
||||
job_id,title,location,department,salary_range,company_profile,description,requirements,benefits,telecommuting,has_company_logo,has_questions,employment_type,required_experience,required_education,industry,function,fraudulent
|
|
0
data_dev.csv
Normal file
0
data_dev.csv
Normal file
|
17880
data_not_cutted.csv
Normal file
17880
data_not_cutted.csv
Normal file
File diff suppressed because one or more lines are too long
17880
data_not_shuf.csv
Normal file
17880
data_not_shuf.csv
Normal file
File diff suppressed because one or more lines are too long
0
data_test.csv
Normal file
0
data_test.csv
Normal file
|
0
data_train.csv
Normal file
0
data_train.csv
Normal file
|
@ -1528,7 +1528,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
"version": "3.8.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -1,4 +1,13 @@
|
||||
#!/bin/bash
#
# Download the Kaggle "real-or-fake-fake-jobposting-prediction" dataset and
# split it into test / dev / train CSV files in the current directory.
#
# Usage: ./process_data.sh [CUTOFF]
#   CUTOFF  optional line count handed to `head -n`; only that many shuffled
#           rows are kept before splitting. When omitted or empty, every row
#           is kept.
#
# Files written:
#   column_titles.csv    first line of fake_job_postings.csv
#   data_not_shuf.csv    fake_job_postings.csv without its first line
#   data_not_cutted.csv  shuffled copy of data_not_shuf.csv
#   data.csv             the (optionally truncated) shuffled rows
#   data_test.csv        lines 1-2500 of data.csv
#   data_dev.csv         lines 2501-5000 of data.csv
#   data_train.csv       lines 5001 onward of data.csv
#
# NOTE(review): the split is line-based; if the CSV has quoted fields with
# embedded newlines, records could be cut across sets -- confirm.

# Abort on the first failing command so a failed download or unzip does not
# silently produce stale or empty data sets.
set -euo pipefail

# An absent argument used to turn `head -n $1 file` into `head -n file`,
# which fails and leaves data.csv (and every derived set) empty.
cutoff="${1:-}"

echo "welcome"
ls
echo "this is the whole list of dir"

echo "Download data from kaggle"
kaggle datasets download -d shivamb/real-or-fake-fake-jobposting-prediction
unzip -o real-or-fake-fake-jobposting-prediction.zip

echo "Save column titles"
head -n 1 fake_job_postings.csv > column_titles.csv
tail -n +2 fake_job_postings.csv > data_not_shuf.csv

echo "Create sets"
shuf data_not_shuf.csv > data_not_cutted.csv

if [[ -n "$cutoff" ]]; then
  # head validates the value itself; under `set -e` a bad count aborts here.
  head -n "$cutoff" data_not_cutted.csv > data.csv
else
  # No cutoff requested: keep the whole shuffled file.
  cp -- data_not_cutted.csv data.csv
fi

sed -n '1,2500p' data.csv > data_test.csv
sed -n '2501,5000p' data.csv > data_dev.csv
tail -n +5001 data.csv > data_train.csv
|
BIN
real-or-fake-fake-jobposting-prediction.zip
Normal file
BIN
real-or-fake-fake-jobposting-prediction.zip
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user