feat: jenkinsfile2 with docker image from dockerhub
This commit is contained in:
parent
faa5898794
commit
1b74870dd0
26
Jenkinsfile2
26
Jenkinsfile2
@ -15,22 +15,30 @@ node {
|
|||||||
//cloning git repo
|
//cloning git repo
|
||||||
checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 's424714', url: 'https://git.wmi.amu.edu.pl/s424714/ium_424714']]])
|
checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 's424714', url: 'https://git.wmi.amu.edu.pl/s424714/ium_424714']]])
|
||||||
}
|
}
|
||||||
stage('cloning artifacts') {
|
|
||||||
//copying artifacts
|
|
||||||
copyArtifacts fingerprintArtifacts: true, projectName: 's424714-create-dataset', selector: buildParameter('BUILD_SELECTOR')
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
stage('Shell Script') {
|
stage('DockerHUB build') {
|
||||||
// executing bash scripts
|
|
||||||
sh "chmod +x -R ${env.WORKSPACE}"
|
sh "chmod +x -R ${env.WORKSPACE}"
|
||||||
sh "./stats-dataset.sh"
|
|
||||||
|
|
||||||
|
docker.image('drfifonz/ium_s424714:1.1').inside {
|
||||||
|
stage("DOCKER: cloning artifacts"){
|
||||||
|
copyArtifacts fingerprintArtifacts: true, projectName: 's424714-create-dataset', selector: buildParameter('BUILD_SELECTOR')
|
||||||
|
sh 'mkdir -p ./data/dataset'
|
||||||
|
sh 'mv dataset.csv ./data/dataset'
|
||||||
|
}
|
||||||
|
stage("DOCKER: Running stats script"){
|
||||||
|
sh 'python ./dataset.py --stats'
|
||||||
|
sh "cp ./data/stats.csv ${WORKSPACE}"
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
stage('Saving artefacts') {
|
stage('Saving artefacts') {
|
||||||
echo 'Goodbye!'
|
echo 'Goodbye!'
|
||||||
archiveArtifacts 'data/stats.txt'
|
archiveArtifacts 'stats.csv'
|
||||||
}
|
}
|
||||||
}
|
}
|
45
dataset.py
45
dataset.py
@ -13,29 +13,37 @@ parser.add_argument("--stats", action="store_true", default=False)
|
|||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
TRUE_NEWS_PATH = Path("data/True.csv")
|
if not (args.dataset or args.stats):
|
||||||
FAKE_NEWS_PATH = Path("data/Fake.csv")
|
raise ValueError("NO RUN TYPE SPECIFIED")
|
||||||
|
|
||||||
DATA_PATH = TRUE_NEWS_PATH.parent
|
if args.dataset:
|
||||||
DATASET_PATH = DATA_PATH / "dataset"
|
TRUE_NEWS_PATH = Path("data/True.csv")
|
||||||
|
FAKE_NEWS_PATH = Path("data/Fake.csv")
|
||||||
|
|
||||||
# loading datasets
|
DATA_PATH = TRUE_NEWS_PATH.parent
|
||||||
true_news = pd.read_csv(TRUE_NEWS_PATH)
|
DATASET_PATH = DATA_PATH / "dataset"
|
||||||
fake_news = pd.read_csv(FAKE_NEWS_PATH)
|
|
||||||
|
|
||||||
# clearing dataset
|
# loading datasets
|
||||||
true_news = true_news.drop(columns=["title", "subject", "date"])
|
true_news = pd.read_csv(TRUE_NEWS_PATH)
|
||||||
fake_news = fake_news.drop(columns=["title", "subject", "date"])
|
fake_news = pd.read_csv(FAKE_NEWS_PATH)
|
||||||
|
|
||||||
# setting binary classification
|
# clearing dataset
|
||||||
true_news["Value"] = 1
|
true_news = true_news.drop(columns=["title", "subject", "date"])
|
||||||
fake_news["Value"] = 0
|
fake_news = fake_news.drop(columns=["title", "subject", "date"])
|
||||||
|
|
||||||
# merging dataset
|
# setting binary classification
|
||||||
dataset = pd.concat([true_news, fake_news], axis=0)
|
true_news["Value"] = 1
|
||||||
dataset["text"] = dataset["text"].str.strip()
|
fake_news["Value"] = 0
|
||||||
dataset.dropna(axis=0, how="any", inplace=False, subset=["text"])
|
|
||||||
|
|
||||||
|
# merging dataset
|
||||||
|
dataset = pd.concat([true_news, fake_news], axis=0)
|
||||||
|
dataset["text"] = dataset["text"].str.strip()
|
||||||
|
dataset.dropna(axis=0, how="any", inplace=False, subset=["text"])
|
||||||
|
|
||||||
|
elif args.stats:
|
||||||
|
DATA_PATH = Path("data")
|
||||||
|
DATASET_FILE_PATH = DATA_PATH / "dataset/dataset.csv"
|
||||||
|
dataset = pd.read_csv(DATASET_FILE_PATH)
|
||||||
# splitting dataset for train,val,test
|
# splitting dataset for train,val,test
|
||||||
X_train, X_val_test, y_train, y_valtest = train_test_split(
|
X_train, X_val_test, y_train, y_valtest = train_test_split(
|
||||||
dataset["text"], dataset["Value"], test_size=0.2, shuffle=True, random_state=20
|
dataset["text"], dataset["Value"], test_size=0.2, shuffle=True, random_state=20
|
||||||
@ -69,6 +77,3 @@ if args.stats:
|
|||||||
)
|
)
|
||||||
stats.to_csv((DATA_PATH / "stats.csv"))
|
stats.to_csv((DATA_PATH / "stats.csv"))
|
||||||
print(stats)
|
print(stats)
|
||||||
|
|
||||||
if not (args.dataset or args.stats):
|
|
||||||
print("NO RUN TYPE SPECIFIED")
|
|
||||||
|
Loading…
Reference in New Issue
Block a user