From 1b74870dd0ae511fe77a4c1d7126f26f72b3fe55 Mon Sep 17 00:00:00 2001 From: s424714 Date: Mon, 3 Apr 2023 18:29:31 +0200 Subject: [PATCH] feat: jenkinsfile2 with docker image from dockerhub --- Jenkinsfile2 | 26 +++++++++++++++++--------- dataset.py | 45 +++++++++++++++++++++++++-------------------- 2 files changed, 42 insertions(+), 29 deletions(-) diff --git a/Jenkinsfile2 b/Jenkinsfile2 index 3fc6ad3..1ecaf66 100644 --- a/Jenkinsfile2 +++ b/Jenkinsfile2 @@ -15,22 +15,30 @@ node { //cloning git repo checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 's424714', url: 'https://git.wmi.amu.edu.pl/s424714/ium_424714']]]) } - stage('cloning artifacts') { - //coping artifacts - copyArtifacts fingerprintArtifacts: true, projectName: 's424714-create-dataset', selector: buildParameter('BUILD_SELECTOR') - - } - stage('Shell Script') { - // executing bash scripts + stage('DockerHUB build') { + sh "chmod +x -R ${env.WORKSPACE}" - sh "./stats-dataset.sh" + docker.image('drfifonz/ium_s424714:1.1').inside { + stage("DOCKER: cloning artifacts"){ + copyArtifacts fingerprintArtifacts: true, projectName: 's424714-create-dataset', selector: buildParameter('BUILD_SELECTOR') + sh 'mkdir -p ./data/dataset' + sh 'mv dataset.csv ./data/dataset' + } + stage("DOCKER: Running stats script"){ + sh 'python ./dataset.py --stats' + sh "cp ./data/stats.csv ${WORKSPACE}" + } + } } + + + stage('Saving artefacts') { echo 'Goodbye!' - archiveArtifacts 'data/stats.txt' + archiveArtifacts 'stats.csv' } } \ No newline at end of file diff --git a/dataset.py b/dataset.py index aa170f4..86d3957 100644 --- a/dataset.py +++ b/dataset.py @@ -13,29 +13,37 @@ parser.add_argument("--stats", action="store_true", default=False) args = parser.parse_args() -TRUE_NEWS_PATH = Path("data/True.csv") -FAKE_NEWS_PATH = Path("data/Fake.csv") +if not (args.dataset or args.stats): + raise ValueError("NO RUN TYPE SPECIFIED") -DATA_PATH = TRUE_NEWS_PATH.parent -DATASET_PATH = DATA_PATH / "dataset" +if args.dataset: + TRUE_NEWS_PATH = Path("data/True.csv") + FAKE_NEWS_PATH = Path("data/Fake.csv") -# loading datasets -true_news = pd.read_csv(TRUE_NEWS_PATH) -fake_news = pd.read_csv(FAKE_NEWS_PATH) + DATA_PATH = TRUE_NEWS_PATH.parent + DATASET_PATH = DATA_PATH / "dataset" -# clearing dataset -true_news = true_news.drop(columns=["title", "subject", "date"]) -fake_news = fake_news.drop(columns=["title", "subject", "date"]) + # loading datasets + true_news = pd.read_csv(TRUE_NEWS_PATH) + fake_news = pd.read_csv(FAKE_NEWS_PATH) -# setting binary classification -true_news["Value"] = 1 -fake_news["Value"] = 0 + # clearing dataset + true_news = true_news.drop(columns=["title", "subject", "date"]) + fake_news = fake_news.drop(columns=["title", "subject", "date"]) -# merging dataset -dataset = pd.concat([true_news, fake_news], axis=0) -dataset["text"] = dataset["text"].str.strip() -dataset.dropna(axis=0, how="any", inplace=False, subset=["text"]) + # setting binary classification + true_news["Value"] = 1 + fake_news["Value"] = 0 + # merging dataset + dataset = pd.concat([true_news, fake_news], axis=0) + dataset["text"] = dataset["text"].str.strip() + dataset.dropna(axis=0, how="any", inplace=False, subset=["text"]) + +elif args.stats: + DATA_PATH = Path("data") + DATASET_FILE_PATH = DATA_PATH / "dataset/dataset.csv" + dataset = pd.read_csv(DATASET_FILE_PATH) # spliting dataset for train,val,test X_train, X_val_test, y_train, y_valtest = train_test_split( dataset["text"], dataset["Value"], test_size=0.2, shuffle=True, random_state=20 @@ -69,6 +77,3 @@ if args.stats: ) stats.to_csv((DATA_PATH / "stats.csv")) print(stats) - -if not (args.dataset or args.stats): - print("NO RUN TYPE SPECIFIED")