feat: jenkinsfile2 with docker image from dockerhub

This commit is contained in:
Filip Patyk 2023-04-03 18:29:31 +02:00
parent faa5898794
commit 1b74870dd0
2 changed files with 42 additions and 29 deletions

View File

@ -15,22 +15,30 @@ node {
//cloning git repo //cloning git repo
checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 's424714', url: 'https://git.wmi.amu.edu.pl/s424714/ium_424714']]]) checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 's424714', url: 'https://git.wmi.amu.edu.pl/s424714/ium_424714']]])
} }
stage('cloning artifacts') {
//copying artifacts
copyArtifacts fingerprintArtifacts: true, projectName: 's424714-create-dataset', selector: buildParameter('BUILD_SELECTOR')
}
stage('Shell Script') { stage('DockerHUB build') {
// executing bash scripts
sh "chmod +x -R ${env.WORKSPACE}" sh "chmod +x -R ${env.WORKSPACE}"
sh "./stats-dataset.sh"
docker.image('drfifonz/ium_s424714:1.1').inside {
stage("DOCKER: cloning artifacts"){
copyArtifacts fingerprintArtifacts: true, projectName: 's424714-create-dataset', selector: buildParameter('BUILD_SELECTOR')
sh 'mkdir -p ./data/dataset'
sh 'mv dataset.csv ./data/dataset'
} }
stage("DOCKER: Running stats script"){
sh 'python ./dataset.py --stats'
sh "cp ./data/stats.csv ${WORKSPACE}"
}
}
}
stage('Saving artefacts') { stage('Saving artefacts') {
echo 'Goodbye!' echo 'Goodbye!'
archiveArtifacts 'data/stats.txt' archiveArtifacts 'stats.csv'
} }
} }

View File

@ -13,6 +13,10 @@ parser.add_argument("--stats", action="store_true", default=False)
args = parser.parse_args() args = parser.parse_args()
if not (args.dataset or args.stats):
raise ValueError("NO RUN TYPE SPECIFIED")
if args.dataset:
TRUE_NEWS_PATH = Path("data/True.csv") TRUE_NEWS_PATH = Path("data/True.csv")
FAKE_NEWS_PATH = Path("data/Fake.csv") FAKE_NEWS_PATH = Path("data/Fake.csv")
@ -36,6 +40,10 @@ dataset = pd.concat([true_news, fake_news], axis=0)
dataset["text"] = dataset["text"].str.strip() dataset["text"] = dataset["text"].str.strip()
dataset.dropna(axis=0, how="any", inplace=False, subset=["text"]) dataset.dropna(axis=0, how="any", inplace=False, subset=["text"])
elif args.stats:
DATA_PATH = Path("data")
DATASET_FILE_PATH = DATA_PATH / "dataset/dataset.csv"
dataset = pd.read_csv(DATASET_FILE_PATH)
# splitting dataset for train,val,test # splitting dataset for train,val,test
X_train, X_val_test, y_train, y_valtest = train_test_split( X_train, X_val_test, y_train, y_valtest = train_test_split(
dataset["text"], dataset["Value"], test_size=0.2, shuffle=True, random_state=20 dataset["text"], dataset["Value"], test_size=0.2, shuffle=True, random_state=20
@ -69,6 +77,3 @@ if args.stats:
) )
stats.to_csv((DATA_PATH / "stats.csv")) stats.to_csv((DATA_PATH / "stats.csv"))
print(stats) print(stats)
if not (args.dataset or args.stats):
print("NO RUN TYPE SPECIFIED")