tweaks(ium_04) Update Jenkinsfile and add Jenkinsfile-stats.
This commit is contained in:
parent
c3d9392ad1
commit
04b5aca4ed
13
Jenkinsfile
vendored
13
Jenkinsfile
vendored
@ -1,5 +1,7 @@
|
|||||||
pipeline{
|
pipeline{
|
||||||
agent any
|
agent {
|
||||||
|
docker { image 'ubuntu:latest' }
|
||||||
|
}
|
||||||
stages{
|
stages{
|
||||||
stage('checkout: Check out from version control'){
|
stage('checkout: Check out from version control'){
|
||||||
steps{
|
steps{
|
||||||
@ -8,14 +10,7 @@ pipeline{
|
|||||||
}
|
}
|
||||||
stage('sh: Shell Script'){
|
stage('sh: Shell Script'){
|
||||||
steps{
|
steps{
|
||||||
sh '''#!/bin/bash
|
sh './script.sh'
|
||||||
wget -c https://git.wmi.amu.edu.pl/s434780/ium_434780/src/branch/master/data.csv
|
|
||||||
head -n -1 data.csv | shuf > data.csv.shuf
|
|
||||||
wc -l data.csv
|
|
||||||
head -n 500 data.csv.shuf > test.csv
|
|
||||||
head -n 500 data.csv.shuf | tail -n 500 > dev.csv
|
|
||||||
tail -n +501 data.csv.shuf > train.csv
|
|
||||||
wc -l *.csv '''
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage('Archive artifacts'){
|
stage('Archive artifacts'){
|
||||||
|
25
Jenkinsfile-stats
Normal file
25
Jenkinsfile-stats
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
pipeline{
|
||||||
|
agent any
|
||||||
|
stages{
|
||||||
|
stage('checkout: Check out from version control'){
|
||||||
|
steps{
|
||||||
|
checkout([$class: 'GitSCM', branches: [[name: '*/master']], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: '321737ab-1c4d-475f-9667-513cf19ba596', url: 'https://git.wmi.amu.edu.pl/s434780/ium_434780.git']]])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage('Copy artifacts'){
|
||||||
|
steps{
|
||||||
|
copyArtifacts fingerprintArtifacts: true, projectName: 's434780-create-dataset', selector: buildParameter('BUILD_SELECTOR')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage('sh: Shell Script'){
|
||||||
|
steps{
|
||||||
|
sh './stats.sh'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage('Archive artifacts'){
|
||||||
|
steps{
|
||||||
|
archiveArtifacts 'stats.txt'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
20
main.py
20
main.py
@ -3,10 +3,10 @@ from sklearn.model_selection import train_test_split
|
|||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
data = pd.read_csv('resources/Amazon_Consumer_Reviews.csv', header=0, sep=',')
|
data = pd.read_csv('Amazon_Consumer_Reviews.csv', header=0, sep=',')
|
||||||
|
|
||||||
columns = ['reviews.date', 'reviews.numHelpful', 'reviews.rating', 'reviews.doRecommend']
|
columns = ['reviews.date', 'reviews.numHelpful', 'reviews.rating', 'reviews.doRecommend']
|
||||||
string_columns = ['name', 'brand', 'categories', 'primaryCategories', 'keys', 'manufacturer', 'reviews.title',
|
string_columns = ['name', 'categories', 'primaryCategories', 'manufacturer', 'reviews.title',
|
||||||
'reviews.username', 'reviews.text']
|
'reviews.username', 'reviews.text']
|
||||||
|
|
||||||
data = data[string_columns + columns]
|
data = data[string_columns + columns]
|
||||||
@ -14,17 +14,19 @@ def main():
|
|||||||
for c in string_columns:
|
for c in string_columns:
|
||||||
data[c] = data[c].str.lower()
|
data[c] = data[c].str.lower()
|
||||||
|
|
||||||
print("Empty rows summary:")
|
# print("Empty rows summary:")
|
||||||
print(data.isnull().sum())
|
# print(data.isnull().sum())
|
||||||
data.dropna()
|
# data["reviews.title"].fillna("No title", inplace = True)
|
||||||
|
# print(data.isnull().sum())
|
||||||
|
|
||||||
data.to_csv('resources/data.csv')
|
data.to_csv('data.csv')
|
||||||
|
|
||||||
train, test = train_test_split(data, train_size=0.6, random_state=1)
|
train, test = train_test_split(data, train_size=0.6, random_state=1)
|
||||||
test, dev = train_test_split(test, test_size=0.5, random_state=1)
|
test, dev = train_test_split(test, test_size=0.5, random_state=1)
|
||||||
test.to_csv('resources/test.csv')
|
|
||||||
train.to_csv('resources/train.csv')
|
test.to_csv('test.csv')
|
||||||
dev.to_csv('resources/dev.csv')
|
train.to_csv('train.csv')
|
||||||
|
dev.to_csv('dev.csv')
|
||||||
|
|
||||||
print("\n\nMean reviews rating for each primary category: ")
|
print("\n\nMean reviews rating for each primary category: ")
|
||||||
print(data[["primaryCategories", "reviews.rating"]].groupby("primaryCategories").mean())
|
print(data[["primaryCategories", "reviews.rating"]].groupby("primaryCategories").mean())
|
||||||
|
Loading…
Reference in New Issue
Block a user