Dodanie DVC

This commit is contained in:
Anna Nowak 2021-06-12 22:18:46 +02:00
parent abaadfb911
commit b90c2354fc
14 changed files with 571 additions and 5 deletions

3
.dvc/.gitignore vendored Executable file
View File

@ -0,0 +1,3 @@
/config.local
/tmp
/cache

4
.dvc/config Executable file
View File

@ -0,0 +1,4 @@
[core]
remote = ium_ssh_remote
['remote "ium_ssh_remote"']
url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl/ium-sftp

107
.dvc/plots/confusion.json Executable file
View File

@ -0,0 +1,107 @@
{
"$schema": "https://vega.github.io/schema/vega-lite/v4.json",
"data": {
"values": "<DVC_METRIC_DATA>"
},
"title": "<DVC_METRIC_TITLE>",
"facet": {
"field": "rev",
"type": "nominal"
},
"spec": {
"transform": [
{
"aggregate": [
{
"op": "count",
"as": "xy_count"
}
],
"groupby": [
"<DVC_METRIC_Y>",
"<DVC_METRIC_X>"
]
},
{
"impute": "xy_count",
"groupby": [
"rev",
"<DVC_METRIC_Y>"
],
"key": "<DVC_METRIC_X>",
"value": 0
},
{
"impute": "xy_count",
"groupby": [
"rev",
"<DVC_METRIC_X>"
],
"key": "<DVC_METRIC_Y>",
"value": 0
},
{
"joinaggregate": [
{
"op": "max",
"field": "xy_count",
"as": "max_count"
}
],
"groupby": []
},
{
"calculate": "datum.xy_count / datum.max_count",
"as": "percent_of_max"
}
],
"encoding": {
"x": {
"field": "<DVC_METRIC_X>",
"type": "nominal",
"sort": "ascending",
"title": "<DVC_METRIC_X_LABEL>"
},
"y": {
"field": "<DVC_METRIC_Y>",
"type": "nominal",
"sort": "ascending",
"title": "<DVC_METRIC_Y_LABEL>"
}
},
"layer": [
{
"mark": "rect",
"width": 300,
"height": 300,
"encoding": {
"color": {
"field": "xy_count",
"type": "quantitative",
"title": "",
"scale": {
"domainMin": 0,
"nice": true
}
}
}
},
{
"mark": "text",
"encoding": {
"text": {
"field": "xy_count",
"type": "quantitative"
},
"color": {
"condition": {
"test": "datum.percent_of_max > 0.5",
"value": "white"
},
"value": "black"
}
}
}
]
}
}

112
.dvc/plots/confusion_normalized.json Executable file
View File

@ -0,0 +1,112 @@
{
"$schema": "https://vega.github.io/schema/vega-lite/v4.json",
"data": {
"values": "<DVC_METRIC_DATA>"
},
"title": "<DVC_METRIC_TITLE>",
"facet": {
"field": "rev",
"type": "nominal"
},
"spec": {
"transform": [
{
"aggregate": [
{
"op": "count",
"as": "xy_count"
}
],
"groupby": [
"<DVC_METRIC_Y>",
"<DVC_METRIC_X>"
]
},
{
"impute": "xy_count",
"groupby": [
"rev",
"<DVC_METRIC_Y>"
],
"key": "<DVC_METRIC_X>",
"value": 0
},
{
"impute": "xy_count",
"groupby": [
"rev",
"<DVC_METRIC_X>"
],
"key": "<DVC_METRIC_Y>",
"value": 0
},
{
"joinaggregate": [
{
"op": "sum",
"field": "xy_count",
"as": "sum_y"
}
],
"groupby": [
"<DVC_METRIC_Y>"
]
},
{
"calculate": "datum.xy_count / datum.sum_y",
"as": "percent_of_y"
}
],
"encoding": {
"x": {
"field": "<DVC_METRIC_X>",
"type": "nominal",
"sort": "ascending",
"title": "<DVC_METRIC_X_LABEL>"
},
"y": {
"field": "<DVC_METRIC_Y>",
"type": "nominal",
"sort": "ascending",
"title": "<DVC_METRIC_Y_LABEL>"
}
},
"layer": [
{
"mark": "rect",
"width": 300,
"height": 300,
"encoding": {
"color": {
"field": "percent_of_y",
"type": "quantitative",
"title": "",
"scale": {
"domain": [
0,
1
]
}
}
}
},
{
"mark": "text",
"encoding": {
"text": {
"field": "percent_of_y",
"type": "quantitative",
"format": ".2f"
},
"color": {
"condition": {
"test": "datum.percent_of_y > 0.5",
"value": "white"
},
"value": "black"
}
}
}
]
}
}

31
.dvc/plots/default.json Executable file
View File

@ -0,0 +1,31 @@
{
"$schema": "https://vega.github.io/schema/vega-lite/v4.json",
"data": {
"values": "<DVC_METRIC_DATA>"
},
"title": "<DVC_METRIC_TITLE>",
"width": 300,
"height": 300,
"mark": {
"type": "line"
},
"encoding": {
"x": {
"field": "<DVC_METRIC_X>",
"type": "quantitative",
"title": "<DVC_METRIC_X_LABEL>"
},
"y": {
"field": "<DVC_METRIC_Y>",
"type": "quantitative",
"title": "<DVC_METRIC_Y_LABEL>",
"scale": {
"zero": false
}
},
"color": {
"field": "rev",
"type": "nominal"
}
}
}

116
.dvc/plots/linear.json Executable file
View File

@ -0,0 +1,116 @@
{
"$schema": "https://vega.github.io/schema/vega-lite/v4.json",
"data": {
"values": "<DVC_METRIC_DATA>"
},
"title": "<DVC_METRIC_TITLE>",
"width": 300,
"height": 300,
"layer": [
{
"encoding": {
"x": {
"field": "<DVC_METRIC_X>",
"type": "quantitative",
"title": "<DVC_METRIC_X_LABEL>"
},
"y": {
"field": "<DVC_METRIC_Y>",
"type": "quantitative",
"title": "<DVC_METRIC_Y_LABEL>",
"scale": {
"zero": false
}
},
"color": {
"field": "rev",
"type": "nominal"
}
},
"layer": [
{
"mark": "line"
},
{
"selection": {
"label": {
"type": "single",
"nearest": true,
"on": "mouseover",
"encodings": [
"x"
],
"empty": "none",
"clear": "mouseout"
}
},
"mark": "point",
"encoding": {
"opacity": {
"condition": {
"selection": "label",
"value": 1
},
"value": 0
}
}
}
]
},
{
"transform": [
{
"filter": {
"selection": "label"
}
}
],
"layer": [
{
"mark": {
"type": "rule",
"color": "gray"
},
"encoding": {
"x": {
"field": "<DVC_METRIC_X>",
"type": "quantitative"
}
}
},
{
"encoding": {
"text": {
"type": "quantitative",
"field": "<DVC_METRIC_Y>"
},
"x": {
"field": "<DVC_METRIC_X>",
"type": "quantitative"
},
"y": {
"field": "<DVC_METRIC_Y>",
"type": "quantitative"
}
},
"layer": [
{
"mark": {
"type": "text",
"align": "left",
"dx": 5,
"dy": -5
},
"encoding": {
"color": {
"type": "nominal",
"field": "rev"
}
}
}
]
}
]
}
]
}

104
.dvc/plots/scatter.json Executable file
View File

@ -0,0 +1,104 @@
{
"$schema": "https://vega.github.io/schema/vega-lite/v4.json",
"data": {
"values": "<DVC_METRIC_DATA>"
},
"title": "<DVC_METRIC_TITLE>",
"width": 300,
"height": 300,
"layer": [
{
"encoding": {
"x": {
"field": "<DVC_METRIC_X>",
"type": "quantitative",
"title": "<DVC_METRIC_X_LABEL>"
},
"y": {
"field": "<DVC_METRIC_Y>",
"type": "quantitative",
"title": "<DVC_METRIC_Y_LABEL>",
"scale": {
"zero": false
}
},
"color": {
"field": "rev",
"type": "nominal"
}
},
"layer": [
{
"mark": "point"
},
{
"selection": {
"label": {
"type": "single",
"nearest": true,
"on": "mouseover",
"encodings": [
"x"
],
"empty": "none",
"clear": "mouseout"
}
},
"mark": "point",
"encoding": {
"opacity": {
"condition": {
"selection": "label",
"value": 1
},
"value": 0
}
}
}
]
},
{
"transform": [
{
"filter": {
"selection": "label"
}
}
],
"layer": [
{
"encoding": {
"text": {
"type": "quantitative",
"field": "<DVC_METRIC_Y>"
},
"x": {
"field": "<DVC_METRIC_X>",
"type": "quantitative"
},
"y": {
"field": "<DVC_METRIC_Y>",
"type": "quantitative"
}
},
"layer": [
{
"mark": {
"type": "text",
"align": "left",
"dx": 5,
"dy": -5
},
"encoding": {
"color": {
"type": "nominal",
"field": "rev"
}
}
}
]
}
]
}
]
}

39
.dvc/plots/smooth.json Executable file
View File

@ -0,0 +1,39 @@
{
"$schema": "https://vega.github.io/schema/vega-lite/v4.json",
"data": {
"values": "<DVC_METRIC_DATA>"
},
"title": "<DVC_METRIC_TITLE>",
"mark": {
"type": "line"
},
"encoding": {
"x": {
"field": "<DVC_METRIC_X>",
"type": "quantitative",
"title": "<DVC_METRIC_X_LABEL>"
},
"y": {
"field": "<DVC_METRIC_Y>",
"type": "quantitative",
"title": "<DVC_METRIC_Y_LABEL>",
"scale": {
"zero": false
}
},
"color": {
"field": "rev",
"type": "nominal"
}
},
"transform": [
{
"loess": "<DVC_METRIC_Y>",
"on": "<DVC_METRIC_X>",
"groupby": [
"rev"
],
"bandwidth": 0.3
}
]
}

3
.dvcignore Executable file
View File

@ -0,0 +1,3 @@
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore

2
.gitignore vendored
View File

@ -56,7 +56,7 @@ geckodriver.log
docs/source/changelog.md docs/source/changelog.md
#fifa dataset #fifa dataset
fifa19* fifa19.zip
data.csv data.csv
test.csv test.csv
train.csv train.csv

Zadanie 1.py
View File

@ -233,11 +233,10 @@ clubs = ['FC Barcelona', 'Juventus', 'Paris Saint-Germain',
'Bohemian FC', 'Macclesfield Town', 'Newport County', 'Bohemian FC', 'Macclesfield Town', 'Newport County',
'Sligo Rovers', 'Derry City', 'Limerick FC', 'Bray Wanderers'] 'Sligo Rovers', 'Derry City', 'Limerick FC', 'Bray Wanderers']
if os.getenv("KAGGLE_KEY") is None or os.getenv("KAGGLE_USERNAME") is None: if not os.path.isfile('fifa19.zip'):
if os.getenv("KAGGLE_KEY") is None or os.getenv("KAGGLE_USERNAME") is None:
print("Brak zmiennych środowiskowych KAGGLE_KEY lub KAAGLE_USERNAME") print("Brak zmiennych środowiskowych KAGGLE_KEY lub KAAGLE_USERNAME")
exit() exit()
if not os.path.isfile('fifa19.zip'):
os.system('kaggle datasets download -d karangadiya/fifa19') os.system('kaggle datasets download -d karangadiya/fifa19')
with zipfile.ZipFile('fifa19.zip', 'r') as zip_ref: with zipfile.ZipFile('fifa19.zip', 'r') as zip_ref:

28
dvc.lock Normal file
View File

@ -0,0 +1,28 @@
schema: '2.0'
stages:
prepare_data:
cmd: python3 "Zadanie 1.py"
deps:
- path: Zadanie 1.py
md5: 4ba7222268bcb51b49534a5d90499778
size: 16874
outs:
- path: dev.csv
md5: d59bfad650c61f08767f87d92ae3c0e4
size: 2019119
isexec: true
- path: test.csv
md5: bed407925f63ed9003ad725426de7b6e
size: 2015014
isexec: true
train:
cmd: python3 train.py 15 16
deps:
- path: train.py
md5: cd2546c93817b7d242c6f5a53aeb453a
size: 1426
outs:
- path: model.h5
md5: 89e5ec7ee21be7e315c2bca1f9dbf4b2
size: 43728
isexec: true

15
dvc.yaml Normal file
View File

@ -0,0 +1,15 @@
# DVC pipeline definition with two stages:
#   prepare_data - runs "Zadanie 1.py" and tracks the CSV files it writes
#   train        - runs train.py with the arguments 15 and 16 and tracks model.h5
stages:
  prepare_data:
    cmd: python3 "Zadanie 1.py"
    deps:
      - Zadanie 1.py
    outs:
      # test.csv used to be listed twice here; the redundant entry was removed.
      # NOTE(review): the duplicate was presumably meant to be another split
      # (e.g. train.csv) - confirm against what "Zadanie 1.py" actually writes
      # before adding it.
      - test.csv
      - dev.csv
  train:
    cmd: python3 train.py 15 16
    deps:
      # NOTE(review): no output of prepare_data is declared as a dependency.
      # If train.py reads those CSV files, list them here so DVC runs the
      # stages in order and re-trains when the data changes.
      - train.py
    outs:
      - model.h5

5
fifa19.zip.dvc Normal file
View File

@ -0,0 +1,5 @@
outs:
- md5: a916123e55722dabae727aac93c45ab1
size: 2281534
path: fifa19.zip
isexec: true