Zad 10. DVC

This commit is contained in:
Cezary Gałązkiewicz 2022-06-05 23:57:29 +02:00
parent de2ffb9491
commit 002b3b8d6d
6 changed files with 17 additions and 3 deletions

3
.dvc/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
/config.local
/tmp
/cache

6
.dvc/config Normal file
View File

@@ -0,0 +1,6 @@
[core]
remote = my_local_remote
['remote "ium_ssh_remote"']
url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
['remote "my_local_remote"']
url = ..

3
.dvcignore Normal file
View File

@@ -0,0 +1,3 @@
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/Steel_industry_data.csv

View File

@@ -0,0 +1,4 @@
outs:
- md5: dc217c9856d659f8cf61d3156397e535
size: 2731389
path: Steel_industry_data.csv

View File

@@ -1,6 +1,3 @@
# Download the Kaggle dataset archive; --force re-downloads even if a copy already exists.
kaggle datasets download -d csafrit2/steel-industry-energy-consumption --force
# Extract the archive, overwriting existing files without prompting (-o).
unzip -o steel-industry-energy-consumption.zip
# Keep the first $CUTOFF lines, skip line 1 (presumably the CSV header -- confirm),
# drop comma-separated field 8 (--complement keeps every field except the listed one),
# then shuffle the remaining rows into a new file.
# NOTE(review): CUTOFF is not set in the visible lines; presumably supplied by the
# environment or the caller -- confirm.
head -n $CUTOFF Steel_industry_data.csv | tail -n +2 | cut -d, -f8 --complement | shuf > steel_industry_data_shuffled.csv
# Count the rows in the shuffled file; awk strips the filename that `wc -l` prints after the count.
number_of_lines=$(wc -l steel_industry_data_shuffled.csv | awk '{print $1}')
# One tenth of the row count (shell integer division truncates).
test_len=$((number_of_lines/10))