diff --git a/.dvc/.gitignore b/.dvc/.gitignore
new file mode 100644
index 0000000..528f30c
--- /dev/null
+++ b/.dvc/.gitignore
@@ -0,0 +1,3 @@
+/config.local
+/tmp
+/cache
diff --git a/.dvc/config b/.dvc/config
new file mode 100644
index 0000000..629b7f1
--- /dev/null
+++ b/.dvc/config
@@ -0,0 +1,6 @@
+[core]
+    remote = my_local_remote
+['remote "ium_ssh_remote"']
+    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
+['remote "my_local_remote"']
+    url = ..
diff --git a/.dvcignore b/.dvcignore
new file mode 100644
index 0000000..5197305
--- /dev/null
+++ b/.dvcignore
@@ -0,0 +1,3 @@
+# Add patterns for files that DVC should ignore; excluding them can
+# improve performance. Learn more at
+# https://dvc.org/doc/user-guide/dvcignore
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8781153
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/Steel_industry_data.csv
diff --git a/Steel_industry_data.csv.dvc b/Steel_industry_data.csv.dvc
new file mode 100644
index 0000000..696706b
--- /dev/null
+++ b/Steel_industry_data.csv.dvc
@@ -0,0 +1,4 @@
+outs:
+- md5: dc217c9856d659f8cf61d3156397e535
+  size: 2731389
+  path: Steel_industry_data.csv
diff --git a/download.sh b/download.sh
index 497211a..e7ef608 100644
--- a/download.sh
+++ b/download.sh
@@ -1,6 +1,3 @@
-kaggle datasets download -d csafrit2/steel-industry-energy-consumption --force
-unzip -o steel-industry-energy-consumption.zip
-
 head -n $CUTOFF Steel_industry_data.csv | tail -n +2 | cut -d, -f8 --complement | shuf > steel_industry_data_shuffled.csv
 number_of_lines=$(wc -l steel_industry_data_shuffled.csv | awk '{print $1}')
 test_len=$((number_of_lines/10))