diff --git a/Dockerfile b/Dockerfile index f132163..103fe81 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ FROM ubuntu:22.04 RUN apt update && apt install -y vim make python3 python3-pip python-is-python3 gcc g++ golang wget unzip git -RUN pip install pandas matplotlib scikit-learn +RUN pip install pandas matplotlib scikit-learn tensorflow CMD "bash" diff --git a/dataset.jenkinsfile b/dataset.jenkinsfile index be9b2b7..c92c035 100644 --- a/dataset.jenkinsfile +++ b/dataset.jenkinsfile @@ -8,7 +8,7 @@ node { checkout([$class: 'GitSCM', branches: [[name: 'ztm']], extensions: [], userRemoteConfigs: [[url: 'https://git.wmi.amu.edu.pl/s452639/ium_452639']]]) sh 'cd src; ./prepare-ztm-data.sh' - archiveArtifacts artifacts: 'src/stop_times.normalized.tsv,src/stop_times.train.tsv,src/stop_times.test.tsv,src/stop_times.valid.tsv', + archiveArtifacts artifacts: 'src/stop_times.normalized.tsv,src/stop_times.train.tsv,src/stop_times.test.tsv,src/stop_times.valid.tsv,src/stop_times.categories.tsv', followSymlinks: false } } diff --git a/run.sh b/run.sh index 563148f..e235ddf 100755 --- a/run.sh +++ b/run.sh @@ -3,4 +3,4 @@ set -xe docker build -t ium . -docker run -it ium +docker run -v .:/ium/ -it ium diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..c48b878 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,2 @@ +model.keras +pics diff --git a/src/split_train_valid_test.py b/src/split_train_valid_test.py index e4e7fee..79a5879 100755 --- a/src/split_train_valid_test.py +++ b/src/split_train_valid_test.py @@ -4,7 +4,7 @@ from sklearn.model_selection import train_test_split data = pd.read_csv('./stop_times.normalized.tsv', sep='\t') -train, test = train_test_split(data, test_size=0.5) +train, test = train_test_split(data, test_size=0.8) valid, test = train_test_split(test, test_size=0.5) train.to_csv('stop_times.train.tsv', sep='\t') diff --git a/src/tf_test.py b/src/tf_test.py new file mode 100644 index 0000000..5afbe5a --- /dev/null +++ b/src/tf_test.py @@ -0,0 +1,15 @@ +from tf_train import * +import numpy as np + +def test(): + global model, le + test_x, test_y, _ = load_data('./stop_times.test.tsv', le) + test_x = tf.convert_to_tensor(test_x, dtype=tf.float32) + test_y = tf.convert_to_tensor(test_y) + + model = tf.keras.models.load_model('model.keras') + pd.DataFrame(model.predict(test_x), columns=le.classes_).to_csv('stop_times.predictions.tsv', sep='\t') + + +if __name__ == "__main__": + test() diff --git a/src/tf.py b/src/tf_train.py similarity index 61% rename from src/tf.py rename to src/tf_train.py index f440e3b..88f05a5 100755 --- a/src/tf.py +++ b/src/tf_train.py @@ -22,20 +22,22 @@ def load_data(path: str, le: LabelEncoder): num_classes = len(le.classes_) -model = tf.keras.Sequential([ - tf.keras.layers.Input(shape=(2,)), - tf.keras.layers.Dense(4 * num_classes, activation='relu'), - tf.keras.layers.Dense(4 * num_classes, activation='relu'), - tf.keras.layers.Dense(4 * num_classes, activation='relu'), - tf.keras.layers.Dense(num_classes, activation='softmax') -]) - -model.compile(optimizer='adam', - loss=tf.keras.losses.SparseCategoricalCrossentropy(), - metrics=['accuracy']) def train(): - global model, le + global le + + model = tf.keras.Sequential([ + tf.keras.layers.Input(shape=(2,)), + tf.keras.layers.Dense(4 * num_classes, activation='relu'), + tf.keras.layers.Dense(4 * num_classes, activation='relu'), + tf.keras.layers.Dense(4 * num_classes, activation='relu'), + tf.keras.layers.Dense(num_classes, activation='softmax') + ]) + + model.compile(optimizer='adam', + loss=tf.keras.losses.SparseCategoricalCrossentropy(), + metrics=['accuracy']) + train_x, train_y, _ = load_data('./stop_times.train.tsv', le) train_x = tf.convert_to_tensor(train_x, dtype=tf.float32) train_y = tf.convert_to_tensor(train_y) @@ -50,22 +52,7 @@ def train(): with open('history', 'w') as f: print(repr(history), file=f) - model.save_weights('model.ckpt') + model.save('model.keras') -def test(): - global model, le - test_x, test_y, _ = load_data('./stop_times.test.tsv', le) - test_x = tf.convert_to_tensor(test_x, dtype=tf.float32) - test_y = tf.convert_to_tensor(test_y) - model.load_weights('model.ckpt') - model.evaluate(test_x, test_y) - -SUBCOMMANDS = { - "test": test, - "train": train, -} - -import sys -assert len(sys.argv) == 2 -assert sys.argv[1] in SUBCOMMANDS.keys() -SUBCOMMANDS[sys.argv[1]]() +if __name__ == "__main__": + train()