"""Load a trained BiLSTM regression model from its latest checkpoint and
write year predictions for the dev-0 and test-A splits."""

import os
import pickle
import sys

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout, Embedding
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Let the GPU memory pool grow on demand instead of pre-allocating all of it.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# Hyperparameters are passed on the command line and must match the values
# used when the checkpoint was trained.
vocab_size = int(sys.argv[1])
embedding_size = int(sys.argv[2])
LSTM_SIZE = int(sys.argv[3])
DROPOUT_LSTM = float(sys.argv[4])
DROPOUT_REGULAR = float(sys.argv[5])
BATCH_SIZE = int(sys.argv[6])
checkpoints = sys.argv[7]  # directory containing the saved checkpoints

FILE = '2'

# The training labels define the valid range for clipping predictions.
train_year = [float(a.rstrip('\n')) for a in open(f'../train/expected{FILE}.tsv', 'r')]
max_year = max(train_year)
min_year = min(train_year)

# The tokenized training texts determine the padding length used everywhere.
train_text_tokenized = pickle.load(open('train_text_30k_for_keras.pickle', 'rb'))
maxlen = max(len(a) for a in train_text_tokenized)

tokenizer = pickle.load(open('tokenizer.pickle', 'rb'))

# Tokenize and pad the test split with the same tokenizer and length.
test_text_tokenized = [a.rstrip('\n') for a in open('../test-A/in.tsv', 'r')]
test_text_tokenized = tokenizer.texts_to_sequences(test_text_tokenized)
test_text_tokenized = pad_sequences(test_text_tokenized, padding='post', maxlen=maxlen)

# Rebuild the training-time architecture so the checkpoint weights fit.
model = Sequential()
model.add(Embedding(vocab_size, embedding_size, input_length=maxlen, trainable=True))
#model.add(Bidirectional(LSTM(LSTM_SIZE, dropout=DROPOUT_LSTM, return_sequences=True)))
model.add(Bidirectional(LSTM(LSTM_SIZE, dropout=DROPOUT_LSTM)))
model.add(Dropout(DROPOUT_REGULAR))
model.add(Dense(1, activation='linear'))

# Pick the lexicographically last checkpoint file (zero-padded epoch numbers
# make this the latest one) and load its weights.
model_checkpoint = sorted(os.listdir(checkpoints))[-1]
model.load_weights(os.path.join(checkpoints, model_checkpoint))

# Targets were min-max scaled during training; invert that after predicting.
scaler = pickle.load(open('minmaxscaler.pickle', 'rb'))

# DEV PREDS
eval_text_tokenized = pickle.load(open('eval_text_30k_for_keras.pickle', 'rb'))
eval_preds = scaler.inverse_transform(model.predict(eval_text_tokenized, batch_size=20))
# Clip predictions to the year range seen in training.
eval_preds = np.clip(eval_preds, min_year, max_year)
with open(f'../dev-0/out{FILE}.tsv', 'w') as f:
    for pred in eval_preds:
        f.write(str(pred[0]) + '\n')

# TEST PREDS
test_preds = scaler.inverse_transform(model.predict(test_text_tokenized, batch_size=20))
test_preds = np.clip(test_preds, min_year, max_year)
with open(f'../test-A/out{FILE}.tsv', 'w') as f:
    for pred in test_preds:
        f.write(str(pred[0]) + '\n')
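
# Example invocation (a sketch only: the script name "predict.py" and the
# argument values below are illustrative assumptions, not from the original
# source; the checkpoint directory must come from the matching training run):
#
#   python predict.py 30000 300 256 0.2 0.5 64 checkpoints/
#
# Positional arguments, in order: vocab_size, embedding_size, LSTM_SIZE,
# DROPOUT_LSTM, DROPOUT_REGULAR, BATCH_SIZE, checkpoints directory.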