63 KiB
63 KiB
Sprawdzanie zbioru danych
import pandas as pd
import numpy as np
# Load the tab-separated sentence-pair file into a DataFrame, supplying the
# three column names explicitly.
training_file = pd.read_csv("/kaggle/input/anki-en-fin/fin.txt", sep='\t', names=["English","Finnish","attribution"])
# Preview the first rows of the table.
training_file.head()
English | Finnish | attribution | |
---|---|---|---|
0 | Go. | Mene. | CC-BY 2.0 (France) Attribution: tatoeba.org #2... |
1 | Hi. | Moro! | CC-BY 2.0 (France) Attribution: tatoeba.org #5... |
2 | Hi. | Terve. | CC-BY 2.0 (France) Attribution: tatoeba.org #5... |
3 | Run! | Juokse! | CC-BY 2.0 (France) Attribution: tatoeba.org #9... |
4 | Run! | Juoskaa! | CC-BY 2.0 (France) Attribution: tatoeba.org #9... |
Pierwsze przykłady są dosyć ciekawe:
# English column with repeated sentences removed (one English sentence can
# appear on several rows, each with a different Finnish translation).
non_dup = training_file["English"].drop_duplicates()
# Show the first ten distinct English sentences.
non_dup.iloc[:10]
0 Go. 1 Hi. 3 Run! 5 Run. 6 Who? 7 Wow! 10 Duck! 12 Fire! 13 Help! 16 Hide. Name: English, dtype: object
Korzystanie z tutoriala z Keras:
import os
# Hyperparameters and data location for the character-level seq2seq model.
batch_size = 64 # Batch size for training.
epochs = 100 # Number of epochs to train for.
latent_dim = 256 # Latent dimensionality of the encoding space.
# Upper bound on the number of sentence pairs read from the file; the larger
# alternative value is left commented out.
#num_samples = 185000 # Number of samples to train on.
num_samples = 20000
# Path to the data txt file on disk.
data_path = "/kaggle/input/anki-en-fin/fin.txt"
import random
# Vectorize the data: collect sentence pairs and the character vocabularies.
input_texts = []  # Finnish sentences (model input)
target_texts = []  # English sentences (model target), wrapped in "\t" ... "\n"
input_characters = set()
target_characters = set()
with open(data_path, "r", encoding="utf-8") as f:
    lines = f.read().split("\n")

# Keep only well-formed "English<TAB>Finnish<TAB>attribution" rows *before*
# sampling. Filtering after the shuffle/slice would let a blank or malformed
# line silently shrink the sample below `num_samples`, and would drop an
# arbitrary valid line when the whole file is requested.
rows = [fields for fields in (line.split("\t") for line in lines) if len(fields) == 3]
random.shuffle(rows)

for target_text, input_text, _ in rows[:num_samples]:
    # We use "tab" as the "start sequence" character
    # for the targets, and "\n" as "end sequence" character.
    target_text = "\t" + target_text + "\n"
    input_texts.append(input_text)
    target_texts.append(target_text)
    # set.update adds every character of the string in one call.
    input_characters.update(input_text)
    target_characters.update(target_text)

# Sorted lists give every character a stable position for one-hot encoding.
input_characters = sorted(input_characters)
target_characters = sorted(target_characters)
num_encoder_tokens = len(input_characters)
num_decoder_tokens = len(target_characters)
# Longest sentence on each side; these set the time dimension of the tensors.
max_encoder_seq_length = max(len(txt) for txt in input_texts)
max_decoder_seq_length = max(len(txt) for txt in target_texts)

print("Number of samples:", len(input_texts))
print("Number of unique input tokens:", num_encoder_tokens)
print("Number of unique output tokens:", num_decoder_tokens)
print("Max sequence length for inputs:", max_encoder_seq_length)
print("Max sequence length for outputs:", max_decoder_seq_length)
Number of samples: 20000 Number of unique input tokens: 87 Number of unique output tokens: 79 Max sequence length for inputs: 211 Max sequence length for outputs: 175
# Map every character to its column in the one-hot vectors.
input_token_index = {char: i for i, char in enumerate(input_characters)}
target_token_index = {char: i for i, char in enumerate(target_characters)}

# One-hot tensors of shape (samples, timesteps, vocabulary size).
encoder_input_data = np.zeros(
    (len(input_texts), max_encoder_seq_length, num_encoder_tokens),
    dtype="float32",
)
decoder_input_data = np.zeros(
    (len(input_texts), max_decoder_seq_length, num_decoder_tokens),
    dtype="float32",
)
decoder_target_data = np.zeros(
    (len(input_texts), max_decoder_seq_length, num_decoder_tokens),
    dtype="float32",
)

for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    for t, char in enumerate(input_text):
        encoder_input_data[i, t, input_token_index[char]] = 1.0
    # Pad the remaining timesteps with the space character. The offset comes
    # from the text length rather than the leaked loop variable, so an empty
    # text cannot reuse a stale `t` from the previous sample.
    encoder_input_data[i, len(input_text):, input_token_index[" "]] = 1.0
    for t, char in enumerate(target_text):
        # decoder_target_data is ahead of decoder_input_data by one timestep
        decoder_input_data[i, t, target_token_index[char]] = 1.0
        if t > 0:
            # decoder_target_data will be ahead by one timestep
            # and will not include the start character.
            decoder_target_data[i, t - 1, target_token_index[char]] = 1.0
    decoder_input_data[i, len(target_text):, target_token_index[" "]] = 1.0
    # The target holds one character fewer (no start token), so its padding
    # starts one step earlier.
    decoder_target_data[i, len(target_text) - 1:, target_token_index[" "]] = 1.0
import keras
# Build the training model: an encoder LSTM whose final states initialise a
# decoder LSTM, followed by a softmax over the target characters.
# Define an input sequence and process it.
# The time dimension is None, so sequences of any length are accepted.
encoder_inputs = keras.Input(shape=(None, num_encoder_tokens))
encoder = keras.layers.LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
# We discard `encoder_outputs` and only keep the states.
encoder_states = [state_h, state_c]
# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = keras.Input(shape=(None, num_decoder_tokens))
# We set up our decoder to return full output sequences,
# and to return internal states as well. We don't use the
# return states in the training model, but we will use them in inference.
decoder_lstm = keras.layers.LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
# Per-timestep probability distribution over the target characters.
decoder_dense = keras.layers.Dense(num_decoder_tokens, activation="softmax")
decoder_outputs = decoder_dense(decoder_outputs)
# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
# NOTE: the inference code further down fetches layers from this model by
# position (model.layers[2], [3], [4]); keep the construction order stable.
model = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
2024-05-25 12:00:51.675986: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered 2024-05-25 12:00:51.676076: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered 2024-05-25 12:00:51.849007: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
# Print the layer-by-layer overview (output shapes, parameter counts) of the model.
model.summary()
Model: "functional_1"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ Connected to ┃ ┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩ │ input_layer │ (None, None, 87) │ 0 │ - │ │ (InputLayer) │ │ │ │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ input_layer_1 │ (None, None, 79) │ 0 │ - │ │ (InputLayer) │ │ │ │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ lstm (LSTM) │ [(None, 256), │ 352,256 │ input_layer[0][0] │ │ │ (None, 256), │ │ │ │ │ (None, 256)] │ │ │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ lstm_1 (LSTM) │ [(None, None, │ 344,064 │ input_layer_1[0]… │ │ │ 256), (None, │ │ lstm[0][1], │ │ │ 256), (None, │ │ lstm[0][2] │ │ │ 256)] │ │ │ ├─────────────────────┼───────────────────┼────────────┼───────────────────┤ │ dense (Dense) │ (None, None, 79) │ 20,303 │ lstm_1[0][0] │ └─────────────────────┴───────────────────┴────────────┴───────────────────┘
Total params: 716,623 (2.73 MB)
Trainable params: 716,623 (2.73 MB)
Non-trainable params: 0 (0.00 B)
# List the GPUs TensorFlow can see, using the public device API instead of the
# private `_get_available_gpus` helper of the legacy `tensorflow.python.keras`
# backend (an internal module that is not the Keras used to build the model).
import tensorflow as tf

tf.config.list_physical_devices("GPU")
['/device:GPU:0', '/device:GPU:1']
# Configure training: RMSprop optimizer with categorical cross-entropy over
# the one-hot character targets.
# NOTE(review): accuracy is averaged over every timestep, including the
# space-padded ones, so it presumably overstates translation quality - confirm.
model.compile(
optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"]
)
# Fit on (encoder input, decoder input) -> decoder target, holding out 20% of
# the samples for validation.
model.fit(
[encoder_input_data, decoder_input_data],
decoder_target_data,
batch_size=batch_size,
epochs=epochs,
validation_split=0.2,
)
# Save model
model.save("s2s_fin_en_model.keras")
Epoch 1/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 44ms/step - accuracy: 0.8327 - loss: 0.9095 - val_accuracy: 0.8553 - val_loss: 0.5346 Epoch 2/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 35ms/step - accuracy: 0.8559 - loss: 0.5304 - val_accuracy: 0.8693 - val_loss: 0.4829 Epoch 3/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 36ms/step - accuracy: 0.8537 - loss: 0.5072 - val_accuracy: 0.8797 - val_loss: 0.4209 Epoch 4/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 36ms/step - accuracy: 0.8820 - loss: 0.4118 - val_accuracy: 0.8870 - val_loss: 0.3939 Epoch 5/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 36ms/step - accuracy: 0.8880 - loss: 0.3853 - val_accuracy: 0.8893 - val_loss: 0.3766 Epoch 6/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 37ms/step - accuracy: 0.8908 - loss: 0.3720 - val_accuracy: 0.8925 - val_loss: 0.3642 Epoch 7/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 37ms/step - accuracy: 0.8940 - loss: 0.3597 - val_accuracy: 0.8950 - val_loss: 0.3552 Epoch 8/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 37ms/step - accuracy: 0.8981 - loss: 0.3451 - val_accuracy: 0.8980 - val_loss: 0.3441 Epoch 9/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.8998 - loss: 0.3391 - val_accuracy: 0.9000 - val_loss: 0.3382 Epoch 10/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accuracy: 0.9024 - loss: 0.3299 - val_accuracy: 0.9015 - val_loss: 0.3293 Epoch 11/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accuracy: 0.9042 - loss: 0.3228 - val_accuracy: 0.9042 - val_loss: 0.3219 Epoch 12/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accuracy: 0.9055 - loss: 0.3177 - val_accuracy: 0.9053 - val_loss: 0.3170 Epoch 13/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accuracy: 0.9065 - loss: 0.3134 - 
val_accuracy: 0.9056 - val_loss: 0.3137 Epoch 14/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9091 - loss: 0.3051 - val_accuracy: 0.9077 - val_loss: 0.3086 Epoch 15/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 37ms/step - accuracy: 0.9101 - loss: 0.3007 - val_accuracy: 0.9091 - val_loss: 0.3035 Epoch 16/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9118 - loss: 0.2943 - val_accuracy: 0.9105 - val_loss: 0.2985 Epoch 17/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 37ms/step - accuracy: 0.9132 - loss: 0.2899 - val_accuracy: 0.9117 - val_loss: 0.2943 Epoch 18/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9140 - loss: 0.2867 - val_accuracy: 0.9127 - val_loss: 0.2905 Epoch 19/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9148 - loss: 0.2837 - val_accuracy: 0.9139 - val_loss: 0.2863 Epoch 20/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9163 - loss: 0.2783 - val_accuracy: 0.9145 - val_loss: 0.2840 Epoch 21/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9173 - loss: 0.2747 - val_accuracy: 0.9151 - val_loss: 0.2808 Epoch 22/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9183 - loss: 0.2706 - val_accuracy: 0.9164 - val_loss: 0.2776 Epoch 23/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accuracy: 0.9192 - loss: 0.2677 - val_accuracy: 0.9172 - val_loss: 0.2748 Epoch 24/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9200 - loss: 0.2654 - val_accuracy: 0.9180 - val_loss: 0.2725 Epoch 25/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9205 - loss: 0.2635 - val_accuracy: 0.9190 - val_loss: 0.2694 Epoch 26/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 
38ms/step - accuracy: 0.9219 - loss: 0.2587 - val_accuracy: 0.9191 - val_loss: 0.2675 Epoch 27/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9221 - loss: 0.2577 - val_accuracy: 0.9194 - val_loss: 0.2677 Epoch 28/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9230 - loss: 0.2544 - val_accuracy: 0.9202 - val_loss: 0.2638 Epoch 29/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9234 - loss: 0.2532 - val_accuracy: 0.9212 - val_loss: 0.2614 Epoch 30/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9240 - loss: 0.2514 - val_accuracy: 0.9217 - val_loss: 0.2597 Epoch 31/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9248 - loss: 0.2484 - val_accuracy: 0.9215 - val_loss: 0.2588 Epoch 32/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9257 - loss: 0.2457 - val_accuracy: 0.9222 - val_loss: 0.2573 Epoch 33/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accuracy: 0.9258 - loss: 0.2445 - val_accuracy: 0.9229 - val_loss: 0.2552 Epoch 34/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accuracy: 0.9259 - loss: 0.2444 - val_accuracy: 0.9231 - val_loss: 0.2540 Epoch 35/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9270 - loss: 0.2415 - val_accuracy: 0.9233 - val_loss: 0.2532 Epoch 36/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9276 - loss: 0.2389 - val_accuracy: 0.9239 - val_loss: 0.2510 Epoch 37/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9276 - loss: 0.2385 - val_accuracy: 0.9245 - val_loss: 0.2500 Epoch 38/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9289 - loss: 0.2343 - val_accuracy: 0.9246 - val_loss: 0.2482 Epoch 39/100 [1m250/250[0m 
[32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9288 - loss: 0.2344 - val_accuracy: 0.9256 - val_loss: 0.2465 Epoch 40/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accuracy: 0.9291 - loss: 0.2334 - val_accuracy: 0.9255 - val_loss: 0.2454 Epoch 41/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9293 - loss: 0.2322 - val_accuracy: 0.9258 - val_loss: 0.2444 Epoch 42/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9300 - loss: 0.2298 - val_accuracy: 0.9258 - val_loss: 0.2439 Epoch 43/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9301 - loss: 0.2292 - val_accuracy: 0.9262 - val_loss: 0.2432 Epoch 44/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9305 - loss: 0.2282 - val_accuracy: 0.9261 - val_loss: 0.2432 Epoch 45/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9309 - loss: 0.2276 - val_accuracy: 0.9269 - val_loss: 0.2411 Epoch 46/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9316 - loss: 0.2246 - val_accuracy: 0.9270 - val_loss: 0.2410 Epoch 47/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9318 - loss: 0.2240 - val_accuracy: 0.9268 - val_loss: 0.2406 Epoch 48/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9321 - loss: 0.2229 - val_accuracy: 0.9276 - val_loss: 0.2384 Epoch 49/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accuracy: 0.9323 - loss: 0.2225 - val_accuracy: 0.9283 - val_loss: 0.2372 Epoch 50/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9327 - loss: 0.2209 - val_accuracy: 0.9282 - val_loss: 0.2374 Epoch 51/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9330 - loss: 0.2199 - val_accuracy: 0.9284 - 
val_loss: 0.2363 Epoch 52/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9340 - loss: 0.2162 - val_accuracy: 0.9284 - val_loss: 0.2361 Epoch 53/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9336 - loss: 0.2181 - val_accuracy: 0.9287 - val_loss: 0.2351 Epoch 54/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9340 - loss: 0.2161 - val_accuracy: 0.9289 - val_loss: 0.2349 Epoch 55/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9344 - loss: 0.2147 - val_accuracy: 0.9291 - val_loss: 0.2343 Epoch 56/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9350 - loss: 0.2128 - val_accuracy: 0.9294 - val_loss: 0.2334 Epoch 57/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9348 - loss: 0.2135 - val_accuracy: 0.9287 - val_loss: 0.2332 Epoch 58/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9355 - loss: 0.2114 - val_accuracy: 0.9294 - val_loss: 0.2325 Epoch 59/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9357 - loss: 0.2105 - val_accuracy: 0.9295 - val_loss: 0.2325 Epoch 60/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9360 - loss: 0.2097 - val_accuracy: 0.9294 - val_loss: 0.2328 Epoch 61/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9361 - loss: 0.2092 - val_accuracy: 0.9298 - val_loss: 0.2316 Epoch 62/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9363 - loss: 0.2081 - val_accuracy: 0.9296 - val_loss: 0.2318 Epoch 63/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9368 - loss: 0.2062 - val_accuracy: 0.9302 - val_loss: 0.2303 Epoch 64/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 
0.9368 - loss: 0.2064 - val_accuracy: 0.9306 - val_loss: 0.2300 Epoch 65/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9374 - loss: 0.2049 - val_accuracy: 0.9308 - val_loss: 0.2293 Epoch 66/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9380 - loss: 0.2025 - val_accuracy: 0.9306 - val_loss: 0.2292 Epoch 67/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9376 - loss: 0.2040 - val_accuracy: 0.9306 - val_loss: 0.2301 Epoch 68/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9385 - loss: 0.2012 - val_accuracy: 0.9306 - val_loss: 0.2294 Epoch 69/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9388 - loss: 0.1999 - val_accuracy: 0.9310 - val_loss: 0.2288 Epoch 70/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9387 - loss: 0.1999 - val_accuracy: 0.9307 - val_loss: 0.2297 Epoch 71/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9382 - loss: 0.2019 - val_accuracy: 0.9311 - val_loss: 0.2277 Epoch 72/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9395 - loss: 0.1973 - val_accuracy: 0.9311 - val_loss: 0.2280 Epoch 73/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9395 - loss: 0.1978 - val_accuracy: 0.9312 - val_loss: 0.2284 Epoch 74/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9394 - loss: 0.1985 - val_accuracy: 0.9315 - val_loss: 0.2274 Epoch 75/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accuracy: 0.9400 - loss: 0.1959 - val_accuracy: 0.9315 - val_loss: 0.2273 Epoch 76/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9400 - loss: 0.1959 - val_accuracy: 0.9315 - val_loss: 0.2269 Epoch 77/100 [1m250/250[0m 
[32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9406 - loss: 0.1942 - val_accuracy: 0.9310 - val_loss: 0.2284 Epoch 78/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9406 - loss: 0.1933 - val_accuracy: 0.9314 - val_loss: 0.2278 Epoch 79/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9407 - loss: 0.1928 - val_accuracy: 0.9316 - val_loss: 0.2272 Epoch 80/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9409 - loss: 0.1930 - val_accuracy: 0.9318 - val_loss: 0.2273 Epoch 81/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9413 - loss: 0.1915 - val_accuracy: 0.9316 - val_loss: 0.2271 Epoch 82/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9417 - loss: 0.1903 - val_accuracy: 0.9316 - val_loss: 0.2278 Epoch 83/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9418 - loss: 0.1897 - val_accuracy: 0.9316 - val_loss: 0.2265 Epoch 84/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9423 - loss: 0.1883 - val_accuracy: 0.9317 - val_loss: 0.2274 Epoch 85/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9426 - loss: 0.1868 - val_accuracy: 0.9319 - val_loss: 0.2275 Epoch 86/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accuracy: 0.9426 - loss: 0.1868 - val_accuracy: 0.9317 - val_loss: 0.2267 Epoch 87/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9425 - loss: 0.1876 - val_accuracy: 0.9317 - val_loss: 0.2285 Epoch 88/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9434 - loss: 0.1846 - val_accuracy: 0.9322 - val_loss: 0.2263 Epoch 89/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9430 - loss: 0.1854 - val_accuracy: 0.9320 - 
val_loss: 0.2269 Epoch 90/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9437 - loss: 0.1834 - val_accuracy: 0.9319 - val_loss: 0.2280 Epoch 91/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9437 - loss: 0.1838 - val_accuracy: 0.9321 - val_loss: 0.2275 Epoch 92/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9435 - loss: 0.1841 - val_accuracy: 0.9321 - val_loss: 0.2271 Epoch 93/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9440 - loss: 0.1832 - val_accuracy: 0.9323 - val_loss: 0.2273 Epoch 94/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9443 - loss: 0.1816 - val_accuracy: 0.9320 - val_loss: 0.2281 Epoch 95/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9444 - loss: 0.1808 - val_accuracy: 0.9325 - val_loss: 0.2272 Epoch 96/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9445 - loss: 0.1808 - val_accuracy: 0.9326 - val_loss: 0.2275 Epoch 97/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9447 - loss: 0.1801 - val_accuracy: 0.9326 - val_loss: 0.2276 Epoch 98/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9453 - loss: 0.1787 - val_accuracy: 0.9321 - val_loss: 0.2283 Epoch 99/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9447 - loss: 0.1800 - val_accuracy: 0.9323 - val_loss: 0.2278 Epoch 100/100 [1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9452 - loss: 0.1782 - val_accuracy: 0.9322 - val_loss: 0.2289
# Build stand-alone encoder and decoder models for inference, reusing the
# trained layers of `model` (fetched by their position in `model.layers`).

# Encoder: sentence in, final LSTM states (h, c) out.
encoder_inputs = model.input[0]  # first model input: the source sequence
encoder_outputs, state_h_enc, state_c_enc = model.layers[2].output  # encoder LSTM
encoder_states = [state_h_enc, state_c_enc]
encoder_model = keras.Model(encoder_inputs, encoder_states)

# Decoder: previous character plus current states in, next-character
# distribution plus updated states out.
decoder_inputs = model.input[1]  # second model input: the target sequence
decoder_state_input_h = keras.Input(shape=(latent_dim,))
decoder_state_input_c = keras.Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_lstm = model.layers[3]  # decoder LSTM
decoder_dense = model.layers[4]  # softmax output layer
decoder_outputs, state_h_dec, state_c_dec = decoder_lstm(
    decoder_inputs,
    initial_state=decoder_states_inputs,
)
decoder_states = [state_h_dec, state_c_dec]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = keras.Model(
    [decoder_inputs, *decoder_states_inputs],
    [decoder_outputs, *decoder_states],
)

# Index -> character tables, used to turn predicted indices back into text.
reverse_input_char_index = {i: char for char, i in input_token_index.items()}
reverse_target_char_index = {i: char for char, i in target_token_index.items()}
def decode_sequence(input_seq):
    """Greedily decode one encoded input sentence into target text.

    The encoder turns `input_seq` into LSTM states. The decoder is then run
    one character at a time, starting from the "\t" start token and always
    taking the most probable next character.

    Args:
        input_seq: one-hot encoded input batch; a batch of size 1 is assumed.

    Returns:
        The decoded string. It ends with "\n" when the stop character was
        produced, otherwise it is cut off once it is longer than
        `max_decoder_seq_length`.
    """

    def one_hot_step(token_index):
        # Length-1 target sequence with a single character switched on.
        step = np.zeros((1, 1, num_decoder_tokens))
        step[0, 0, token_index] = 1.0
        return step

    # Encode the input as state vectors.
    states_value = encoder_model.predict(input_seq, verbose=0)
    # Start decoding from the start-of-sequence character.
    target_seq = one_hot_step(target_token_index["\t"])

    pieces = []  # one decoded character per decoder step
    while True:
        output_tokens, h, c = decoder_model.predict(
            [target_seq] + states_value, verbose=0
        )
        # Greedy choice: the most probable character at the last timestep.
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = reverse_target_char_index[sampled_token_index]
        pieces.append(sampled_char)

        # Stop on the end character or once the output is too long.
        if sampled_char == "\n" or len(pieces) > max_decoder_seq_length:
            break

        # Feed the sampled character and the new states into the next step.
        target_seq = one_hot_step(sampled_token_index)
        states_value = [h, c]

    return "".join(pieces)
# Decode up to the first 20 training sentences as a sanity check. The bound
# is capped at the number of loaded samples, so a small dataset cannot index
# past the end of `input_texts`.
for seq_index in range(min(20, len(input_texts))):
    # Take one sequence (part of the training set)
    # for trying out decoding; the slice keeps the batch dimension.
    input_seq = encoder_input_data[seq_index : seq_index + 1]
    decoded_sentence = decode_sequence(input_seq)
    print("-")
    print("Input sentence:", input_texts[seq_index])
    print("Decoded sentence:", decoded_sentence)
- Input sentence: Olethan kohtelias. Decoded sentence: Tom is a good money. - Input sentence: Meillä on isompiakin ongelmia. Decoded sentence: Tom is a good money. - Input sentence: Miksi me valehtelisimme sinulle? Decoded sentence: Tom is a good money. - Input sentence: Tomi antoi Marille palan suklaata. Decoded sentence: Tom is a good money. - Input sentence: Etkö olekin hyvä tenniksessä? Decoded sentence: Tom is a good money. - Input sentence: Se voi tapahtua. Decoded sentence: Tom is a good money. - Input sentence: Minun äitini on todella hyvä golfaamaan. Decoded sentence: Tom is a good money. - Input sentence: Olen tottunut puhumaan siitä. Decoded sentence: Tom is a good money. - Input sentence: Sen edestään löytää mitä taakseen jättää. Decoded sentence: Tom is a good money. - Input sentence: Olen pomosi. Decoded sentence: Tom is a good money. - Input sentence: Tämä on kaiken loppu. Decoded sentence: Tom is a good money. - Input sentence: Haluatko, että vakoilen Tomia puolestasi? Decoded sentence: Tom is a good money. - Input sentence: Tavataan taas pian uudestaan. Decoded sentence: Tom is a good money. - Input sentence: Kuka pelaa lätkää tänä iltana? Decoded sentence: Tom is a good money. - Input sentence: Täällä on liian lämmintä. Decoded sentence: Tom is a good money. - Input sentence: Tomi haluu kellon synttärilahjaks. Decoded sentence: Tom is a good money. - Input sentence: Se on kaikki mitä minulla on. Decoded sentence: Tom is a good money. - Input sentence: Antakaa minulle kaukosäädin. Decoded sentence: Tom is a good money. - Input sentence: Tom oli ainoa, joka ei osannut puhua ranskaa. Decoded sentence: Tom is a good money. - Input sentence: Minä olen aika varma, että tulemme häviämään. Decoded sentence: Tom is a good money.
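Model prawdopodobnie znalazł pewien „środek” zbioru danych i tłumaczy na niego każde zdanie (ma to trochę sensu, bo słowo „Tom” występuje w zbiorze 36660 razy). Możliwą przyczyną jest też dopełnianie spacjami do 211 znaków: koder przetwarza głównie dopełnienie, więc jego stan końcowy prawie nie zależy od zdania wejściowego.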
# Take the first encoded training sentence (the slice keeps the batch
# dimension) and inspect its contents and shape.
input_seq = encoder_input_data[0 : 1]
print(input_seq)
print(input_seq.shape)
[[[0. 0. 0. ... 0. 0. 0.] [0. 0. 0. ... 0. 0. 0.] [0. 0. 0. ... 0. 0. 0.] ... [1. 0. 0. ... 0. 0. 0.] [1. 0. 0. ... 0. 0. 0.] [1. 0. 0. ... 0. 0. 0.]]] (1, 211, 87)
# Decode that single encoded sentence and show the model's output.
decoded_sentence = decode_sequence(input_seq)
print(decoded_sentence)
Tom is a good money.
# Source sentence corresponding to the encoded sample decoded above.
input_texts[0]
'Olethan kohtelias.'
# Hand-encode a new sentence into a tensor shaped like `input_seq`, using the
# same scheme as the training data.
test_input = "Se olen minä!"
encoded_test_input = np.zeros_like(input_seq)
# One-hot encode each character at its timestep.
for t, char in enumerate(test_input):
    encoded_test_input[0, t, input_token_index[char]] = 1.0
# Every timestep after the sentence is filled with the space character.
encoded_test_input[0, len(test_input):, input_token_index[" "]] = 1.0
print(encoded_test_input)
print(encoded_test_input.shape)
[[[0. 0. 0. ... 0. 0. 0.] [0. 0. 0. ... 0. 0. 0.] [1. 0. 0. ... 0. 0. 0.] ... [1. 0. 0. ... 0. 0. 0.] [1. 0. 0. ... 0. 0. 0.] [1. 0. 0. ... 0. 0. 0.]]] (1, 211, 87)
def translate(sentence):
    """One-hot encode `sentence`, decode it with the model and print the result.

    The input tensor is sized from `max_encoder_seq_length` and
    `num_encoder_tokens` rather than from literal numbers. The training
    sample is drawn at random, so those dimensions can differ between runs.

    Args:
        sentence: source-language text made of characters present in
            `input_token_index`. An empty string is encoded as padding only.

    Raises:
        KeyError: if `sentence` contains a character missing from
            `input_token_index`.
        IndexError: if `sentence` is longer than `max_encoder_seq_length`.
    """
    encoded_in = np.zeros(shape=(1, max_encoder_seq_length, num_encoder_tokens))
    for t, char in enumerate(sentence):
        encoded_in[0, t, input_token_index[char]] = 1.0
    # Pad the rest with spaces. The offset comes from len(sentence) instead
    # of the loop variable, so an empty sentence does not hit an unbound `t`.
    encoded_in[0, len(sentence):, input_token_index[" "]] = 1.0
    decoded_sentence = decode_sequence(encoded_in)
    print("Input sentence:", sentence)
    print("Decoded sentence:", decoded_sentence)
# Try the model on a sentence typed in by hand.
translate("Se olen minä!")
Input sentence: Se olen minä! Decoded sentence: Tom is a good money.
# Try a very short sentence (it appears as the translation of "Go." in the data preview).
translate("Mene.")
Input sentence: Mene. Decoded sentence: Tom is a good money.
Wynik jest mało zadowalający, dlatego postanowiłem wypróbować wersję przedstawioną w dokumentacji PyTorch.