petite-difference-challenge2/Copy_of_1_PyTorch.ipynb
2020-05-14 02:29:26 +02:00

148 KiB
Raw Permalink Blame History

Warsztaty z uczenia maszynowego A: problemy rozpoznawania mowy

Language Model

2020.04.17

Karol Kaczmarek

karol.kaczmarek [at] amu.edu.pl

pokój B2-36 (proszę o kontakt w razie spotkania)


Zaliczenia:

  1. Obecności
  2. Zadania z 2-części warsztatów (2020-04-17, 2020-05-15)
  3. Ocena końcowa to średnia ocena z 2-części

Agenda:

  1. PyTorch
  2. Language Model
  3. Architektura Transformer
  4. BERT, GPT-2, RoBERTa

Linki:

Literatura:


PyTorch

PyTorch is a Python package that provides two high-level features:

  • Tensor computation (like NumPy) with strong GPU acceleration
  • Deep neural networks built on a tape-based autograd system

Usually PyTorch is used either as:

  • a replacement for NumPy to use the power of GPUs.
  • a deep learning research platform that provides maximum flexibility and speed.

import torch
import numpy as np
x = torch.tensor([1, 5, 300, 9, 123])
print(x)
tensor([  1,   5, 300,   9, 123])
array = np.array([1, 5, 300, 9, 123])
print(type(array), '|', array)

tensor = torch.tensor(array)
print(type(tensor), ' |', tensor)
<class 'numpy.ndarray'> | [  1   5 300   9 123]
<class 'torch.Tensor'>  | tensor([  1,   5, 300,   9, 123])
tensor = torch.Tensor(2, 5)
print(tensor)
tensor([[4.3199e-05, 3.1458e-12, 7.9309e+34, 6.0022e+31, 4.2964e+24],
        [7.3162e+28, 1.6992e-07, 6.7331e+22, 6.7120e+22, 2.8795e+32]])
x = torch.empty(5)
print(x)
tensor([3.3827e+00, 4.5852e-41, 3.3827e+00, 4.5852e-41, 4.4842e-44])
x = torch.zeros(5)
print(x)
tensor([0., 0., 0., 0., 0.])
x = torch.ones(5)
print(x)
tensor([1., 1., 1., 1., 1.])
x = torch.rand(5)
print(x)
tensor([0.3035, 0.8395, 0.3348, 0.1965, 0.0337])
x = torch.randn(5)
print(x)
tensor([-1.8783, -0.4755, -0.9203,  0.6231,  0.1235])
x = torch.full((2, 3), 5)
print(x)
tensor([[5., 5., 5.],
        [5., 5., 5.]])
x = torch.full((2, 3), 5)
x = torch.arange(5)
print(x)
x = torch.arange(2, 5)
print(x)
tensor([0, 1, 2, 3, 4])
tensor([2, 3, 4])
torch.arange(1.0, 5.0, 0.5)
tensor([1.0000, 1.5000, 2.0000, 2.5000, 3.0000, 3.5000, 4.0000, 4.5000])
torch.linspace(3, 10, steps=5)
tensor([ 3.0000,  4.7500,  6.5000,  8.2500, 10.0000])
x = torch.linspace(1, 10, steps=5)
print(x)
tensor([ 1.0000,  3.2500,  5.5000,  7.7500, 10.0000])

x = torch.rand(5)
print(x)
tensor([0.8173, 0.8403, 0.7391, 0.2665, 0.8639])
print(x[2])
print(x[1:3])
tensor(0.7391)
tensor([0.8403, 0.7391])
x[2] = 99
print(x)
tensor([ 0.8173,  0.8403, 99.0000,  0.2665,  0.8639])
x[1] = True
print(x)
tensor([ 0.8173,  1.0000, 99.0000,  0.2665,  0.8639])
x = torch.rand(4, 7)
print(x)
tensor([[0.0321, 0.9135, 0.9144, 0.1919, 0.9376, 0.8586, 0.9644],
        [0.8811, 0.0165, 0.6718, 0.4107, 0.7261, 0.5029, 0.6465],
        [0.5959, 0.2529, 0.4962, 0.5259, 0.6860, 0.7571, 0.8950],
        [0.0176, 0.7755, 0.5761, 0.9369, 0.4213, 0.7168, 0.6567]])
print(x[:,-1])
tensor([0.9644, 0.6465, 0.8950, 0.6567])
print(x[1:3,2:5])
tensor([[0.6718, 0.4107, 0.7261],
        [0.4962, 0.5259, 0.6860]])
x[0,0:2] = torch.Tensor([1, 2])
print(x)
tensor([[1.0000, 2.0000, 0.9144, 0.1919, 0.9376, 0.8586, 0.9644],
        [0.8811, 0.0165, 0.6718, 0.4107, 0.7261, 0.5029, 0.6465],
        [0.5959, 0.2529, 0.4962, 0.5259, 0.6860, 0.7571, 0.8950],
        [0.0176, 0.7755, 0.5761, 0.9369, 0.4213, 0.7168, 0.6567]])
z = torch.empty(2, 4).random_(5, 80)
print(z)
tensor([[10., 34., 59., 52.],
        [21., 66., 79., 66.]])
x[2:2 + z.size(0), 1:1 + z.size(1)] = z
print(x)
tensor([[1.0000e+00, 2.0000e+00, 9.1440e-01, 1.9187e-01, 9.3764e-01, 8.5855e-01,
         9.6439e-01],
        [8.8110e-01, 1.6510e-02, 6.7177e-01, 4.1072e-01, 7.2609e-01, 5.0288e-01,
         6.4647e-01],
        [5.9593e-01, 1.0000e+01, 3.4000e+01, 5.9000e+01, 5.2000e+01, 7.5711e-01,
         8.9498e-01],
        [1.7649e-02, 2.1000e+01, 6.6000e+01, 7.9000e+01, 6.6000e+01, 7.1679e-01,
         6.5671e-01]])

x = torch.rand(6)
print(x.size())
print(x)
torch.Size([6])
tensor([0.9270, 0.2327, 0.3381, 0.7915, 0.7357, 0.4224])
x = torch.rand(6, 3)
print(x.size())
print(x)
torch.Size([6, 3])
tensor([[0.4974, 0.4085, 0.0589],
        [0.4546, 0.2406, 0.0320],
        [0.8759, 0.9207, 0.6263],
        [0.8034, 0.9810, 0.6037],
        [0.2395, 0.3363, 0.0798],
        [0.6728, 0.6035, 0.7921]])
x = torch.rand(3, 5, 4)
print(x.size())
print(x)
torch.Size([3, 5, 4])
tensor([[[0.2329, 0.9153, 0.1941, 0.8649],
         [0.2225, 0.3155, 0.5143, 0.6199],
         [0.4692, 0.2212, 0.5086, 0.6094],
         [0.3478, 0.0144, 0.3675, 0.8303],
         [0.2158, 0.9979, 0.0900, 0.9084]],

        [[0.3173, 0.2841, 0.7448, 0.1124],
         [0.8417, 0.6948, 0.2169, 0.2373],
         [0.0954, 0.6364, 0.0184, 0.7757],
         [0.2341, 0.9157, 0.4323, 0.6121],
         [0.0768, 0.7580, 0.1408, 0.0899]],

        [[0.1522, 0.3612, 0.5985, 0.4854],
         [0.1454, 0.3195, 0.0663, 0.4226],
         [0.6063, 0.8836, 0.4289, 0.0821],
         [0.2279, 0.4855, 0.8851, 0.7394],
         [0.3143, 0.1334, 0.3592, 0.6609]]])
x = torch.randn(4, 5)
print(x.size())
print(x.numel())
torch.Size([4, 5])
20
x = torch.randn(1)
print(x)
print(x.item())
tensor([-1.2007])
-1.200722336769104
x = torch.randn(5)
print(x)
print(x[2].item())
tensor([ 0.2868,  0.1182,  1.0450, -1.4448,  2.7314])
1.0450468063354492
print(x.item())
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-36-95a4debbca18> in <module>
----> 1 print(x.item())

ValueError: only one element tensors can be converted to Python scalars

x = torch.rand(5)
print(x.type())
print(x.dtype)
Data type dtype Tensor types
32-bit floating point torch.float32 or torch.float torch.*.FloatTensor
64-bit floating point torch.float64 or torch.double torch.*.DoubleTensor
16-bit floating point torch.float16 or torch.half torch.*.HalfTensor
8-bit integer (unsigned) torch.uint8 torch.*.ByteTensor
8-bit integer (signed) torch.int8 torch.*.CharTensor
16-bit integer (signed) torch.int16 or torch.short torch.*.ShortTensor
32-bit integer (signed) torch.int32 or torch.int torch.*.IntTensor
64-bit integer (signed) torch.int64 or torch.long torch.*.LongTensor
x = torch.LongTensor([1, 2, 3])
print(x)
print(x.type(), x.dtype)
x = torch.FloatTensor([1, 2, 3])
print(x)
print(x.type(), x.dtype)
x = torch.HalfTensor([1, 2, 3])
print(x)
print(x.type(), x.dtype)

x = torch.rand(5)
print(x)
print('*' * 25)
print(x + 5)
print('*' * 25)
print(x)
x = torch.rand(5)
y = torch.rand(5)
print(x)
print(y)
print('*' * 48)
print(x + y)
print('*' * 48)
print(x)
print(y)
x = torch.rand(5)
print(x)
print(x.sum())
x = torch.rand(5, 4)
print(x)
print(x.sum(dim=1))
x = torch.rand(5, 4)
print(x)
print(x.mean())
x = torch.rand(5, 4)
print(x)
print(x.mean(dim=0))
print(x.mean(dim=1))
x = torch.rand(5)
y = torch.rand(5)
print(x)
print(y)
print('*' * 48)
print(x.add(y))
print(torch.add(x, y))
print('*' * 48)
print(x)
print(y)
x = torch.rand(5)
y = torch.rand(5)
print(x)
print(y)
print('*' * 48)
print(x.add_(y))
print('*' * 48)
print(x)
print(y)

x = torch.randn(2, 3)
y = torch.randn(2, 3)
y[0][1] = x[0][1]
print(x)
print(y)
print(torch.equal(x, y))
print(torch.eq(x, y))
print(torch.gt(x, y))
print(torch.le(x, y))

x = torch.rand(4)
y = x.numpy()
print(type(x), x)
print(type(y), y)
x = np.random.rand(5)
y = torch.from_numpy(x)
print(type(x), x)
print(type(y), y)

x = torch.randn(4, 4)
y = x.view(16)
z = x.view(-1, 8)
print(x.size())
print(x)
print(y.size())
print(y)
print(z.size())
print(z)
x = torch.randn(2, 3)
y = torch.randn(3, 3)
print(x)
print(y)
z = torch.cat((x, y))
print(z)
x = torch.randn(3, 2)
y = torch.randn(2, 2)
print(x)
print(y)
z = torch.cat((x, y))
print(z)
x = torch.randn(4, 5)
print(x)
y = torch.narrow(x, 0, 1, 2)
print(y)
z = torch.narrow(x, 1, 0, 3)
print(z)
x = torch.randn(4, 5)
print(x)
y = torch.reshape(x, (2, 2, -1))
print(y)
z = torch.reshape(x, (-1, 4))
print(z)
x = torch.rand(25)
print(x.size())
x = torch.chunk(x, 5)
for i in x:
    print(i.size())
x = torch.rand(25, 100)
print(x.size())
x = torch.chunk(x, 5)
for i in x:
    print(i.size())
x = torch.rand(25, 100)
print(x.size())
x = torch.chunk(x, 3, dim=1)
for i in x:
    print(i.size())
x1 = torch.rand(3, 5)
x2 = torch.rand(3, 5)
print(x1.size(), x2.size())
print(x1)
print(x2)
x3 = torch.stack([x1, x2])
print(x3.size())
print(x3)
x4, x5 = torch.unbind(x3)
print(x4.size(), torch.equal(x1, x4))
print(x4)
print(x5.size(), torch.equal(x2, x5))
print(x5)
x1 = torch.randn(1, 2, 1, 3)
print(x1.size())
print(x1)
x2 = torch.squeeze(x1)
print(x2.size())
print(x2)
x3 = torch.unsqueeze(x2, dim=1)
print(x3.size())
print(x3)
x4 = torch.unsqueeze(x3, dim=0)
print(x4.size(), torch.equal(x4, x1))
print(x4)
x = torch.randn(2, 3)
print(x)
print(x.t())
print(x)
x.t_()
print(x)
x = x.t()
print(x)
x = torch.randn(2, 3)
print(x)
print(x.transpose(0, 1))
a = torch.randn(3, 4)
print(a)
print(torch.argsort(a, dim=1))
print(torch.argsort(a, dim=0))
b = a.sum(dim=1)
print(b)
print(torch.argsort(b, dim=0))
x = torch.rand(10, 25).sum(dim=1)
print(x)
print(x.size())
print(torch.topk(x, 5))
x = torch.rand(5, 4)
print(x)
print(torch.triu(x))
print(torch.triu(x, diagonal=2))
print(torch.triu(x, diagonal=-1))

CUDA

torch.cuda.is_available()
!nvidia-smi
torch.cuda.device_count()
torch.cuda.current_device()
torch.cuda.get_device_name()
torch.cuda.set_device(0)
torch.cuda.manual_seed(123)
torch.cuda.manual_seed_all(123)
x = torch.rand(15000, 15000)
print(x.device)
print(x.size())
x = x.to('cuda:0')
print(x.device)
x = torch.rand(15000, 15000)
x = x.cuda()
print(x.device)
y = torch.rand(150, 150, device='cuda:0')
print(y.device)
y = torch.rand(150, 150, device=x.device)
print(y.device)

from tqdm.notebook import tqdm
x = torch.rand(15000, 15000)
y = x.to('cuda:0')
print(x.device, x.size())
print(y.device, y.size())
print(x[5:10,1])
print(y[5:10,1])
for i in tqdm(range(1500)):
    x += 1
for i in tqdm(range(1500)):
    y += 1
for i in tqdm(range(1500)):
    x.pow(2)
for i in tqdm(range(1500)):
    y.pow(2)

import pickle

vocab = {'This': 0, 'is': 1, 'sentence': 2, 'a': 3}
print(type(vocab), vocab)

with open('data.pkl', 'wb') as f:
    pickle.dump(vocab, f)

vocab = {}
print(type(vocab), vocab)
!ls data.pkl
with open('data.pkl', 'rb') as f:
    vocab = pickle.load(f)
print(type(vocab), vocab)

Autograd mechanics

Link

x = torch.ones(2, 2, requires_grad=True)
print(x)
y = x + 2
print(y)
print(y.grad_fn)
z = y * y * 3
out = z.mean()
print(z, out)

print(x.grad)
out.backward()
print(x.grad)
a = torch.randn(2, 2)
a *= 2
print(a.requires_grad)
a.requires_grad_(True)
print(a.requires_grad)
x = torch.randn(3, requires_grad=True)
print(x.requires_grad)
print((x ** 2).requires_grad)

with torch.no_grad():
    print((x ** 2).requires_grad)

Neural Network

torch.nn

Linear layer

$$ Y = Wx + b $$

add image

  • dense or fully connected layers
  • torch.nn.Linear
  • torch.nn.Linear(in_features=10, out_features=5, bias=True)
m = torch.nn.Linear(100, 200)
print(m)
x = torch.rand(25, 100)
y = m(x)
print(x.size(), '->', y.size())
x = torch.rand(10, 3, 25, 100)
y = m(x)
print(x.size(), '->', y.size())

Non-linear activations

Sigmoid

$$ \sigma (x) = \frac{\mathrm{1} }{\mathrm{1} + e^{-x}} $$

Tanh

$$ \tanh(x) = \frac{\sinh(x)}{\cosh(x)} = \frac{e^x - e^{-x}}{e^x + e^{-x}} $$

ReLU

$$ f(x) = max(0, x) $$

m = torch.nn.ReLU()
print(m)
x = torch.rand(5, 6) - 0.5
print(x)
print(m(x))
x = torch.rand(5, 6) - 0.5
print(x)
print(torch.nn.functional.relu(x))

One-hot encoding

  • token is represented by a vector of length N, where N is the size of the vocabulary
  • the vocabulary is the set of unique words in the document; N is the number of words in it

This is a sentence

token encoding
This 1000
is 0100
a 0010
sentence 0001

Word embedding

  • provides a dense representation of a word filled with floating-point numbers
  • common to use a word embedding of dimension size 50, 100, 256, 300, and sometimes 1,000
m = torch.nn.Embedding(10, 3)
print(m)
x = torch.LongTensor(5, 4).random_(0, 10)
print(x)
y = m(x)
print(y)
word_to_idx = {'This': 0, 'is': 1, 'a': 2, 'sentence': 3}

embeds = torch.nn.Embedding(len(word_to_idx), 8)

lookup_tensor = torch.tensor([word_to_idx['This'], word_to_idx['is'], word_to_idx['a']], dtype=torch.long)
hello_embed = embeds(lookup_tensor)

print(hello_embed)

One-hot encoding vs Word embedding

Represent a vocabulary of size 20000:

  • one-hot encoding = 20000 x 20000
  • word embedding = 20000 x dimension size

Softmax

m = torch.nn.Softmax(dim=1)
print(m)
x = torch.rand(5, 4)
print(x)
y = m(x)
print(y)
print(y.sum(dim=0), y.sum(dim=1))

Loss function

  • function which tells the model how close its predictions are to the actual values
  • the torch.nn library has different loss functions
loss function used
L1 loss Mostly used as a regularizer.
MSE loss Used as loss function for regression problems.
Cross-entropy loss Used for binary and multi-class classification problems.
NLL Loss Used for classification problems and allows us to use specific weights to handle imbalanced datasets.
NLL Loss2d Used for pixel-wise classification, mostly for problems related to image segmentation.
crit = torch.nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=True)
print(input)
target = torch.empty(3, dtype=torch.long).random_(5)
print(target)
loss = crit(input, target)
print(loss)
loss.backward()

Optimizer

  • optimizes the weights to reduce the loss, thus improving the accuracy of the algorithm
  • takes the loss function and all the learnable parameters and moves them slightly to improve performance
  • the torch.optim library has different optimizers (Adagrad, Adam, SparseAdam, ASGD, SGD)
# Illustrative skeleton of a training loop. `model`, `dataset` and `loss_fn`
# are placeholders that must be defined by the surrounding program.
# The optimizer is referenced through `torch.optim` because only `torch`
# itself is imported in this notebook.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

for inputs, target in dataset:
    # Gradients accumulate across backward() calls, so reset them each step.
    optimizer.zero_grad()
    output = model(inputs)
    loss = loss_fn(output, target)
    # Compute gradients of the loss w.r.t. the model parameters ...
    loss.backward()
    # ... and let the optimizer update the parameters from those gradients.
    optimizer.step()

Overfitting and underfitting

  • Overfitting: algorithm overfits when it performs well on the training dataset but fails to perform on unseen or validation and test datasets - memorize the dataset
  • getting more data
  • reducing the size of the network
  • applying weight regularizer
  • applying dropout
  • Underfitting: model may fail to learn any patterns from our training data
  • getting more data
  • increase the complexity of the model
  • applying weight regularizer

Dropout

add image

m = torch.nn.Dropout(p=0.5)
print(m)
x = torch.rand(3, 5)
print(x)
y = m(x)
print(y)

Layer Normalization

m = torch.nn.LayerNorm(125)
print(m)
x = torch.rand(15, 125)
print(x.size())
y = m(x)
print(y.size())

Module

class MyNet(torch.nn.Module):
    """Two stacked linear layers: 128 -> ``hidden_size`` -> 128 features.

    No activation is applied between the layers. ``hidden_size`` is kept as an
    attribute so it can be stored next to the state dict and used to rebuild
    the module later.
    """

    def __init__(self, hidden_size: int):
        super(MyNet, self).__init__()
        self.hidden_size = hidden_size
        # Attribute names (l1, l2) define the state-dict keys; keep them stable.
        self.l1 = torch.nn.Linear(in_features=128, out_features=hidden_size)
        self.l2 = torch.nn.Linear(in_features=hidden_size, out_features=128)

    def forward(self, x):
        """Project ``x`` (last dimension 128) through ``l1`` and then ``l2``."""
        hidden = self.l1(x)
        return self.l2(hidden)
m = MyNet(256)
print(m)
MyNet(
  (l1): Linear(in_features=128, out_features=256, bias=True)
  (l2): Linear(in_features=256, out_features=128, bias=True)
)
x = torch.rand(15, 128)
y = m(x)
print(x.size(), '->', y.size())
print('Equal:', torch.equal(x, y))
torch.Size([15, 128]) -> torch.Size([15, 128])
Equal: False
print(m.l1.weight.device)
m.to('cuda')
print(m.l1.weight.device)
m.to('cpu')
print(m.l1.weight.device)
cpu
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-51-2a01d61c9283> in <module>
      1 print(m.l1.weight.device)
----> 2 m.to('cuda')
      3 print(m.l1.weight.device)
      4 m.to('cpu')
      5 print(m.l1.weight.device)

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in to(self, *args, **kwargs)
    423             return t.to(device, dtype if t.is_floating_point() else None, non_blocking)
    424 
--> 425         return self._apply(convert)
    426 
    427     def register_backward_hook(self, hook):

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _apply(self, fn)
    199     def _apply(self, fn):
    200         for module in self.children():
--> 201             module._apply(fn)
    202 
    203         def compute_should_use_set_data(tensor, tensor_applied):

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _apply(self, fn)
    221                 # `with torch.no_grad():`
    222                 with torch.no_grad():
--> 223                     param_applied = fn(param)
    224                 should_use_set_data = compute_should_use_set_data(param, param_applied)
    225                 if should_use_set_data:

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in convert(t)
    421 
    422         def convert(t):
--> 423             return t.to(device, dtype if t.is_floating_point() else None, non_blocking)
    424 
    425         return self._apply(convert)

/usr/local/lib/python3.6/dist-packages/torch/cuda/__init__.py in _lazy_init()
    194             raise RuntimeError(
    195                 "Cannot re-initialize CUDA in forked subprocess. " + msg)
--> 196         _check_driver()
    197         torch._C._cuda_init()
    198         _cudart = _load_cudart()

/usr/local/lib/python3.6/dist-packages/torch/cuda/__init__.py in _check_driver()
    108 Alternatively, go to: https://pytorch.org to install
    109 a PyTorch version that has been compiled with your version
--> 110 of the CUDA driver.""".format(str(torch._C._cuda_getDriverVersion())))
    111 
    112 

AssertionError: 
The NVIDIA driver on your system is too old (found version 9010).
Please update your GPU driver by downloading and installing a new
version from the URL: http://www.nvidia.com/Download/index.aspx
Alternatively, go to: https://pytorch.org to install
a PyTorch version that has been compiled with your version
of the CUDA driver.
print(m.training)
m.eval()
print(m.training)
m.train()
print(m.training)
True
False
True
print(m.state_dict().keys())
print(m.state_dict())
odict_keys(['l1.weight', 'l1.bias', 'l2.weight', 'l2.bias'])
OrderedDict([('l1.weight', tensor([[-0.0193,  0.0075, -0.0066,  ..., -0.0232,  0.0126, -0.0706],
        [-0.0387, -0.0597,  0.0671,  ..., -0.0475,  0.0777, -0.0336],
        [ 0.0489,  0.0128, -0.0551,  ...,  0.0771, -0.0471,  0.0243],
        ...,
        [ 0.0730,  0.0362, -0.0538,  ...,  0.0760, -0.0179,  0.0786],
        [-0.0768, -0.0611, -0.0077,  ...,  0.0236, -0.0485,  0.0571],
        [-0.0069,  0.0726,  0.0871,  ...,  0.0091,  0.0041, -0.0063]])), ('l1.bias', tensor([ 0.0219, -0.0626, -0.0179,  0.0733, -0.0071, -0.0866,  0.0232, -0.0150,
        -0.0016,  0.0404, -0.0839, -0.0130,  0.0267, -0.0126,  0.0706, -0.0795,
         0.0578,  0.0022,  0.0038, -0.0775,  0.0197,  0.0177,  0.0190,  0.0238,
        -0.0771, -0.0455, -0.0586,  0.0138, -0.0659, -0.0443, -0.0451,  0.0710,
        -0.0820, -0.0724,  0.0570,  0.0868, -0.0540, -0.0570,  0.0128,  0.0389,
        -0.0867,  0.0409, -0.0622,  0.0600,  0.0283, -0.0248,  0.0042,  0.0491,
         0.0648,  0.0127,  0.0698, -0.0265,  0.0230, -0.0714,  0.0833, -0.0269,
        -0.0736, -0.0079,  0.0478, -0.0519, -0.0653,  0.0830, -0.0777, -0.0535,
         0.0074,  0.0476,  0.0035,  0.0461, -0.0807, -0.0004, -0.0046, -0.0367,
         0.0829,  0.0214, -0.0596, -0.0537, -0.0799,  0.0054,  0.0637, -0.0382,
         0.0244, -0.0719,  0.0047,  0.0395,  0.0764,  0.0672, -0.0340, -0.0775,
        -0.0073,  0.0047, -0.0576,  0.0221, -0.0596,  0.0799,  0.0635,  0.0517,
         0.0770, -0.0358,  0.0741, -0.0596, -0.0574, -0.0674, -0.0645, -0.0172,
         0.0205, -0.0760, -0.0349, -0.0817,  0.0246,  0.0328,  0.0428, -0.0154,
         0.0564,  0.0180, -0.0686,  0.0088,  0.0675,  0.0701, -0.0609,  0.0883,
         0.0677, -0.0176, -0.0310, -0.0428, -0.0817, -0.0302, -0.0433, -0.0570,
        -0.0419, -0.0522,  0.0170,  0.0421,  0.0672, -0.0036,  0.0270,  0.0491,
         0.0763, -0.0390, -0.0704,  0.0649,  0.0582,  0.0628, -0.0334,  0.0134,
         0.0809,  0.0057, -0.0384, -0.0295, -0.0106,  0.0512, -0.0091, -0.0276,
         0.0173,  0.0516, -0.0086,  0.0867, -0.0710,  0.0380, -0.0555,  0.0762,
        -0.0620, -0.0151,  0.0367,  0.0822,  0.0403, -0.0171,  0.0730,  0.0584,
        -0.0025,  0.0582,  0.0745, -0.0616, -0.0494,  0.0356, -0.0272, -0.0097,
         0.0362,  0.0616, -0.0310,  0.0454, -0.0683,  0.0661, -0.0323,  0.0215,
         0.0375, -0.0673,  0.0543, -0.0783,  0.0621,  0.0392, -0.0194,  0.0852,
        -0.0134,  0.0145,  0.0352, -0.0670,  0.0305,  0.0383,  0.0451, -0.0536,
         0.0292,  0.0795,  0.0644,  0.0250,  0.0491,  0.0733, -0.0811, -0.0167,
        -0.0517,  0.0010, -0.0524, -0.0370,  0.0741, -0.0795, -0.0542, -0.0835,
        -0.0283, -0.0670, -0.0675,  0.0730, -0.0478,  0.0598, -0.0862,  0.0030,
         0.0799, -0.0124, -0.0847, -0.0193, -0.0309, -0.0768,  0.0870,  0.0615,
         0.0764,  0.0399, -0.0713,  0.0800, -0.0873, -0.0523,  0.0093,  0.0752,
         0.0873,  0.0189, -0.0734,  0.0567, -0.0150,  0.0674,  0.0450,  0.0549,
         0.0032, -0.0810, -0.0867,  0.0349, -0.0298, -0.0467,  0.0489,  0.0385])), ('l2.weight', tensor([[-1.4906e-02, -8.1936e-03, -3.2866e-02,  ...,  4.1430e-02,
         -2.1258e-02, -6.2108e-03],
        [ 8.5995e-05, -2.4441e-02, -9.7788e-03,  ...,  8.8740e-03,
         -1.8025e-02, -3.1464e-02],
        [-5.0128e-02, -3.9290e-02,  2.6125e-02,  ..., -3.5566e-02,
         -4.6239e-02,  4.8540e-02],
        ...,
        [ 1.8081e-02,  3.0562e-02,  5.0640e-03,  ...,  9.6664e-03,
          1.3690e-02, -3.0873e-02],
        [-1.5538e-02, -5.4558e-02,  5.5808e-02,  ...,  7.9329e-03,
          5.5620e-02, -8.7916e-03],
        [-2.6634e-02, -3.8213e-02, -8.9528e-04,  ..., -5.4874e-02,
          6.0545e-02, -2.8823e-02]])), ('l2.bias', tensor([-0.0385, -0.0052,  0.0611,  0.0359,  0.0577,  0.0022,  0.0614, -0.0162,
         0.0405,  0.0241, -0.0551, -0.0186,  0.0609,  0.0239,  0.0333,  0.0167,
         0.0574, -0.0497,  0.0081, -0.0538, -0.0290,  0.0122,  0.0559,  0.0121,
         0.0037,  0.0438, -0.0325, -0.0232,  0.0503,  0.0216,  0.0560, -0.0532,
        -0.0544, -0.0444,  0.0395, -0.0268, -0.0306, -0.0416,  0.0374, -0.0321,
        -0.0097,  0.0421,  0.0120,  0.0322,  0.0446, -0.0325, -0.0331,  0.0467,
         0.0436, -0.0291,  0.0013, -0.0409, -0.0135,  0.0585,  0.0115, -0.0048,
        -0.0196,  0.0101,  0.0267, -0.0068,  0.0485,  0.0515,  0.0018, -0.0068,
         0.0313,  0.0185,  0.0315,  0.0189, -0.0334,  0.0343, -0.0188, -0.0137,
        -0.0445,  0.0180, -0.0425,  0.0154,  0.0511,  0.0175,  0.0463,  0.0346,
         0.0139,  0.0059, -0.0299, -0.0025,  0.0011, -0.0146, -0.0022, -0.0430,
         0.0537, -0.0359, -0.0197,  0.0575, -0.0334,  0.0018, -0.0588, -0.0273,
         0.0210, -0.0210, -0.0481, -0.0075,  0.0230, -0.0032,  0.0305, -0.0393,
        -0.0206,  0.0116,  0.0087,  0.0124,  0.0216,  0.0537,  0.0299, -0.0616,
        -0.0033,  0.0413,  0.0301, -0.0422, -0.0118, -0.0486,  0.0547,  0.0172,
         0.0229, -0.0008, -0.0322, -0.0608, -0.0275, -0.0594, -0.0516,  0.0502]))])
model_data = {
    'config': {
        'hidden_size': m.hidden_size,
    },
    'state_dict': m.state_dict(),
}
print(model_data)
{'config': {'hidden_size': 256}, 'state_dict': OrderedDict([('l1.weight', tensor([[-0.0193,  0.0075, -0.0066,  ..., -0.0232,  0.0126, -0.0706],
        [-0.0387, -0.0597,  0.0671,  ..., -0.0475,  0.0777, -0.0336],
        [ 0.0489,  0.0128, -0.0551,  ...,  0.0771, -0.0471,  0.0243],
        ...,
        [ 0.0730,  0.0362, -0.0538,  ...,  0.0760, -0.0179,  0.0786],
        [-0.0768, -0.0611, -0.0077,  ...,  0.0236, -0.0485,  0.0571],
        [-0.0069,  0.0726,  0.0871,  ...,  0.0091,  0.0041, -0.0063]])), ('l1.bias', tensor([ 0.0219, -0.0626, -0.0179,  0.0733, -0.0071, -0.0866,  0.0232, -0.0150,
        -0.0016,  0.0404, -0.0839, -0.0130,  0.0267, -0.0126,  0.0706, -0.0795,
         0.0578,  0.0022,  0.0038, -0.0775,  0.0197,  0.0177,  0.0190,  0.0238,
        -0.0771, -0.0455, -0.0586,  0.0138, -0.0659, -0.0443, -0.0451,  0.0710,
        -0.0820, -0.0724,  0.0570,  0.0868, -0.0540, -0.0570,  0.0128,  0.0389,
        -0.0867,  0.0409, -0.0622,  0.0600,  0.0283, -0.0248,  0.0042,  0.0491,
         0.0648,  0.0127,  0.0698, -0.0265,  0.0230, -0.0714,  0.0833, -0.0269,
        -0.0736, -0.0079,  0.0478, -0.0519, -0.0653,  0.0830, -0.0777, -0.0535,
         0.0074,  0.0476,  0.0035,  0.0461, -0.0807, -0.0004, -0.0046, -0.0367,
         0.0829,  0.0214, -0.0596, -0.0537, -0.0799,  0.0054,  0.0637, -0.0382,
         0.0244, -0.0719,  0.0047,  0.0395,  0.0764,  0.0672, -0.0340, -0.0775,
        -0.0073,  0.0047, -0.0576,  0.0221, -0.0596,  0.0799,  0.0635,  0.0517,
         0.0770, -0.0358,  0.0741, -0.0596, -0.0574, -0.0674, -0.0645, -0.0172,
         0.0205, -0.0760, -0.0349, -0.0817,  0.0246,  0.0328,  0.0428, -0.0154,
         0.0564,  0.0180, -0.0686,  0.0088,  0.0675,  0.0701, -0.0609,  0.0883,
         0.0677, -0.0176, -0.0310, -0.0428, -0.0817, -0.0302, -0.0433, -0.0570,
        -0.0419, -0.0522,  0.0170,  0.0421,  0.0672, -0.0036,  0.0270,  0.0491,
         0.0763, -0.0390, -0.0704,  0.0649,  0.0582,  0.0628, -0.0334,  0.0134,
         0.0809,  0.0057, -0.0384, -0.0295, -0.0106,  0.0512, -0.0091, -0.0276,
         0.0173,  0.0516, -0.0086,  0.0867, -0.0710,  0.0380, -0.0555,  0.0762,
        -0.0620, -0.0151,  0.0367,  0.0822,  0.0403, -0.0171,  0.0730,  0.0584,
        -0.0025,  0.0582,  0.0745, -0.0616, -0.0494,  0.0356, -0.0272, -0.0097,
         0.0362,  0.0616, -0.0310,  0.0454, -0.0683,  0.0661, -0.0323,  0.0215,
         0.0375, -0.0673,  0.0543, -0.0783,  0.0621,  0.0392, -0.0194,  0.0852,
        -0.0134,  0.0145,  0.0352, -0.0670,  0.0305,  0.0383,  0.0451, -0.0536,
         0.0292,  0.0795,  0.0644,  0.0250,  0.0491,  0.0733, -0.0811, -0.0167,
        -0.0517,  0.0010, -0.0524, -0.0370,  0.0741, -0.0795, -0.0542, -0.0835,
        -0.0283, -0.0670, -0.0675,  0.0730, -0.0478,  0.0598, -0.0862,  0.0030,
         0.0799, -0.0124, -0.0847, -0.0193, -0.0309, -0.0768,  0.0870,  0.0615,
         0.0764,  0.0399, -0.0713,  0.0800, -0.0873, -0.0523,  0.0093,  0.0752,
         0.0873,  0.0189, -0.0734,  0.0567, -0.0150,  0.0674,  0.0450,  0.0549,
         0.0032, -0.0810, -0.0867,  0.0349, -0.0298, -0.0467,  0.0489,  0.0385])), ('l2.weight', tensor([[-1.4906e-02, -8.1936e-03, -3.2866e-02,  ...,  4.1430e-02,
         -2.1258e-02, -6.2108e-03],
        [ 8.5995e-05, -2.4441e-02, -9.7788e-03,  ...,  8.8740e-03,
         -1.8025e-02, -3.1464e-02],
        [-5.0128e-02, -3.9290e-02,  2.6125e-02,  ..., -3.5566e-02,
         -4.6239e-02,  4.8540e-02],
        ...,
        [ 1.8081e-02,  3.0562e-02,  5.0640e-03,  ...,  9.6664e-03,
          1.3690e-02, -3.0873e-02],
        [-1.5538e-02, -5.4558e-02,  5.5808e-02,  ...,  7.9329e-03,
          5.5620e-02, -8.7916e-03],
        [-2.6634e-02, -3.8213e-02, -8.9528e-04,  ..., -5.4874e-02,
          6.0545e-02, -2.8823e-02]])), ('l2.bias', tensor([-0.0385, -0.0052,  0.0611,  0.0359,  0.0577,  0.0022,  0.0614, -0.0162,
         0.0405,  0.0241, -0.0551, -0.0186,  0.0609,  0.0239,  0.0333,  0.0167,
         0.0574, -0.0497,  0.0081, -0.0538, -0.0290,  0.0122,  0.0559,  0.0121,
         0.0037,  0.0438, -0.0325, -0.0232,  0.0503,  0.0216,  0.0560, -0.0532,
        -0.0544, -0.0444,  0.0395, -0.0268, -0.0306, -0.0416,  0.0374, -0.0321,
        -0.0097,  0.0421,  0.0120,  0.0322,  0.0446, -0.0325, -0.0331,  0.0467,
         0.0436, -0.0291,  0.0013, -0.0409, -0.0135,  0.0585,  0.0115, -0.0048,
        -0.0196,  0.0101,  0.0267, -0.0068,  0.0485,  0.0515,  0.0018, -0.0068,
         0.0313,  0.0185,  0.0315,  0.0189, -0.0334,  0.0343, -0.0188, -0.0137,
        -0.0445,  0.0180, -0.0425,  0.0154,  0.0511,  0.0175,  0.0463,  0.0346,
         0.0139,  0.0059, -0.0299, -0.0025,  0.0011, -0.0146, -0.0022, -0.0430,
         0.0537, -0.0359, -0.0197,  0.0575, -0.0334,  0.0018, -0.0588, -0.0273,
         0.0210, -0.0210, -0.0481, -0.0075,  0.0230, -0.0032,  0.0305, -0.0393,
        -0.0206,  0.0116,  0.0087,  0.0124,  0.0216,  0.0537,  0.0299, -0.0616,
        -0.0033,  0.0413,  0.0301, -0.0422, -0.0118, -0.0486,  0.0547,  0.0172,
         0.0229, -0.0008, -0.0322, -0.0608, -0.0275, -0.0594, -0.0516,  0.0502]))])}
torch.save(model_data, 'my_model.bin')
!ls -lh my_model.bin
-rw-r--r-- 1 pawel pawel 259K maj  9 16:29 my_model.bin
loaded_data = torch.load('my_model.bin', map_location=torch.device('cpu'))
print(loaded_data.keys())
print(loaded_data['config'])
dict_keys(['config', 'state_dict'])
{'hidden_size': 256}
m = MyNet(**loaded_data['config'])
print(m)
m.load_state_dict(loaded_data['state_dict'])
MyNet(
  (l1): Linear(in_features=128, out_features=256, bias=True)
  (l2): Linear(in_features=256, out_features=128, bias=True)
)
<All keys matched successfully>

def init_weight(my_net: MyNet, uniform: bool = False):
    """Re-initialise both linear layers of ``my_net`` in place.

    Weights get Xavier (Glorot) initialisation, drawn from a uniform
    distribution when ``uniform`` is true and from a normal distribution
    otherwise. Biases are set to zero.
    """
    if uniform:
        fill_weight = torch.nn.init.xavier_uniform_
    else:
        fill_weight = torch.nn.init.xavier_normal_

    # Weights are filled in l1, l2 order so random numbers are consumed in the
    # same sequence on every call.
    for layer in (my_net.l1, my_net.l2):
        fill_weight(layer.weight)
    for layer in (my_net.l1, my_net.l2):
        torch.nn.init.zeros_(layer.bias)
m1 = MyNet(256).eval()
init_weight(m1)
print(m1)

x1 = torch.rand(15, 128)

y1 = m1(x1)
y2 = m1(x1)
print('y1 == y2:', torch.equal(y1, y2))
model_data = {
    'config': {
        'hidden_size': m1.hidden_size,
    },
    'state_dict': m1.state_dict(),
}
torch.save(model_data, 'my_model.bin')
loaded_data = torch.load('my_model.bin', map_location=torch.device('cpu'))
m2 = MyNet(256).eval()
init_weight(m2, uniform=True)
print(m2)

z1 = m2(x1)
z2 = m2(x1)
print('z1 == z2:', torch.equal(z1, z2))
print('z1 == y1:', torch.equal(z1, y1))
m2.load_state_dict(loaded_data['state_dict'])
print(m2)

z1 = m2(x1)
z2 = m2(x1)
print('z1 == z2:', torch.equal(z1, z2))
print('z1 == y1:', torch.equal(z1, y1))
def almost_equal_tensors(x: torch.Tensor, y: torch.Tensor, epsilon: float = 1e-12) -> bool:
    """Return True when ``x`` and ``y`` agree element-wise within ``epsilon``.

    Two tensors match when they have the same size and every absolute
    difference is strictly smaller than ``epsilon``. On a mismatch a short
    diagnostic is printed and False is returned.
    """
    size_x, size_y = list(x.size()), list(y.size())
    if size_x != size_y:
        print(f'Invalid tensor size: x: {size_x} and y: {size_y}')
        return False

    # Boolean mask of the positions that are within tolerance.
    within_tolerance = (x - y).abs() < epsilon
    if bool(within_tolerance.all()):
        return True

    # Report which share of the elements did match.
    matching = float(within_tolerance.sum())
    total = float(within_tolerance.numel())
    percentage = matching / total * 100.0
    print(f'Tensor are not equal with epsilon {epsilon} ({percentage:.2f}% correct)')
    return False
print(z1[0][:5])
print(y1[0][:5])

print(almost_equal_tensors(z1, y1))

print(almost_equal_tensors(z1, y1 - 1e-12))

print(almost_equal_tensors(z1, y1 - 1e-8))

Zadania:

Zadanie 1:

Napisz funkcję, która dla wektora obliczy odpowiednio sinus i cosinus według wzoru:

  • dla indeksów parzystych obliczy sinus
  • dla indeksów nieparzystych obliczy cosinus
def function_1(x: torch.FloatTensor) -> torch.FloatTensor:
    """Return a copy of ``x`` with sine on even and cosine on odd indices.

    Elements at even positions (0, 2, 4, ...) are replaced by their sine and
    elements at odd positions (1, 3, 5, ...) by their cosine, as the task
    requires. The input tensor is left untouched.

    Args:
        x: 1-D floating-point tensor.

    Returns:
        A new tensor of the same shape as ``x``.
    """
    # Work on a copy so the caller's tensor is not overwritten.
    result = x.clone()
    result[::2] = x[::2].sin()
    result[1::2] = x[1::2].cos()
    return result
for i, x in enumerate([torch.FloatTensor([1, 2, 3]), torch.FloatTensor([23, 25, 33, 14, 85])]):
    print(f'Testing example {i}')
    y = function_1(x)
Testing example 0
Testing example 1

Zadanie 2:

Napisz funkcję, która dla dwóch wektorów obliczy wyrażenie: 2.5*x + y^3 - mean(x, y)

Uwaga: dla operacji mean(x, y) można wykorzystać polecenia: torch.mean() oraz torch.stack

def function_2(x: torch.FloatTensor, y: torch.FloatTensor) -> torch.FloatTensor:
    """Compute ``2.5 * x + y ** 3 - mean(x, y)`` element-wise.

    ``mean(x, y)`` is a single scalar: the mean over all elements of both
    tensors stacked together. It is subtracted from every position.
    """
    scaled = x * 2.5
    cubed = torch.pow(y, 3)
    # Scalar mean over every element of both inputs.
    overall_mean = torch.stack((x, y)).mean()
    return scaled + cubed - overall_mean
for i, (x, y) in enumerate([(torch.FloatTensor([1, 2, 3]), torch.FloatTensor([3, 2, 1])),
                            (torch.FloatTensor([1, 1, 1, 1, 1]), torch.FloatTensor([8, 4, 123, 16, 12]))]):
    print(f'Testing example {i}')
    z = function_2(x, y)
Testing example 0
Testing example 1

Zadanie 3:

Wybierz najbardziej podobne 2 wektory do podanego na wejściu z dostępnego Tensora (dostępnych wektorów).

Uwaga: Do obliczenia podobieństwa wektorów można wykorzystać odległość euklidesową lub inną (np. odległość kosinusową - torch.nn.functional.cosine_similarity - dla wektorów 1D należy wykorzystać argument dim=0).

def function_3(vectors: torch.FloatTensor, search: torch.FloatTensor) -> list:
    """Return the two rows of ``vectors`` most similar to ``search``.

    Similarity is the cosine similarity between each row and ``search``.

    Args:
        vectors: 2-D tensor whose rows are the candidate vectors.
        search: 1-D query vector with the same length as each row.

    Returns:
        A list of row tensors ordered from most to least similar. It holds
        two entries, or a single entry when ``vectors`` has only one row.
    """
    similarities = torch.stack([
        torch.nn.functional.cosine_similarity(vector, search, dim=0)
        for vector in vectors
    ])
    # Cosine similarity ranges over [-1, 1], so masking the best match with 0
    # could select it again when every other score is negative; topk picks
    # two distinct indices regardless of sign.
    k = min(2, similarities.numel())
    top_indices = torch.topk(similarities, k).indices.tolist()
    return [vectors[index] for index in top_indices]
    
# Run function_3 on one sample: four candidate vectors and a query vector.
for i, (x, y) in enumerate((
        (
            torch.FloatTensor([[2, 2, 2], [1, 2, 1], [1, 0, 3], [123, 1, 1]]),
            torch.FloatTensor([1, 1, 1]),
        ),
)):
    z = function_3(x, y)

Zadanie 4:

Zapisz dowolny element do pliku z wykorzystaniem biblioteki pickle, następnie usuń zapisany element z pamięci wykorzystując funkcję del, kolejno wczytaj zapisane dane i zmodyfikuj je w dowolny sposób.

import pickle

# Sample object to serialize.
vocab = {'hello': 'world', 'a': 1, 'b': 2}

# Pickle writes bytes, so the file is opened in binary mode.
with open('plik.pkl', 'wb') as f:
    pickle.dump(vocab, f)
    
# Drop the in-memory name so the value read below can only come from the file.
del vocab

# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open('plik.pkl', 'rb') as f:
    vocab = pickle.load(f)

# Show the type and contents of the restored object.
print(type(vocab), vocab)
<class 'dict'> {'hello': 'world', 'a': 1, 'b': 2}

Zadanie 5:

Napisz Moduł (torch.nn.Module) zawierający elementy:

  • emb: torch.nn.Embedding (num_tokens, emb_in)
  • norm: torch.nn.LayerNorm (emb_in)
  • act: torch.nn.ReLU
  • dropout: torch.nn.Dropout
  • out: torch.nn.Linear (emb_in, emb_out)

Proces przetwarzania danych powinien przebiegać według kolejności elementów przedstawionych powyżej.

Moduł powinien być konfigurowalny przy pomocy argumentów:

  • emb_in: int - wielkość embeddingów wejściowych
  • emb_out: int - wielkość embeddingów wyjściowych (wymiar danych po wyjściu z sieci)
  • num_tokens: int - maksymalna wielkość/ilość tokenów (wielkość słownika) dla warstwy Embeddings
  • dropout: float - wielkość/wartość dropout
class MyModule(torch.nn.Module):
    """Embedding -> LayerNorm -> ReLU -> Dropout -> Linear pipeline.

    Args:
        emb_in: Size of the input embeddings.
        emb_out: Size of the output features produced by the final linear layer.
        num_tokens: Number of rows in the embedding table (vocabulary size).
        dropout: Dropout probability. A ``torch.nn.Dropout`` instance is also
            accepted, in which case its ``p`` is used; this keeps configs that
            captured ``module.dropout`` (the layer itself) working.
    """

    def __init__(self, emb_in: int, emb_out: int, num_tokens: int, dropout: float):
        super().__init__()

        # Normalize a Dropout layer passed in place of a float to its probability.
        if isinstance(dropout, torch.nn.Dropout):
            dropout = dropout.p

        self.emb_in = emb_in
        self.emb_out = emb_out
        self.num_tokens = num_tokens
        # The name ``dropout`` is taken by the layer below, so the raw
        # probability is kept under a separate attribute.
        self.dropout_p = dropout

        self.emb = torch.nn.Embedding(num_tokens, emb_in)
        self.norm = torch.nn.LayerNorm(emb_in)
        self.act = torch.nn.ReLU()
        # Pass the configured probability through; without it the layer
        # falls back to the library default of 0.5.
        self.dropout = torch.nn.Dropout(dropout)
        self.out = torch.nn.Linear(emb_in, emb_out)

    def forward(self, x: torch.LongTensor) -> torch.FloatTensor:
        """Map token indices to ``emb_out``-sized feature vectors.

        Args:
            x: Tensor of token indices in ``[0, num_tokens)``.

        Returns:
            A tensor with the shape of ``x`` plus a trailing dimension of
            size ``emb_out``.
        """
        x = self.emb(x)
        x = self.norm(x)
        x = self.act(x)
        x = self.dropout(x)
        return self.out(x)
# Build a module and push a random 15x10 batch of token ids through it.
m = MyModule(emb_in=128, emb_out=256, num_tokens=15000, dropout=0.1)
x = torch.LongTensor(15, 10).random_(0, 15000)
y = m(x)
# The output should keep the input's two dimensions and end with emb_out (256).
if list(y.shape) != [15, 10, 256]:
    print('Invalid out size!')

Zadanie 6:

Zapisz wcześniej utworzony moduł (state_dict) do pliku, następnie utwórz nowy moduł i wczytaj zapisany stan (state_dict) z pliku.

# Bundle the constructor arguments with the weights so the module can be
# rebuilt from the saved file alone.
model_data2 = {
    'config': {
        'emb_in': m.emb_in,
        'emb_out': m.emb_out,
        'num_tokens': m.num_tokens,
        # m.dropout is the torch.nn.Dropout layer; store its probability
        # (a plain float) instead of the layer object, so the config holds
        # only simple values.
        'dropout': m.dropout.p,
    },
    'state_dict': m.state_dict(),
}
print(model_data2)
{'config': {'emb_in': 128, 'emb_out': 256, 'num_tokens': 15000, 'dropout': Dropout(p=0.5, inplace=False)}, 'state_dict': OrderedDict([('emb.weight', tensor([[ 0.3181,  0.5409,  0.2802,  ...,  1.4907, -0.8508, -0.3714],
        [-1.4543, -1.9252, -0.2154,  ..., -1.7649, -0.3797,  1.3157],
        [ 0.4123,  0.0904,  0.3782,  ...,  0.2839, -1.3487, -0.4568],
        ...,
        [-0.2046, -1.8456, -1.0061,  ..., -1.7294,  1.0968,  1.2567],
        [ 0.3166,  0.1134,  0.4829,  ...,  0.5419,  1.8218,  0.1062],
        [ 0.1806,  1.1378, -0.7690,  ..., -0.8517, -0.0662,  2.5349]])), ('norm.weight', tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1.])), ('norm.bias', tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0.])), ('out.weight', tensor([[-0.0427,  0.0441, -0.0778,  ..., -0.0384,  0.0252, -0.0082],
        [ 0.0101, -0.0006,  0.0543,  ...,  0.0639, -0.0532, -0.0378],
        [-0.0627,  0.0415,  0.0211,  ..., -0.0243, -0.0347,  0.0444],
        ...,
        [-0.0765, -0.0017, -0.0058,  ..., -0.0366,  0.0115,  0.0024],
        [ 0.0844,  0.0104, -0.0770,  ..., -0.0650,  0.0419, -0.0571],
        [-0.0199,  0.0623,  0.0472,  ...,  0.0586, -0.0665,  0.0717]])), ('out.bias', tensor([ 0.0807,  0.0538,  0.0808,  0.0798, -0.0867,  0.0364, -0.0338, -0.0750,
        -0.0624,  0.0578, -0.0595, -0.0519,  0.0242,  0.0341, -0.0046,  0.0034,
         0.0744, -0.0828,  0.0522, -0.0591,  0.0234, -0.0155, -0.0669,  0.0694,
         0.0234, -0.0069, -0.0470,  0.0323,  0.0801,  0.0048, -0.0771,  0.0097,
         0.0318,  0.0041, -0.0518, -0.0077, -0.0581,  0.0649, -0.0353, -0.0339,
         0.0074,  0.0524, -0.0384,  0.0252,  0.0834, -0.0164, -0.0127, -0.0818,
        -0.0795, -0.0346, -0.0247, -0.0263, -0.0645, -0.0171, -0.0791,  0.0316,
         0.0246,  0.0514,  0.0805, -0.0352, -0.0384,  0.0822,  0.0143, -0.0194,
         0.0301,  0.0234,  0.0837, -0.0359, -0.0755, -0.0720,  0.0205,  0.0510,
        -0.0822, -0.0345, -0.0450, -0.0865,  0.0097,  0.0470, -0.0424,  0.0592,
         0.0839,  0.0185,  0.0343, -0.0713, -0.0009, -0.0313, -0.0393, -0.0536,
         0.0303,  0.0108, -0.0079, -0.0525, -0.0172,  0.0028, -0.0088, -0.0139,
        -0.0037,  0.0557,  0.0604,  0.0566,  0.0159,  0.0787, -0.0550, -0.0650,
        -0.0139,  0.0630,  0.0632, -0.0340,  0.0779, -0.0861,  0.0806, -0.0525,
        -0.0542,  0.0599,  0.0547, -0.0270,  0.0259,  0.0817, -0.0758, -0.0635,
         0.0197,  0.0381,  0.0670, -0.0456,  0.0132, -0.0556,  0.0806,  0.0556,
        -0.0370,  0.0374, -0.0303,  0.0883, -0.0455, -0.0288, -0.0531, -0.0717,
         0.0360,  0.0851,  0.0176, -0.0102, -0.0685,  0.0219, -0.0863, -0.0369,
         0.0194,  0.0326, -0.0380, -0.0864,  0.0331,  0.0660, -0.0605, -0.0727,
        -0.0422, -0.0582,  0.0377, -0.0252,  0.0390, -0.0279,  0.0408, -0.0144,
         0.0370, -0.0067,  0.0663,  0.0460, -0.0657, -0.0307, -0.0663, -0.0678,
        -0.0232, -0.0658,  0.0275,  0.0439,  0.0601,  0.0507, -0.0051,  0.0671,
        -0.0292,  0.0251, -0.0597, -0.0330, -0.0197,  0.0592,  0.0262, -0.0020,
         0.0717, -0.0775, -0.0040,  0.0712,  0.0472, -0.0311,  0.0785,  0.0867,
        -0.0761,  0.0844,  0.0460, -0.0339,  0.0648,  0.0578,  0.0248, -0.0784,
        -0.0087,  0.0319, -0.0527, -0.0848, -0.0872,  0.0444, -0.0635,  0.0744,
         0.0140, -0.0307,  0.0418, -0.0270, -0.0013, -0.0796, -0.0141,  0.0744,
         0.0279,  0.0577,  0.0059, -0.0824,  0.0754,  0.0733,  0.0683,  0.0858,
         0.0138, -0.0350,  0.0658,  0.0437, -0.0827, -0.0066,  0.0419, -0.0661,
         0.0656, -0.0691,  0.0229,  0.0016,  0.0524,  0.0430,  0.0787, -0.0427,
        -0.0234,  0.0625,  0.0276, -0.0373, -0.0679,  0.0328, -0.0031,  0.0240,
         0.0105,  0.0065,  0.0169, -0.0341, -0.0776,  0.0261,  0.0629, -0.0517]))])}
# Write the config + state_dict bundle to disk.
torch.save(model_data2, 'my_model2.bin')
# Read it back, mapping all stored tensors onto the CPU.
loaded_data2 = torch.load('my_model2.bin', map_location=torch.device('cpu'))
# Recreate the module from the saved constructor arguments (weights are freshly initialized here).
m2 = MyModule(**loaded_data2['config'])
print(m2)
# Overwrite the fresh weights with the saved ones.
m2.load_state_dict(loaded_data2['state_dict'])
MyModule(
  (dropout): Dropout(p=0.5, inplace=False)
  (emb): Embedding(15000, 128)
  (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
  (act): ReLU()
  (out): Linear(in_features=128, out_features=256, bias=True)
)
<All keys matched successfully>

Uwagi:

  • Aby otrzymać dostęp do karty graficznej/obliczeń CUDA w Google Colab, należy kliknąć w menu u góry Runtime, po czym wybrać Change runtime type i w menu Hardware accelerator zaznaczyć GPU. Po przetworzeniu karta graficzna/CUDA powinna być dostępna. UWAGA: po zmianie typu urządzenia należy wykonać część kroków ponownie (np. importowanie bibliotek, inicjalizacja zmiennych czy definicja klas/funkcji), usunięte zostaną również wszystkie wcześniej utworzone dane.