import numpy as np
import torch
x = torch.tensor([[2., -1.], [1., 1.]], requires_grad=True)
print(x)
tensor([[ 2., -1.],
        [ 1.,  1.]], requires_grad=True)
out = x.pow(2).sum()
out.backward()
x.grad
tensor([[ 4., -2.],
        [ 2.,  2.]])
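Since out is the sum of x**2 over all entries, the analytic gradient is simply 2*x, which is exactly what autograd returned. A quick sanity check (a minimal sketch, not part of the original notebook):

# The analytic gradient of sum(x**2) is 2*x; confirm autograd agrees
assert torch.allclose(x.grad, 2 * x.detach())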
Computing gradients for the same network that we worked through in the Chain_rule.ipynb notebook in the previous chapter:
x = np.array([[1,1]])
y = np.array([[0]])
x, y = [torch.tensor(i).float() for i in [x,y]]
W = [
    np.array([[-0.0053, 0.3793],
              [-0.5820, -0.5204],
              [-0.2723, 0.1896]], dtype=np.float32).T,
    np.array([-0.0140, 0.5607, -0.0628], dtype=np.float32),
    np.array([[ 0.1528, -0.1745, -0.1135]], dtype=np.float32).T,
    np.array([-0.5516], dtype=np.float32)
]
W = [torch.tensor(i, requires_grad=True) for i in W]
def feed_forward(inputs, outputs, weights):
    # Hidden layer: linear transform followed by a sigmoid activation
    pre_hidden = torch.matmul(inputs, weights[0]) + weights[1]
    hidden = 1 / (1 + torch.exp(-pre_hidden))
    # Output layer: linear transform, then mean squared error against the targets
    out = torch.matmul(hidden, weights[2]) + weights[3]
    mean_squared_error = torch.mean(torch.square(out - outputs))
    return mean_squared_error
loss = feed_forward(x, y, W)
loss
tensor(0.3346, grad_fn=<MeanBackward0>)
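As a cross-check (a hypothetical re-implementation, not from the original notebook), the same forward pass can be reproduced in plain NumPy on detached copies of the weights; it should print roughly the same loss of 0.3346:

# Hypothetical NumPy replica of feed_forward, for verification only
Wn = [w.detach().numpy() for w in W]
pre_hidden = x.numpy() @ Wn[0] + Wn[1]
hidden = 1 / (1 + np.exp(-pre_hidden))
out_np = hidden @ Wn[2] + Wn[3]
print(np.mean((out_np - y.numpy()) ** 2))  # ~0.3346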
loss.backward()
print([w.grad for w in W])
[tensor([[-0.0428,  0.0469,  0.0327],
        [-0.0428,  0.0469,  0.0327]]), tensor([-0.0428,  0.0469,  0.0327]), tensor([[-0.6814],
        [-0.4255],
        [-0.5364]]), tensor([-1.1568])]
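These values can be verified numerically. For instance, the gradient of the output bias W[3] should match a central finite difference of the loss (a sketch; the epsilon value is an arbitrary choice):

# Hypothetical finite-difference check on the output bias W[3]
eps = 1e-4
with torch.no_grad():
    W[3] += eps                      # nudge the bias up
loss_plus = feed_forward(x, y, W)
with torch.no_grad():
    W[3] -= 2 * eps                  # nudge it down
loss_minus = feed_forward(x, y, W)
with torch.no_grad():
    W[3] += eps                      # restore the original value
print((loss_plus - loss_minus) / (2 * eps))  # ~ -1.1568, matching W[3].grad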
updated_W = [w-w.grad for w in W]
updated_W
[tensor([[ 0.0375, -0.6289, -0.3050],
        [ 0.4221, -0.5673,  0.1569]], grad_fn=<SubBackward0>), tensor([ 0.0288,  0.5138, -0.0955], grad_fn=<SubBackward0>), tensor([[0.8342],
        [0.2510],
        [0.4229]], grad_fn=<SubBackward0>), tensor([0.6052], grad_fn=<SubBackward0>)]
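Note that this update implicitly uses a learning rate of 1 and produces non-leaf tensors (hence the grad_fn=<SubBackward0> in the output). In a training loop, the update is typically done in place under torch.no_grad() with an explicit learning rate, and the gradients are zeroed before the next backward pass (a minimal sketch; the learning rate of 0.01 is an arbitrary choice):

lr = 0.01  # hypothetical learning rate; the subtraction above effectively used lr = 1
with torch.no_grad():
    for w in W:
        w -= lr * w.grad  # in-place update keeps w a leaf tensor
        w.grad.zero_()    # clear accumulated gradients before the next backward()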