

%matplotlib inline
import random
import torch
# from d2l import torch as d2l
# prepare the data
def synthetic_data(w, b, num_examples):
    """Generate a noisy linear dataset following ``y = Xw + b + noise``.

    Args:
        w: Weight tensor; ``len(w)`` sets the number of feature columns.
        b: Bias added to every target.
        num_examples: Number of rows to sample.

    Returns:
        A ``(features, labels)`` tuple. ``features`` has shape
        ``(num_examples, len(w))`` and is drawn from a standard normal
        distribution; ``labels`` is reshaped into a single column.
    """
    x = torch.normal(0, 1, (num_examples, len(w)))
    y = torch.matmul(x, w) + b
    # Perturb the targets with small Gaussian noise (std 0.01).
    y += torch.normal(0, 0.01, y.shape)
    return x, y.reshape((-1, 1))


# Ground-truth parameters the training loop is expected to recover.
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1000)


# data iteration
def data_iter(batch_size, features, labels):
    """Yield shuffled minibatches of ``(features, labels)``.

    Args:
        batch_size: Maximum number of examples per batch.
        features: Indexable tensor of examples; ``len(features)`` is the
            dataset size.
        labels: Tensor indexed with the same row indices as ``features``.

    Yields:
        ``(features[batch], labels[batch])`` pairs that together cover every
        example exactly once per call, in random order.
    """
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)
    for start in range(0, num_examples, batch_size):
        # Slicing clamps at the end of the list, so the final batch simply
        # holds whatever examples remain when the total is not a multiple
        # of batch_size.
        batch_indices = torch.tensor(indices[start:start + batch_size])
        yield features[batch_indices], labels[batch_indices]


# model
def linereg(x, w, b):
    """Linear regression model: return ``x @ w + b``."""
    projection = torch.matmul(x, w)
    return projection + b


# loss
def mse_loss(y_hat, y):
    """Return the element-wise halved squared error ``(y_hat - y)**2 / 2``.

    Args:
        y_hat: Predictions.
        y: Targets. When ``y`` is a tensor with the same number of elements
            as ``y_hat`` but a different shape (e.g. ``(n,)`` vs ``(n, 1)``),
            it is reshaped to match ``y_hat``.

    Returns:
        The per-element loss (no reduction is applied).
    """
    if (
        torch.is_tensor(y_hat)
        and torch.is_tensor(y)
        and y.shape != y_hat.shape
        and y.numel() == y_hat.numel()
    ):
        # Without this, (n, 1) - (n,) would silently broadcast to (n, n)
        # and produce a meaningless pairwise loss.
        y = y.reshape(y_hat.shape)
    return (y_hat - y) ** 2 / 2


# sgd
def sgd(params, lr, batch_size):
    """Apply one minibatch SGD step in place and clear the gradients.

    Args:
        params: Iterable of tensors to update.
        lr: Learning rate.
        batch_size: Divisor applied to each gradient (the caller
            backpropagates a summed loss, so this averages it).

    Parameters whose ``.grad`` is ``None`` (no gradient has been computed for
    them) are left untouched.
    """
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                continue
            # The update must be in place. Writing `param = param - ...`
            # rebinds the local name to a new tensor: the original parameter
            # is never updated and the new tensor has no `.grad`, which leads
            # to "AttributeError: 'NoneType' object has no attribute 'zero_'".
            param -= lr * param.grad / batch_size
            param.grad.zero_()


# init the parameters
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# train
num_epochs = 10
lr = 1
net = linereg
loss = mse_loss
batch_size = 10

for epoch in range(num_epochs):
    for x, y in data_iter(batch_size, features, labels):
        train_loss = loss(net(x, w, b), y)
        # Backpropagate the summed minibatch loss; sgd() divides the
        # gradient by batch_size afterwards.
        train_loss.sum().backward()
        sgd([w, b], lr, batch_size)

    # test: evaluate on the full dataset without tracking gradients
    with torch.no_grad():
        l = loss(net(features, w, b), labels)
        print(f'epoch {epoch+1}, loss{float(l.mean()):f}')

# Report how far the learned parameters are from the ground truth.
print("b loss", (true_b - b))
print("w_loss", (true_w - w.reshape(true_w.shape)))


