数据集制作

首先我们直接上代码,仍然使用波士顿房价数据集作为测试数据。

from sklearn import datasets  # dataset loaders
from sklearn.model_selection import train_test_split

# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this call needs scikit-learn < 1.2 (or a substitute dataset).
boston = datasets.load_boston()  # load the Boston housing data
train = boston.data  # samples
target = boston.target  # regression target
# Split the samples into a training set and a test set.
X_train, x_test, y_train, y_true = train_test_split(train, target, test_size=0.2)  # 20% test; 80% train

数据集要堆叠成时序形式的数据集,而且特征数目要为偶数(我认为这是位置编码部分 sin、cos 按奇偶列成对填充的缘故;当然也可以让 cos 少填一列)。堆叠代码如下:

import numpy as np


def _double_features(samples):
    """Duplicate every feature and stack the row twice as a 2-step sequence.

    Each feature value is repeated next to itself so the feature count is
    always even, then the widened row is stacked twice along a new axis 1.

    Args:
        samples: 2-D array-like of shape (n_samples, n_features).

    Returns:
        numpy.ndarray of shape (n_samples, 2, 2 * n_features).
    """
    doubled = np.repeat(np.asarray(samples), 2, axis=1)
    return np.stack([doubled, doubled], axis=1)


# Double the features so that the feature count can never be odd.
X_train_Double = _double_features(X_train)
X_test_Double = _double_features(x_test)
print("X_train_Double.shape:", X_train_Double.shape, "X_test_Double.shape:", X_test_Double.shape)
output:
X_train_Double.shape: (404, 2, 26) X_test_Double.shape: (102, 2, 26)

模型搭建


class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to the input.

    Even feature columns receive a sine term and odd columns a cosine term.
    The table is registered as a buffer, so it follows the module across
    devices but is not a trainable parameter.

    Args:
        d_model: size of the last (feature) dimension of the input.
        max_len: number of positions to precompute.
    """

    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # For an odd d_model there is one fewer cosine column than sine
        # columns, so trim div_term; for an even d_model this is a no-op.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        # Shape (max_len, 1, d_model): broadcasts over axis 1 of the input.
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``x.size(0)`` positions."""
        return x + self.pe[:x.size(0), :]


class TransAm(nn.Module):
    """Transformer-encoder regressor producing one value per row of axis 0.

    The encoder output is flattened per axis-0 entry and fed to a linear
    layer with ``2 * feature_size`` inputs, so axis 1 of the input must have
    length 2.

    NOTE(review): the encoder layer uses the default sequence-first layout,
    so axis 0 of ``src`` is what the causal mask and the position encoding
    run over. The training script in this file passes (samples, 2, features)
    arrays, which makes the sample axis play the sequence role -- confirm
    this is intended.

    Args:
        feature_size: size of the last input dimension (the encoder d_model).
        num_layers: number of stacked encoder layers.
        dropout: dropout probability inside each encoder layer.
    """

    def __init__(self, feature_size=250, num_layers=1, dropout=0.1):
        super(TransAm, self).__init__()
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(feature_size)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=feature_size, nhead=2, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.decoder = nn.Linear(2 * feature_size, 1)
        self.init_weights()
        self.feature_size = feature_size
        self.num_layers = num_layers
        self.dropout = dropout

    def feature(self):
        """Return the constructor hyper-parameters as a dict."""
        return {
            "feature_size": self.feature_size,
            "num_layers": self.num_layers,
            "dropout": self.dropout,
        }

    def init_weights(self):
        """Zero the decoder bias and draw its weights from U(-0.1, 0.1)."""
        initrange = 0.1
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src):
        """Encode ``src`` and map each axis-0 entry to a single output value.

        Args:
            src: tensor of shape (L, 2, feature_size).

        Returns:
            Tensor of shape (L, 1).
        """
        # Rebuild the cached mask when the length or the device changes.
        if (
            self.src_mask is None
            or self.src_mask.size(0) != len(src)
            or self.src_mask.device != src.device
        ):
            self.src_mask = self._generate_square_subsequent_mask(len(src)).to(src.device)

        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, self.src_mask)
        # Flatten everything after axis 0 for the linear decoder.
        output = output.reshape(output.shape[0], -1)
        return self.decoder(output)

    def _generate_square_subsequent_mask(self, sz):
        """Return a (sz, sz) float mask: 0 on/below the diagonal, -inf above."""
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

优化器和其他超参数搭建

# library
# standard library
import math
import os
import time

# third-party library
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision
from sklearn import metrics
from torch.autograd import Variable
from torch.utils.data import DataLoader
import numpy as np


class General_Regression_Training_3d():
    """Train a regression network and record its metrics, losses and weights.

    ``net`` must be a torch module that also exposes ``feature()`` returning
    a dict of its hyper-parameters; those are merged into ``resultDict``.

    Args:
        net: the network to train.
        learning_rate: a sequence of learning rates (one per decay stage) or a
            single float. The default list is never mutated.
        batch_size: number of samples per training batch.
        epoch: maximum number of epochs.
        use_more_gpu: wrap the network in ``nn.DataParallel`` when more than
            one CUDA device is available.
        weight_decay: weight decay passed to Adam.
        device: CUDA device used when CUDA is available.
        save_path: directory under which all outputs are written.
    """

    # Metric names accepted by fitness(). "mad" is accepted for compatibility,
    # but no method of this class computes it.
    _FITNESS_METRICS = ("r2", "r2_adjusted", "rmsle", "mape", "mad", "mae")

    def __init__(self, net, learning_rate=[1e-3, 1e-5, 1e-7], batch_size=1024, epoch=2000,
                 use_more_gpu=False, weight_decay=1e-8, device=0, save_path='CNN_Result'):
        self.net = net
        self.resultDict = {
            "learning_rate": learning_rate,
            "batch_size": batch_size,
            "epoch": epoch,
            "weight_decay": weight_decay,
            "use_more_gpu": use_more_gpu,
            "device": device,
        }
        self.resultDict = dict(self.resultDict, **self.net.feature())
        self.batch_size = batch_size
        self.use_more_gpu = use_more_gpu
        self.lr = learning_rate
        self.epoch = epoch
        self.weight_decay = weight_decay
        self.device = device
        self.save_path = save_path  # every output is stored under this directory
        os.makedirs(self.save_path, exist_ok=True)
        self.avgLossList = []  # average training loss of every epoch
        self.TrainLosses = []
        self.TestLosses = []
        self.t = 0
        self.D = []
        self.n = 0  # number of learning-rate decays performed so far
        self.limit = [1e-5, 1e-6, 1e-7]

    def fitness(self, evaluationStr="r2"):
        """Return a stored metric by name, for use by an external optimiser.

        Args:
            evaluationStr: one of "r2", "r2_adjusted", "rmsle", "mape", "mad"
                or "mae".

        Returns:
            The stored value, or ``None`` when the name is not supported or
            the metric has not been computed.
        """
        if evaluationStr not in self._FITNESS_METRICS:
            return None
        return getattr(self, evaluationStr, None)

    def _next_run_index(self):
        """Return the line count of ``result.csv``, or 1 when it is unreadable.

        The count doubles as the index of the current run: the file holds one
        header line plus one line per finished run.
        """
        result_file = "/".join([self.save_path, 'result.csv'])
        try:
            with open(result_file, 'r') as f:
                return sum(1 for _ in f)
        except OSError:
            return 1

    def save_results(self):
        """Persist hyper-parameters, metrics, losses, predictions and weights.

        Appends one row to ``result.csv`` and writes ``Loss/<count>.csv``,
        ``Prediction/<count>.csv`` and ``Model/<count>.pth`` under
        ``save_path``.

        Returns:
            The run index used in the file names.
        """
        resultTitle = [str(key) for key in self.resultDict.keys()]
        resultList = [
            "_".join([str(item) for item in value]) if isinstance(value, list) else str(value)
            for value in self.resultDict.values()
        ]
        y_test = self.y_test
        test_prediction = self.test_prediction
        save_path = self.save_path

        save_result = "/".join([save_path, 'result.csv'])
        os.makedirs(save_path, exist_ok=True)
        count = self._next_run_index()

        # Write the header only when the file does not exist yet, then append.
        resultTitle.insert(0, "count")
        resultList.insert(0, str(count))
        write_header = not os.path.exists(save_result)
        with open(save_result, 'a') as f:
            if write_header:
                f.write(",".join(resultTitle))
                f.write('\n')
            f.write(",".join(resultList))
            f.write('\n')

        # Save the train loss and the test loss.
        Loss_path = os.path.join(save_path, 'Loss')
        os.makedirs(Loss_path, exist_ok=True)
        df = pd.DataFrame()
        df["TrainLoss"] = self.TrainLosses
        df["TestLoss"] = self.TestLosses
        df.to_csv(os.path.join(Loss_path, str(count) + '.csv'), index=False)

        # Save the predictions next to the ground truth.
        pred_path = os.path.join(save_path, 'Prediction')
        os.makedirs(pred_path, exist_ok=True)
        df = pd.DataFrame()
        df["y_test"] = list(y_test)
        df["test_prediction"] = list(test_prediction)
        df.to_csv(os.path.join(pred_path, str(count) + '.csv'), index=False)
        print('Save the value of prediction successfully!!')

        # Save the model weights.
        # NOTE(review): with nn.DataParallel the keys carry a "module." prefix.
        model_path = os.path.join(save_path, 'Model')
        os.makedirs(model_path, exist_ok=True)
        torch.save(self.net.state_dict(), os.path.join(model_path, str(count) + ".pth"))
        return count

    def reg_calculate(self, true, prediction, features=None):
        """Compute regression metrics; negative predictions are clipped to 0.

        The caller's ``prediction`` array is not modified.

        Args:
            true: ground-truth values.
            prediction: predicted values.
            features: number of features, needed for the adjusted R^2.

        Returns:
            ``(mse, rmse, mae, mape, r2, r2_adjusted, rmsle)``, or
            ``(mse, rmse, mae, mape, r2, rmsle)`` when the adjusted R^2 cannot
            be computed (``features`` missing or a zero denominator).
        """
        true = np.asarray(true)
        prediction = np.clip(np.asarray(prediction), 0, None)
        mse = metrics.mean_squared_error(true, prediction)
        rmse = np.sqrt(mse)
        mae = metrics.mean_absolute_error(true, prediction)
        mape = np.mean(np.abs((true - prediction) / true)) * 100
        r2 = metrics.r2_score(true, prediction)
        rmsle = np.sqrt(metrics.mean_squared_log_error(true, prediction))
        try:
            n = len(true)
            p = features
            r2_adjusted = 1 - ((1 - r2) * (n - 1)) / (n - p - 1)
        except (TypeError, ZeroDivisionError):
            print('if you wanna get the value of r2_adjusted, you can define the number of features, '
                  'which is the third parameter.')
            return mse, rmse, mae, mape, r2, rmsle
        return mse, rmse, mae, mape, r2, r2_adjusted, rmsle

    def create_batch_size(self, X_train, y_train):
        """Shuffle the samples and cut them into batches of ``batch_size``.

        The last batch may be smaller; no empty batch is produced.

        Args:
            X_train: array of samples, indexed along axis 0.
            y_train: array of labels aligned with ``X_train``.

        Returns:
            ``(b_datas, b_labels)``: two lists of arrays, batch by batch.
        """
        order = np.random.permutation(X_train.shape[0])
        data = X_train[order]
        label = y_train[order]
        step = self.batch_size
        starts = range(0, data.shape[0], step)
        b_datas = [data[s:s + step] for s in starts]
        b_labels = [label[s:s + step] for s in starts]
        return b_datas, b_labels

    def _evaluate(self, device, criterion):
        """Run the network on the test set and store clipped predictions.

        Sets ``self.test_prediction`` (negative values replaced by 0).

        Returns:
            The test loss as a 0-d numpy array.
        """
        self.net.eval()
        test_x = torch.FloatTensor(self.X_test).to(device)
        test_y = torch.FloatTensor(self.y_test).to(device)
        with torch.no_grad():  # no graph is needed for evaluation
            test_prediction = self.net(test_x)
            test_loss = criterion(test_prediction, test_y)
        self.test_prediction = test_prediction.cpu().numpy()
        self.test_prediction[self.test_prediction < 0] = 0
        return test_loss.cpu().numpy()

    def fit(self, X_train, y_train, X_test, y_test):
        """Train the network with Adam + MSE and staged learning-rate decay.

        Every 100 epochs the test loss is recorded. A decay stage ends when
        the mean loss of the last 10 epochs differs from the mean of the 10
        before by less than the current limit, when a third of the epoch
        budget per stage is used up, or on the last epoch. At each stage end
        the model is saved and evaluated; training stops after three stages.

        Args:
            X_train: training samples (numpy array).
            y_train: training targets; 1-D targets are reshaped to (N, 1).
            X_test: test samples (numpy array).
            y_test: test targets; 1-D targets are reshaped to (N, 1).

        Raises:
            ValueError: if ``X_train`` contains no samples.
        """
        if y_train.ndim == 1:
            y_train = y_train.reshape(-1, 1)
        if y_test.ndim == 1:
            y_test = y_test.reshape(-1, 1)
        self.X_train, self.X_test, self.y_train, self.y_test = X_train, X_test, y_train, y_test

        b_data, b_labels = self.create_batch_size(X_train, y_train)
        if not b_data:
            raise ValueError("X_train contains no samples")

        # Use the same run index as save_results() so files line up.
        count = self._next_run_index()
        net_weight = os.path.join(self.save_path, 'Weight')
        os.makedirs(net_weight, exist_ok=True)
        net_path = os.path.join(net_weight, str(count) + '.pkl')
        net_para_path = os.path.join(net_weight, str(count) + '_parameters.pkl')

        # Choose CPU or GPU.
        device = torch.device(self.device if torch.cuda.is_available() else "cpu")
        if torch.cuda.is_available():
            print("Let's use GPU: {}".format(self.device))
        else:
            print("Let's use CPU")
        if self.use_more_gpu and torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs")
            # DataParallel splits along dim 0: [64, ...] -> [32, ...], [32, ...] on 2 GPUs.
            self.net = nn.DataParallel(self.net)
        self.net.to(device)
        self.net.train()

        # Accept either a sequence of learning rates or a single value.
        try:
            lr_schedule = list(self.lr)
        except TypeError:
            lr_schedule = [self.lr]
        optim = torch.optim.Adam(self.net.parameters(), lr=lr_schedule[0], weight_decay=self.weight_decay)
        criterion = torch.nn.MSELoss()
        print("")

        # Start every fit() from the first decay stage.
        self.n = 0
        self.D = []
        evaluated = False
        start = time.time()
        limit = self.limit[0]
        for e in range(self.epoch):
            tempLoss = []
            self.net.train()
            for batch_x, batch_y in zip(b_data, b_labels):
                train_x = torch.FloatTensor(batch_x).to(device)
                train_y = torch.FloatTensor(batch_y).to(device)
                prediction = self.net(train_x)
                loss = criterion(prediction, train_y)
                tempLoss.append(float(loss))
                optim.zero_grad()
                loss.backward()
                optim.step()
            # Track the last batch loss of each epoch for the plateau check.
            self.D.append(loss.detach().cpu().numpy())
            avgloss = np.array(tempLoss).sum() / len(tempLoss)
            self.avgLossList.append(avgloss)

            if (e + 1) % 100 == 0:
                print('Training... epoch: {}, loss: {}'.format((e + 1), self.avgLossList[-1]))
                test_loss = self._evaluate(device, criterion)
                evaluated = True
                self.TrainLosses.append(avgloss)
                self.TestLosses.append(test_loss)

            # Stage-termination check.
            if len(self.D) < 20:
                continue
            loss1 = np.mean(np.array(self.D[-20:-10]))
            loss2 = np.mean(np.array(self.D[-10:]))
            d = float(np.abs(loss2 - loss1))  # change between the two windows
            plateaued = d < limit
            last_epoch = e == self.epoch - 1
            stage_budget_spent = e > (self.epoch - 1) / 3 * (self.n + 1)
            if not (plateaued or last_epoch or stage_budget_spent):
                continue

            self.D = []  # reset the window for the next stage
            self.n += 1
            print('The error changes within {}'.format(limit))
            self.e = e + 1
            print('Training... epoch: {}, loss: {}'.format((e + 1), loss.detach().cpu().numpy()))
            torch.save(self.net, net_path)
            torch.save(self.net.state_dict(), net_para_path)

            test_loss = self._evaluate(device, criterion)
            evaluated = True
            self.mse, self.rmse, self.mae, self.mape, \
                self.r2, self.r2_adjusted, self.rmsle = self.reg_calculate(
                    self.y_test, self.test_prediction, self.X_test.shape[-1])
            print('\033[1;35m Testing... epoch: {}, loss: {} , r2 {}\033[0m!'.format((e + 1), test_loss, self.r2))

            # Three stages have been completed.
            if self.n == 3:
                print('The meaning of the loop is not big, stop!!')
                break
            limit = self.limit[self.n]
            # Reuse the last rate when fewer rates than stages were given.
            next_lr = lr_schedule[min(self.n, len(lr_schedule) - 1)]
            print('Now learning rate is : {}'.format(next_lr))
            optim.param_groups[0]["lr"] = next_lr

        end = time.time()
        self.t = end - start
        print('Training completed!!! Time consuming: {}'.format(str(self.t)))

        # Short runs may finish without any evaluation; do one now.
        if not evaluated:
            self._evaluate(device, criterion)
        # Compute the final metrics.
        self.mse, self.rmse, self.mae, self.mape, \
            self.r2, self.r2_adjusted, self.rmsle = self.reg_calculate(
                self.y_test, self.test_prediction, self.X_test.shape[-1])
        resDict = {"mse": self.mse, "rmse": self.rmse, "mae": self.mae, "mape": self.mape,
                   "r2": self.r2, "r2_adjusted": self.r2_adjusted, "rmsle": self.rmsle}
        # Metrics first, then the settings; fresh metrics win over stale ones.
        for key, value in self.resultDict.items():
            resDict.setdefault(key, value)
        self.resultDict = resDict

开始训练模型啦

# Build the model; feature_size matches the doubled feature count (13 * 2).
model = TransAm(feature_size=26, num_layers=1, dropout=0.5)
grt = General_Regression_Training_3d(
    model,
    learning_rate=[1e-3, 1e-6, 1e-8],
    batch_size=512,
    use_more_gpu=False,
    weight_decay=1e-3,
    device=0,
    save_path='transformer_Result',
    epoch=20000,
)
# y_true holds the test-set targets produced by train_test_split.
grt.fit(X_train_Double, y_train, X_test_Double, y_true)

其中 feature_size 是输入的特征数量(这里是翻倍之后的 13×2=26,必须与数据最后一维一致)

num_layers 是 Transformer 编码器的层数

使用 Transformer 做预测 (代码+原理)相关推荐

  1. 如何用神经网络预测数据,神经网络做预测的原理

    神经网络算法原理? 神经网络预测学习样本中的驾驶行为特征. 如图显示了某个驾驶场景的行驶路径深度学习训练,通过神经网络可以学习驾驶人的行为,并根据当前获取的环境信息决策行驶轨迹,进而可以控制车辆的转向 ...

  2. 神经网络可以用来预测吗,神经网络做预测的原理

    1.采用什么手段使神经网络预测更加准确 优化神经网络结构.如BP神经网络改变隐层神经元数量.训练算法等: 使用其他神经网络.如Elman神经网络考虑了前一时刻的输出,比较适合用于预测,预测效果往往更好 ...

  3. python预测实例教程_手把手教你用Python库Keras做预测(附代码)-阿里云开发者社区...

    当你在Keras中选择好最合适的深度学习模型,就可以用它在新的数据实例上做预测了.但是很多初学者不知道该怎样做好这一点,我经常能看到下面这样的问题: "我应该如何用Keras对我的模型作出预 ...

  4. 华人团队用Transformer做风格迁移,速度快、可试玩,网友却不买账

    视学算法报道 编辑:杜伟 利用神经网络进行风格迁移是一项非常常见的任务,方法也很多,比如基于优化和基于 RL 的方法.最近,来自百度 VIS 团队的研究者提出了一种基于 Transformer 的风格 ...

  5. 如何在Java应用里集成Spark MLlib训练好的模型做预测

    前言 昨天媛媛说,你是不是很久没写博客了.我说上一篇1.26号,昨天3.26号,刚好两个月,心中也略微有些愧疚.今天正好有个好朋友问,怎么在Java应用里集成Spark MLlib训练好的模型.在St ...

  6. android listview item 展开动画,android的ListView点击item使item展开的做法的实现代码

    本文介绍了android的ListView点击item使item展开的做法的实现代码,分享给大家,具体如下: 效果图: 原理是点击item的时候,重新measure list的各个item的高度 li ...

  7. 极简权限认证必须掌握【代码+原理+建议收藏】

    这个极简权限认证必须掌握,代码不过百,但是很关键 小白最近没有来问学委问题,不过前几天,有朋友问到如何进行访问控制,资源控制的,学委特地写了一篇. 这其实就是权限认证,理解并掌握其核心思想很重要,而且 ...

  8. 骨龄预测代码学习(一)

    骨龄预测代码学习(一) 代码/数据来源: 代码详解: 运行结果: 总结: 代码/数据来源: 数据集:RSNA. 代码:github. 代码详解: 从GitHub里下载的代码....完全看不懂,所以今天 ...

  9. 直播网站源码直播平台软件开发iOS动手做一个直播(原理篇)

    直播网站源码直播平台软件开发iOS动手做一个直播(原理篇) 上篇文章主要给出了代码,但是并没有详细说明直播相关的知识,这篇文章就说一下直播的相关理论知识.附上直播代码篇地址. ###推流 腾讯直播平台 ...

最新文章

  1. openstack学习笔记三 创建第一个实例
  2. 'str' object is not callable
  3. python 装饰器 参数-python装饰器的详细解析
  4. JavaScript中window.open用法实例详解
  5. 程序员成长的三个方法
  6. 使用CDS view开发SAP Marketing contact的facet追溯工具
  7. c 定义结构体时提示应输入声明_C++|了解结构体的内存对齐(成员声明的顺序影响占用空间大小)...
  8. 如何运行vue项目(从gethub上download的开源项目)
  9. 交个朋友,还得看小米
  10. [技術]如何合併 GridView 中的多個標題
  11. Cass环境下光标无显示
  12. delphi 标题栏相关操作
  13. 编程基本功:学会抄,自然就会创新
  14. 跨平台数据库ODB实战4-Person类的聚合查询
  15. Holy Grail 计蒜客41305
  16. Go 高性能编程心法探秘
  17. 系统突然变慢的处理方案
  18. 海康威视工程师谈嵌入式软件
  19. python云计算主要是干嘛的_阿里巴巴python 云计算是干什么的
  20. swapidc,QCNSWAP,模板

热门文章

  1. 【JAVA百炼成仙】金丹篇——JAVA流程控制
  2. 情景感知:基本概念、关键技术与应用系统
  3. 拼多多回应,关于个别用户反馈“vivo 手机提示拼多多删除照片”的说明
  4. NLP --- 对抗学习:从FGM, PGD到FreeLB
  5. VMware虚拟机无法识别USB设备:USB摄像头、U盘等
  6. unet脑肿瘤分割_BraTS18——多模态MR图像脑肿瘤分割挑战赛续6
  7. 关于流媒体服务器的基本概念梳理
  8. 诗经 -小雅 - 南山有台
  9. TF乘法之multiply、matmul、*
  10. FlyAI小课堂:Tensorflow-分布式训练