import os import numpy as np import pandas as pd import torch import torch.nn as nn import torch.optim as optim from torch.utils.data import Dataset, DataLoader # 设置环境和超参数 ## 方案一:使用os.environ # os.environ['CUDA_VISIBLE_DEVICES']='0' ## 方案二:使用“device”,后续对要使用GPU的变量用.to(device)即可 device = torch.device('cuda:1' if torch.cuda is_available() else 'cpu') ## 配置其他超参数,如batch_size, num_workers, learning rate, 以及总的epochs batch_size = 256 num_workers = 4 # 对于Windows用户,这里应设置为0,否则会出现多线程错误 lr = 1e-4 epochs = 20 # 设置数据变换 from torchvision import transforms image_size = 28 data_transform = transform.Compose([ transform.ToPILImage(), # 这一步取决于后续的数据读取方式,如果使用内置数据集读取方式则不需要 transform.Resize(image_size), transform.ToTensor()]) ## 读取方式一:使用torchvision自带数据集,下载可能需要一段时间 from torchvision import datasets train_data = datasets.FashionMNIST(root='./', train=True, download=True, transform=data_transform) test_data = datasets.FashionMNIST(root='./', train=False, download=True, transform=data_transform) # 定义DataLoader类,加载数据 # drop_last对最后无法满足 batch_size大小的皮数据予以丢弃 train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True) test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=num_workers) # 据可视化操作,验证读入的数据是否正确 import matplotlib.pyplot as plt image, label = next(iter(train_loader)) print(image.shape, label.shape) plt.imshow(image[0][0], cmap="gray") # 模型设计 class Net(nn.Module): def __init__(self): super(Net, self).__init__() self.conv = nn.Sequential( nn.Conv2d(1, 32, 5), nn.ReLU(), nn.MaxPool2d(2, stride = 2), nn.Dropout(0.3), nn.Conv2d(32, 64, 5), nn.ReLU(), nn.MaxPool2d(2, stride=2), nn.Dropout(0.3)) self.fc = nn.Sequential( nn.Linear(64*4*4, 512), nn.ReLU(), nn.Linear(512, 10)) def forward(self, x): x = self.conv(x) x = x.view(-1, 64*4*4) x = self.fc(x) return x model = Net() model = model.cuda() # model = nn.DataParallel(model).cuda() # 多卡训练时的写法 ## 设定损失函数 # 使用CrossEntropy损失会,自动把整数型的label转为one-hot型,用于计算CE loss criterion = nn.CrossEntropyLoss() ## 设置优化器 optimizer = optim.Adam(model.parameters(), lr=0.001) ## 训练和测试 def train(epoch): model.train() train_loss = 0 for data, label in train_loader: data, label = data.cuda(), label.cuda() optimizer = optimizer.zero_grad() output = model(data) loss = criterion(output, label) loss.backward() optimizer.step() train_loss += loss.item()*data.size(0) train_loss = train_loss/len(train_loader.dataset) print('Epoch: {} /tTraining Loss: {:.6f}'.format(epoch, train_loss)) def val(epoch): model.eval() val_loss = 0 gt_labels = [] pred_labels = [] with torch.no_grad(): for data, label in test_loader: data, label = data.cuda(), label.cuda() output = model(data) preds = torch.argmax(output, 1) gt_labels.append(label.cpu().data.numpy()) pred_labels.append(preds.cpu().data.numpy()) loss = criterion(output, label) val_loss += loss.item()*data.size(0) val_loss = val_loss/len(test_loader.dataset) gt_labels, pred_labels = np.concatenate(gt_labels), np.concatenate(pred_labels) acc = np.sum(gt_labels==pred_labels)/len(pred_labels) print('Epoch: {} /tValidation Loss: {:.6f}, Accuracy: {:6f}'.format(epoch, val_loss, acc)) ## 训练与测试 for epoch in range(1, epochs+1): train(epoch) val(epoch)
模型保存
save_path = './FahionModel.pkl' torch.save(model, save_path)
加载模型
model = torch.load('model.pkl')
注意:将模型保存成何种格式文件无所谓(比如pkl,pth等)。
保存与加载模型参数
torch.save(model.state_dict(), 'model_params.pth') model.load(torch.load( 'model_params.pth'))
原创文章,作者:ItWorker,如若转载,请注明出处:https://blog.ytso.com/tech/pnotes/281532.html