👘 PyTorch 深度学习快速入门

·7 min read·1341

来源:《深度学习笔记》— PyTorch 深度学习快速入门

为什么选择 PyTorch

PyTorch 由 Meta AI 开发,采用动态计算图(Define-by-Run),写法接近普通 Python,调试方便,是学习深度学习原理的理想工具。

核心优势

  • Pythonic:代码直观,像写 NumPy 一样写神经网络
  • 动态图:可以在运行时改变网络结构
  • 自动求导:内置 autograd 引擎
  • GPU 支持:一行代码切换 CPU/GPU

1. 张量(Tensor)

张量是 PyTorch 的核心数据结构,类似 NumPy 的 ndarray,但支持 GPU 加速和自动求导。

1.1 创建张量

import torch
import numpy as np

# From a Python list
t1 = torch.tensor([1.0, 2.0, 3.0])
print(t1.shape)   # torch.Size([3])
print(t1.dtype)   # torch.float32

# From a NumPy array (the tensor shares memory with `arr`, so an in-place
# change to one is visible through the other)
arr = np.array([1, 2, 3])
t2 = torch.from_numpy(arr)

# Common initializers
zeros = torch.zeros(3, 4)
ones  = torch.ones(3, 4)
rand  = torch.rand(3, 4)      # uniform on [0, 1)
randn = torch.randn(3, 4)     # standard normal
eye   = torch.eye(3)          # 3x3 identity matrix

1.2 基本运算

a = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
b = torch.tensor([[5.0, 6.0], [7.0, 8.0]])

print(a + b)         # element-wise addition
print(a * b)         # element-wise multiplication
print(a @ b)         # matrix multiplication (equivalent to torch.mm(a, b))

# Shape manipulation
print(a.reshape(4, 1))   # (2,2) -> (4,1)
print(a.T)               # transpose
print(a.unsqueeze(0))  # (1,2,2)
print(a.squeeze())       # removes size-1 dims; `a` has none, so it stays (2,2)

1.3 CPU / GPU 切换

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"使用设备:{device}")

t = torch.randn(3, 3).to(device)

# The model and its input data must be moved to the same device.
# NOTE: `MyNet` and `x` are placeholders for your own model class and input
# tensor; they are not defined in this snippet.
model = MyNet().to(device)
x = x.to(device)

2. 自动求导(Autograd)

2.1 requires_grad 与梯度追踪

# requires_grad=True makes autograd record the operations applied to x.
x = torch.tensor([2.0], requires_grad=True)
y = x ** 2 + 3 * x + 1   # y = x² + 3x + 1

y.backward()   # back-propagate from y; stores dy/dx in x.grad

# dy/dx = 2x + 3 = 7.0
print(x.grad)   # tensor([7.])

2.2 梯度的累加与清零

PyTorch 默认会把每次 backward() 得到的梯度累加到参数的 .grad 上,因此训练时每次迭代在反向传播之前都必须手动清零:

# NOTE: `optimizer` and `loss` come from the surrounding training code.
optimizer.zero_grad()   # reset the gradients accumulated so far
loss.backward()         # compute gradients
optimizer.step()        # update parameters

2.3 推理时关闭梯度

# NOTE: `model` and `x_test` come from the surrounding code.
model.eval()            # switch to evaluation mode (affects e.g. Dropout / BatchNorm)
with torch.no_grad():   # no gradient tracking inside this block
    output = model(x_test)

3. 构建神经网络(nn.Module)

3.1 自定义网络

import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    """Multi-layer perceptron with a single ReLU hidden layer.

    Args:
        input_size: Number of input features.
        hidden_size: Width of the hidden layer.
        output_size: Number of output units.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # input -> hidden, then hidden -> output
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the output-layer values; no activation is applied to them."""
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)


model = MLP(input_size=784, hidden_size=128, output_size=10)

3.2 nn.Sequential 快速搭建

# 784 -> 128 -> 64 -> 10 fully-connected stack; the modules run in the order listed.
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Dropout(p=0.2),   # drops each activation with probability 0.2 in training mode
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10)    # output layer: no activation after it
)

3.3 常用层一览

| 层类型 | PyTorch 类 | 用途 |
| --- | --- | --- |
| 全连接 | nn.Linear(in, out) | MLP、分类头 |
| 卷积 | nn.Conv2d(in_ch, out_ch, kernel) | 图像特征提取 |
| ReLU | nn.ReLU() | 隐藏层激活 |
| BatchNorm | nn.BatchNorm1d / 2d | 训练稳定性 |
| Dropout | nn.Dropout(p=0.5) | 防止过拟合 |
| 池化 | nn.MaxPool2d(kernel_size) | 下采样 |

4. 损失函数与优化器

# The three `criterion` assignments below are alternatives; keep the one that
# matches your task.

# Cross-entropy (multi-class classification). It applies log-softmax
# internally, so the model should output raw logits.
criterion = nn.CrossEntropyLoss()

# MSE (regression)
criterion = nn.MSELoss()

# Binary cross-entropy (binary classification).
# NOTE: nn.BCELoss expects probabilities in [0, 1] (e.g. after a Sigmoid);
# nn.BCEWithLogitsLoss is the variant that takes raw logits.
criterion = nn.BCELoss()

import torch.optim as optim

# The two `optimizer` assignments below are alternatives as well.
# NOTE: `model` comes from the surrounding code.

# SGD + Momentum
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Adam
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Learning-rate schedule: multiply the LR by gamma=0.5 every 5 scheduler steps
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

5. 标准训练循环

PyTorch 训练循环的"四步法"(清零梯度 → 正向传播 → 计算损失 → 反向传播,随后更新参数):

def train_one_epoch(model, dataloader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: Module to train; it is switched to training mode.
        dataloader: Iterable yielding ``(inputs, targets)`` batches. It does
            not need to support ``len()``.
        criterion: Callable mapping ``(outputs, targets)`` to a scalar loss.
        optimizer: Optimizer that updates the parameters of ``model``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        The mean of the per-batch losses as a float, or ``0.0`` when the
        dataloader yields no batches.
    """
    model.train()
    total_loss = 0.0
    # Counted in the loop (instead of len(dataloader)) so that loaders without
    # __len__ work and an empty loader cannot cause a division by zero.
    num_batches = 0

    for x_batch, y_batch in dataloader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()                # 1. reset gradients
        outputs = model(x_batch)             # 2. forward pass
        loss = criterion(outputs, y_batch)   # 3. compute the loss
        loss.backward()                      # 4. backward pass
        optimizer.step()                     #    + update parameters

        total_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        return 0.0
    return total_loss / num_batches


def evaluate(model, dataloader, device):
    """Return the classification accuracy of ``model`` over ``dataloader``.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches, where the
            model output for a batch has the class scores along dim 1.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Fraction of samples whose highest-scoring class equals the label, or
        ``0.0`` when the dataloader yields no samples.
    """
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():  # no gradients are needed for evaluation
        for x_batch, y_batch in dataloader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            predicted = model(x_batch).argmax(dim=1)
            correct += (predicted == y_batch).sum().item()
            total += y_batch.size(0)

    # An empty loader would otherwise raise ZeroDivisionError.
    if total == 0:
        return 0.0
    return correct / total

6. 数据加载(DataLoader)

from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Convert images to tensors, then normalize with mean 0.1307 and std 0.3081.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST normalization
])

# download=True fetches the dataset into ./data when it is not already there.
train_dataset = datasets.MNIST(root='./data', train=True,  download=True, transform=transform)
test_dataset  = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Only the training set is shuffled.
# NOTE: num_workers=2 loads batches in worker subprocesses. On platforms that
# start workers with "spawn" (Windows, macOS), put the script's entry code
# under `if __name__ == "__main__":`.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True,  num_workers=2)
test_loader  = DataLoader(test_dataset,  batch_size=64, shuffle=False, num_workers=2)

# Peek at a single batch to check the shapes.
for x, y in train_loader:
    print(x.shape, y.shape)  # torch.Size([64, 1, 28, 28]) torch.Size([64])
    break

7. 完整示例:MNIST 手写数字分类

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# 1. Setup: use the GPU when CUDA is available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 2. Data: convert images to tensors and normalize them
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Training batches are shuffled; test batches are not.
train_loader = DataLoader(
    datasets.MNIST('./data', train=True,  download=True, transform=transform),
    batch_size=64, shuffle=True
)
test_loader = DataLoader(
    datasets.MNIST('./data', train=False, download=True, transform=transform),
    batch_size=64, shuffle=False
)

# 3. Model
class MnistNet(nn.Module):
    """Fully-connected MNIST classifier: 784 -> 256 -> 128 -> 10."""

    def __init__(self):
        super().__init__()
        stack = [
            nn.Flatten(),          # (64,1,28,28) -> (64,784)
            nn.Linear(784, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 10),    # 10 classes, no Softmax
        ]
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the 10 class scores."""
        scores = self.net(x)
        return scores

model = MnistNet().to(device)

# 4. Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# scheduler.step() runs once per epoch below, so the LR is halved every 3 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.5)

# 5. Training loop
for epoch in range(10):
    # Train: one pass over the training set, summing the per-batch losses.
    model.train()
    train_loss = 0
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        loss = criterion(model(x), y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Evaluate: count correct predictions on the test set without gradients.
    model.eval()
    correct = 0
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(device), y.to(device)
            pred = model(x).argmax(dim=1)
            correct += (pred == y).sum().item()

    # Accuracy over all test samples; the printed loss is the mean per-batch loss.
    acc = correct / len(test_loader.dataset)
    print(f"Epoch {epoch+1:2d} | Loss: {train_loss/len(train_loader):.4f} | Test Acc: {acc:.4f}")

    scheduler.step()

典型输出(CPU 上训练约 2-3 分钟):

Epoch  1 | Loss: 0.2831 | Test Acc: 0.9703
Epoch  2 | Loss: 0.1219 | Test Acc: 0.9783
...
Epoch 10 | Loss: 0.0401 | Test Acc: 0.9841

8. 模型的保存与加载

# Recommended: save only the parameters (smaller file, better compatibility
# across versions).
torch.save(model.state_dict(), 'mnist_model.pth')

# Loading: rebuild the architecture first, then load the parameters.
# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; weights_only=True restricts unpickling to tensors and
# plain containers.
model = MnistNet()
model.load_state_dict(
    torch.load('mnist_model.pth', map_location="cpu", weights_only=True)
)
model.eval()

# Saving the whole model pickles the module object itself, which ties the file
# to the exact class definition and import path.
torch.save(model, 'mnist_full_model.pth')
# torch.load defaults to weights_only=True since PyTorch 2.6, which rejects a
# pickled nn.Module, so loading a full model has to opt out explicitly.
# SECURITY: weights_only=False can execute arbitrary code during unpickling;
# only load files you trust.
loaded_model = torch.load('mnist_full_model.pth', weights_only=False)

9. 从 NumPy 感知机到 PyTorch:概念对照

| 前序章节(NumPy 手写) | PyTorch 等价 | 说明 |
| --- | --- | --- |
| Affine | nn.Linear | 自动管理 W、b 参数 |
| Relu | nn.ReLU() / F.relu() | 等价 |
| SoftmaxWithLoss | nn.CrossEntropyLoss | 内含 Softmax |
| numerical_gradient | loss.backward() | 自动微分,快几千倍 |
| SGD.update() | optimizer.step() | Adam、SGD 均可替换 |
| 手动 grads['W1'] | param.grad | 自动存储在 .grad |

PyTorch 本质上是对前序章节所有手写实现的工业级封装——理解了手写版本,PyTorch 的每一行代码都将变得透明可读。