去噪扩散概率模型（Denoising Diffusion Probabilistic Models, DDPM）-Python案例

1、去噪概率模型（Denoising Probabilistic Models）

去噪概率模型（Denoising Probabilistic Models）是一类通过学习数据的潜在分布来去除噪声的生成模型。其核心思想是，在有噪声的数据中，模型通过条件概率学习输入数据与噪声之间的关系，从而能够生成去噪后的数据。

去噪概率模型的基本步骤

数据准备：将原始数据添加噪声，构造带噪声的样本。
模型训练：以带噪声的样本作为输入来训练模型，目标是最大化在给定带噪声样本的条件下原始（干净）样本的条件概率。
去噪过程：给定带噪声的样本，模型生成去噪后的样本。

PyTorch 示例代码

以下是一个简单的去噪自编码器的示例，使用PyTorch实现：

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import os

# Denoising autoencoder definition
class DenoisingAutoencoder(nn.Module):
    """Fully connected autoencoder over flattened 784-value inputs.

    The encoder narrows 784 -> 256 -> 64 features and the decoder widens
    64 -> 256 -> 784 again, finishing with a sigmoid so that every output
    value lies between 0 and 1.
    """

    def __init__(self):
        super().__init__()
        input_size, hidden_size, code_size = 784, 256, 64

        # Compress the input down to the bottleneck code.
        self.encoder = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, code_size),
            nn.ReLU(),
        )
        # Expand the code back to the input size.
        self.decoder = nn.Sequential(
            nn.Linear(code_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, input_size),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Encode ``x`` and return its reconstruction with the same shape."""
        return self.decoder(self.encoder(x))

# MNIST dataset backed by the raw IDX binary files
class MNISTDataset(Dataset):
    """Dataset that reads images and labels from raw IDX binary files.

    Images are stored as a float32 tensor with one flattened 784-value row
    per image; the values are the raw bytes converted to float (no rescaling
    is applied). Labels are kept as a NumPy uint8 array.

    Args:
        images_path: Path of the image file.
        labels_path: Path of the label file.
        transform: Optional callable applied to each image in ``__getitem__``.
    """

    def __init__(self, images_path, labels_path, transform=None):
        self.images = self.load_images(images_path)
        self.labels = self.load_labels(labels_path)
        self.transform = transform

    def load_images(self, path):
        """Read the image file at ``path`` into an ``(N, 784)`` float32 tensor."""
        with open(path, 'rb') as stream:
            stream.seek(16)  # skip the first 16 bytes (file header)
            payload = stream.read()
        pixels = np.frombuffer(payload, dtype=np.uint8).reshape(-1, 1, 28, 28)
        as_float = torch.tensor(pixels, dtype=torch.float32)
        return as_float.view(-1, 784)  # one flat row per image

    def load_labels(self, path):
        """Read the label file at ``path`` into a 1-D uint8 NumPy array."""
        with open(path, 'rb') as stream:
            stream.seek(8)  # skip the first 8 bytes (file header)
            payload = stream.read()
        return np.frombuffer(payload, dtype=np.uint8)

    def __len__(self):
        """Return the number of labels."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at ``idx``, transformed if requested."""
        sample, target = self.images[idx], self.labels[idx]
        if self.transform:
            sample = self.transform(sample)
        return sample, target

# Data preparation: the raw MNIST files are expected under ./MNIST
data_root = r'./MNIST'

# Training split
train_dataset = MNISTDataset(
    os.path.join(data_root, 'train-images-idx3-ubyte'),
    os.path.join(data_root, 'train-labels-idx1-ubyte'),
)

# Test split
test_dataset = MNISTDataset(
    os.path.join(data_root, 't10k-images-idx3-ubyte'),
    os.path.join(data_root, 't10k-labels-idx1-ubyte'),
)

# Shuffled mini-batches of 64 training samples
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)

# Model, loss function, and optimizer
model = DenoisingAutoencoder()
criterion = nn.MSELoss()  # mean squared error
optimizer = optim.Adam(
    model.parameters(),
    lr=1e-3,
)

# Visualization helper
def show_images(original, noisy, denoised):
    """Plot the original, noisy, and denoised image side by side.

    Each argument is a tensor that is reshaped to 28x28 before being drawn
    in grayscale, so it must hold exactly 784 values.
    """
    panels = (
        ("Original", original),
        ("Noisy", noisy),
        ("Denoised", denoised),
    )

    plt.figure(figsize=(12, 4))
    for position, (caption, tensor) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        plt.title(caption)
        plt.imshow(tensor.view(28, 28).detach().numpy(), cmap='gray')

    plt.show()

# Train the model
for epoch in range(5):
    running_loss = 0.0  # sum of per-sample losses over the epoch
    seen = 0            # number of samples processed in the epoch

    for data, _ in train_loader:
        # The dataset yields raw byte values (0..255). Rescale to [0, 1] so
        # that the noise level, the clamp below, and the model's sigmoid
        # output all share the same range; without this the clamp turns
        # every non-zero pixel into 1.
        clean = data / 255.0

        # Corrupt the input with Gaussian noise and keep it in [0, 1]
        noisy_data = torch.clamp(clean + 0.5 * torch.randn_like(clean), 0., 1.)

        optimizer.zero_grad()
        output = model(noisy_data)

        # Denoising objective: reconstruct the CLEAN image from the noisy
        # input. Comparing against the noisy input would only teach the
        # model to copy its input, noise included.
        loss = criterion(output, clean)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * clean.size(0)
        seen += clean.size(0)

    # Report the epoch average rather than whatever the last batch produced
    print(f'Epoch {epoch + 1}, Loss: {running_loss / max(seen, 1)}')

# Visualize an original, a noisy, and a denoised image.
# Draw the batch from the held-out test split (not the training loader).
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)
test_data, _ = next(iter(test_loader))

# The dataset yields raw byte values (0..255); rescale to [0, 1] so the
# 0.5-std noise and the clamp behave as intended instead of binarizing
# the image.
test_data = test_data / 255.0
noisy_test_data = torch.clamp(test_data + 0.5 * torch.randn_like(test_data), 0., 1.)

# Inference only: no gradients are needed here
with torch.no_grad():
    denoised_output = model(noisy_test_data)

# Display the first image of the batch
show_images(test_data[0], noisy_test_data[0], denoised_output[0])

输出

Epoch 1, Loss: 0.08383525162935257
Epoch 2, Loss: 0.07632550597190857
Epoch 3, Loss: 0.07644318789243698
Epoch 4, Loss: 0.07475438714027405
Epoch 5, Loss: 0.0726914331316948

（图：原始图像、带噪声图像与去噪后图像的对比）

说明

去噪自编码器：该模型包括编码器和解码器。编码器将输入数据压缩到低维表示，解码器将其还原为原始数据。
数据处理：在数据准备阶段，随机噪声被添加到原始数据中。
训练过程：模型通过最小化重建误差（均方误差）来学习去噪。

这个示例展示了如何使用PyTorch实现基本的去噪概率模型。

2、去噪扩散概率模型

去噪扩散概率模型（Denoising Diffusion Probabilistic Models, DDPM）是一种生成模型，通过逐步去噪从随机噪声中生成高质量数据。其核心思想是模拟一个正向过程，在这个过程中，数据被逐渐添加噪声，并训练一个反向过程，从噪声逐渐恢复数据。

关键步骤

正向过程：逐步向数据中添加噪声，直到它变得接近标准正态分布。（扩散）
反向过程：通过神经网络学习如何去噪，从而从随机噪声恢复到原始数据分布。（去噪）
训练：使用变分下界（VLB）来优化模型；实践中通常将其简化为预测噪声与真实噪声之间的均方误差（MSE）损失，下面的示例代码采用的正是这种简化形式。

PyTorch 示例

以下是一个简化的去噪扩散概率模型的示例代码框架：

import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import os
import numpy as np

# Small U-Net-style network used as the noise predictor
class UNet(nn.Module):
    """Minimal encoder-decoder with a single skip connection.

    The input is a ``(N, 1, H, W)`` tensor. It passes through one
    full-resolution convolution, a 2x max-pool followed by a second
    convolution, and a 2x transposed convolution whose result is added to
    the full-resolution features before the final convolution. ``H`` and
    ``W`` must be even so the upsampled features match the skip connection.

    The output has the same shape as the input and is NOT squashed: the
    network is trained to regress Gaussian noise, which takes negative
    values and values above 1, so a bounded activation such as a sigmoid
    would make the target unreachable.
    """

    def __init__(self):
        super().__init__()
        self.encoder1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
        self.encoder2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.decoder1 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.decoder2 = nn.Conv2d(64, 1, kernel_size=3, padding=1)

    def forward(self, x):
        """Return the unbounded prediction for ``x`` (same shape as ``x``)."""
        enc1 = torch.relu(self.encoder1(x))
        enc2 = torch.relu(self.encoder2(self.pool(enc1)))
        dec1 = self.decoder1(enc2)
        # Skip connection, then a linear output layer (no sigmoid)
        return self.decoder2(dec1 + enc1)

# Noise schedule
def noise_schedule(t, num_steps=1000):
    """Return the per-sample noise scale ``t / num_steps``, capped at 1.0.

    The result is created on the same device as ``t``, so the function works
    on CPU as well as on GPU.

    Args:
        t: Tensor of time steps, one per sample.
        num_steps: Time step at which the scale reaches 1.0.

    Returns:
        Float tensor of shape ``(batch_size, 1, 1, 1)`` so it broadcasts
        against a batch of images.
    """
    scale = t.float().view(-1, 1, 1, 1) / num_steps
    return torch.clamp(scale, max=1.0)

class MNISTDataset(Dataset):
    """Dataset that reads images and labels from raw IDX binary files.

    Images are stored as a float32 tensor of shape ``(N, 1, 28, 28)``; the
    values are the raw bytes converted to float (no rescaling is applied).
    Labels are kept as a NumPy uint8 array.

    Args:
        images_path: Path of the image file.
        labels_path: Path of the label file.
        transform: Optional callable applied to each image in ``__getitem__``.
    """

    def __init__(self, images_path, labels_path, transform=None):
        self.images = self.load_images(images_path)
        self.labels = self.load_labels(labels_path)
        self.transform = transform

    def load_images(self, path):
        """Read the image file at ``path`` into an ``(N, 1, 28, 28)`` float32 tensor."""
        with open(path, 'rb') as stream:
            stream.seek(16)  # skip the first 16 bytes (file header)
            payload = stream.read()
        pixels = np.frombuffer(payload, dtype=np.uint8).reshape(-1, 1, 28, 28)
        return torch.tensor(pixels, dtype=torch.float32)  # shape stays (N, 1, 28, 28)

    def load_labels(self, path):
        """Read the label file at ``path`` into a 1-D uint8 NumPy array."""
        with open(path, 'rb') as stream:
            stream.seek(8)  # skip the first 8 bytes (file header)
            payload = stream.read()
        return np.frombuffer(payload, dtype=np.uint8)

    def __len__(self):
        """Return the number of labels."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at ``idx``, transformed if requested."""
        sample, target = self.images[idx], self.labels[idx]
        if self.transform:
            sample = self.transform(sample)
        return sample, target

# Data preparation: the raw MNIST files are expected under ./MNIST
data_root = r'./MNIST'

# Training split
train_dataset = MNISTDataset(
    os.path.join(data_root, 'train-images-idx3-ubyte'),
    os.path.join(data_root, 'train-labels-idx1-ubyte'),
)

# Test split
test_dataset = MNISTDataset(
    os.path.join(data_root, 't10k-images-idx3-ubyte'),
    os.path.join(data_root, 't10k-labels-idx1-ubyte'),
)

# Shuffled mini-batches of 64 training samples
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)

# Model, loss function, and optimizer.
# Use the GPU when one is present; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = UNet().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    running_loss = 0.0  # sum of per-sample losses over the epoch
    seen = 0            # number of samples processed in the epoch

    for images, _ in train_loader:
        # The dataset yields raw byte values (0..255). Rescale to [0, 1] so
        # the added noise (scale <= 1) is not negligible next to the pixels.
        images = images.to(device) / 255.0
        optimizer.zero_grad()

        # Corrupt each image with Gaussian noise at a random time step.
        # Both tensors are created directly on the training device.
        t = torch.randint(0, 1000, (images.size(0),), device=device)
        noise = torch.randn_like(images)
        noisy_images = images + noise_schedule(t) * noise

        # The model is trained to recover the noise that was added
        predicted_noise = model(noisy_images)

        loss = criterion(predicted_noise, noise)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        seen += images.size(0)

    # Report the epoch average rather than whatever the last batch produced
    print(f'Epoch {epoch + 1}, Loss: {running_loss / max(seen, 1)}')

# Prepare a sample for visualization.
# Draw the batch from the held-out test split (not the training loader).
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)
test_image, _ = next(iter(test_loader))
test_image = test_image.to(device) / 255.0  # same [0, 1] scaling as in training
with torch.no_grad():
    # Add unit-scale Gaussian noise, then predict it back
    noisy_test_image = test_image + torch.randn_like(test_image)
    predicted_noise = model(noisy_test_image)

# Show the original, noisy, and denoised images
def show_images(original, noisy, denoised):
    """Plot the first image of each batch side by side in grayscale.

    Only element 0 of each argument is drawn; its leading dimension is
    squeezed away and the data is moved to the CPU before plotting.
    """
    captions = ("Original", "Noisy", "Denoised")
    batches = (original, noisy, denoised)

    plt.figure(figsize=(12, 4))
    for position, (caption, batch) in enumerate(zip(captions, batches), start=1):
        plt.subplot(1, 3, position)
        plt.title(caption)
        plt.imshow(batch[0].squeeze(0).cpu().numpy(), cmap='gray')

    plt.show()

# The model outputs a noise estimate, not an image. Since the noise was added
# at unit scale, the denoised estimate is the noisy input minus that estimate.
show_images(test_image, noisy_test_image, noisy_test_image - predicted_noise)

输出

Epoch 1, Loss: 0.7177927494049072
Epoch 2, Loss: 0.7039206027984619
Epoch 3, Loss: 0.7078389525413513
Epoch 4, Loss: 0.712163507938385
Epoch 5, Loss: 0.6767022013664246