在CentOS系统下,PyTorch的并行计算可以通过多种方式实现,包括数据并行、模型并行和分布式并行。以下是一些关键步骤和代码示例,帮助你在CentOS上设置和使用PyTorch进行并行计算。
首先,确保你已经安装了PyTorch。你可以使用pip或conda来安装。以下是使用pip安装的示例:
pip install torch torchvision torchaudio
数据并行是最常见的并行计算方法之一,它通过在多个GPU上分配不同的数据批次来加速训练。
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
# A minimal single-layer regression model used to demonstrate DataParallel.
class SimpleModel(nn.Module):
    """Tiny model: one fully connected layer mapping 10 features to 1 output."""

    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(10, 1)

    def forward(self, x):
        # Delegate straight to the linear layer.
        return self.fc(x)
# Build the model, move it to the target device, and only then — when more
# than one GPU is actually visible — replicate it with DataParallel.
# (The original wrapped unconditionally, which adds scatter/gather overhead
# on single-GPU or CPU-only machines, and wrapped before the .to() move.)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleModel().to(device)
if torch.cuda.device_count() > 1:
    # DataParallel splits each input batch across the visible GPUs.
    model = nn.DataParallel(model)
# Synthetic dataset yielding (features, target) pairs so the training loop
# can unpack inputs and targets from each batch.
class RandomDataset(Dataset):
    """Random regression data: `length` samples of `size` features each.

    Each item is a (features, target) tuple. The original returned only the
    features, so `data[0], data[1]` in the training loop split the batch in
    two instead of separating inputs from labels.
    """

    def __init__(self, size, length):
        self.len = length
        self.data = torch.randn(length, size)
        # One scalar regression target per sample.
        self.targets = torch.randn(length, 1)

    def __getitem__(self, index):
        return self.data[index], self.targets[index]

    def __len__(self):
        return self.len
dataset = RandomDataset(10, 64)
loader = DataLoader(dataset, batch_size=2, shuffle=True)

# Loss function and optimizer.
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Standard training loop: forward pass, loss, backward pass, parameter update.
for epoch in range(10):
    for batch in loader:
        inputs = batch[0].to(device)
        targets = batch[1].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')
模型并行是将模型的不同部分分配到不同的GPU上。这种方法适用于模型非常大,无法放入单个GPU内存的情况。
# Imports for the model-parallel example. The original imported only torch
# and nn, but the code below also uses optim, DataLoader, and Dataset.
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
# Model-parallel example: each Linear layer lives on its own device.
class SimpleModel(nn.Module):
    """Two-layer model whose layers are placed on separate devices.

    The device placement is parameterized (defaulting to the original
    hard-coded 'cuda:0'/'cuda:1'), so the same model also runs on CPU or on
    a different GPU pair — backward compatible with the no-argument form.
    """

    def __init__(self, device0='cuda:0', device1='cuda:1'):
        super().__init__()
        self.device0 = device0
        self.device1 = device1
        self.fc1 = nn.Linear(10, 10).to(device0)
        self.fc2 = nn.Linear(10, 1).to(device1)

    def forward(self, x):
        # Move activations onto each layer's device before applying it.
        x = self.fc1(x.to(self.device0))
        x = self.fc2(x.to(self.device1))
        return x
# Instantiate the model; its two layers were pinned to cuda:0 / cuda:1
# inside __init__, so no extra .to() call is needed here.
model = SimpleModel()
# Synthetic dataset yielding (features, target) pairs so the training loop
# can unpack inputs and targets from each batch.
class RandomDataset(Dataset):
    """Random regression data: `length` samples of `size` features each.

    Each item is a (features, target) tuple. The original returned only the
    features, so `data[0], data[1]` in the training loop split the batch in
    two instead of separating inputs from labels.
    """

    def __init__(self, size, length):
        self.len = length
        self.data = torch.randn(length, size)
        # One scalar regression target per sample.
        self.targets = torch.randn(length, 1)

    def __getitem__(self, index):
        return self.data[index], self.targets[index]

    def __len__(self):
        return self.len
dataset = RandomDataset(10, 64)
loader = DataLoader(dataset, batch_size=2, shuffle=True)

# Loss function and optimizer.
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training loop. The model moves the inputs across devices internally, but
# the targets must be moved to the device of the outputs before computing
# the loss — the original compared a cuda:1 output against CPU targets,
# which raises a device-mismatch error.
for epoch in range(10):
    for data in loader:
        inputs, targets = data[0], data[1]
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets.to(outputs.device))
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')
分布式并行是在多个节点上进行并行计算,适用于大规模训练任务。
# Imports for the DistributedDataParallel example. The original omitted
# Dataset, which the RandomDataset class below subclasses.
import os

import torch
import torch.distributed as dist
import torch.nn as nn
import torch.optim as optim
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data import DataLoader, Dataset, DistributedSampler
# Initialize the default process group (nccl is the recommended backend for
# multi-GPU training). Launchers such as torchrun set LOCAL_RANK; each
# process must bind to its own GPU before creating the group, otherwise all
# ranks on a node end up on GPU 0 and nccl collectives deadlock or fail.
local_rank = int(os.environ.get("LOCAL_RANK", 0))
if torch.cuda.is_available():
    torch.cuda.set_device(local_rank)
dist.init_process_group(backend='nccl')
# A minimal regression model used to demonstrate DistributedDataParallel.
class SimpleModel(nn.Module):
    """One fully connected layer: 10 input features -> 1 output."""

    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(10, 1)

    def forward(self, x):
        return self.fc(x)
# Build the model on the current CUDA device and wrap it with
# DistributedDataParallel, which synchronizes (all-reduces) gradients
# across ranks during backward().
model = SimpleModel().to(torch.device("cuda"))
model = DDP(model)
# Synthetic dataset yielding (features, target) pairs so the training loop
# can unpack inputs and targets from each batch.
class RandomDataset(Dataset):
    """Random regression data: `length` samples of `size` features each.

    Each item is a (features, target) tuple. The original returned only the
    features, so `data[0], data[1]` in the training loop split the batch in
    two instead of separating inputs from labels.
    """

    def __init__(self, size, length):
        self.len = length
        self.data = torch.randn(length, size)
        # One scalar regression target per sample.
        self.targets = torch.randn(length, 1)

    def __getitem__(self, index):
        return self.data[index], self.targets[index]

    def __len__(self):
        return self.len
dataset = RandomDataset(10, 64)
# DistributedSampler gives each rank a disjoint shard of the dataset.
sampler = DistributedSampler(dataset)
loader = DataLoader(dataset, batch_size=2, sampler=sampler)

# Loss function and optimizer.
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Hoist the device out of the loop — the original rebuilt
# torch.device("cuda") twice per batch.
device = torch.device("cuda")

# Training loop.
for epoch in range(10):
    # Reshuffle the shards differently each epoch.
    sampler.set_epoch(epoch)
    for data in loader:
        inputs, targets = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    # Log from rank 0 only — the original printed once per process,
    # duplicating output across ranks.
    if dist.get_rank() == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}')

# Tear down the process group.
dist.destroy_process_group()
在CentOS系统下,PyTorch的并行计算可以通过数据并行、模型并行和分布式并行来实现。选择哪种方法取决于你的具体需求和硬件资源。希望这些示例代码能帮助你开始在CentOS上进行PyTorch的并行计算。