PyTorch syntax notes
Motivation: summarize the syntax that comes up over and over in PyTorch.
import torch
print(torch.__version__)
!nvidia-smi
torch.tensor(7)
TENSOR = torch.tensor([[[1,2,3],[4,5,6],[7,8,9]]])
TENSOR.ndim
TENSOR.shape
RT = torch.rand(3,4)
RT = torch.rand(size=(3,4))
RIT = torch.rand(size=(3,224,224))
RIT.ndim, RIT.shape
zeros = torch.zeros(size=(3,4))
ones = torch.ones(size=(3,4))
ones.dtype
zero_to_nine = torch.arange(0,10)
zero_to_nine = torch.arange(start=0, end=10, step=1)
torch.zeros_like(input=zero_to_nine)
float_32_tensor = torch.tensor([3.0, 4.0, 5.0], dtype=None, device=None, requires_grad=False)
float_16_tensor = float_32_tensor.type(torch.float16)
18. tensor attributes (dtype, shape, device)
some_tensor = torch.rand(3,4)
print(some_tensor)
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Device tensor is on: {some_tensor.device}")
19. basic tensor operations
tensor = torch.tensor([1,2,3])
tensor + 10
tensor * 10
tensor - 10
All of these operations are performed element-wise.
20~22. matrix multiplication
torch.matmul(tensor_A, tensor_B.T) # .T is the transpose
torch.mm(tensor_A, tensor_B.T) ## torch.mm can be used instead of matmul
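A small shape check I added (the example tensors are my own, not from the notes): the inner dimensions have to match for matrix multiplication, which is why the transpose is needed above.
tensor_A = torch.tensor([[1., 2.], [3., 4.], [5., 6.]])    # shape (3, 2)
tensor_B = torch.tensor([[7., 10.], [8., 11.], [9., 12.]]) # shape (3, 2)
# torch.matmul(tensor_A, tensor_B) would raise an error: (3, 2) @ (3, 2) has mismatched inner dims
torch.matmul(tensor_A, tensor_B.T).shape  # (3, 2) @ (2, 3) -> torch.Size([3, 3])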
23~26. tensor aggregation + max/min of tensors + reshaping/viewing/stacking/squeezing/unsqueezing/permuting tensors
x = torch.arange(0,100,10) # -> tensor([0, 10, 20, ..., 90])
x.argmin() # returns the index of the smallest value
x.argmax() # returns the index of the largest value
x.shape # torch.Size([10])
R = x.reshape(1,10) # adds a dimension of size 1.
R = x.reshape(10,1) # turns it into a column vector.
V = x.view(1,10) # points to the same memory, so changing x also changes V.
S = torch.stack([x,x,x,x], dim=0) # stacks the tensors as rows.
S = torch.stack([x,x,x,x], dim=1) # lays them out as columns.
SQ = R.squeeze() # removes dimensions of size 1, so (10,1) (or (1,10)) becomes (10).
USQ = x.unsqueeze(dim=0) # turns (10) into (1,10).
USQ = x.unsqueeze(dim=1) # turns (10) into (10,1).
image = torch.rand(size=(244,244,3))
P = torch.permute(image, (2,0,1)) # the image dimensions become (3, 244, 244).
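A quick check I added (illustrative, not from the notes) that view() really shares memory with its source tensor:
x2 = torch.arange(0, 100, 10)
V2 = x2.view(1, 10)
x2[0] = 999
V2[0, 0]                        # tensor(999): changing x2 also changed V2
V2.data_ptr() == x2.data_ptr()  # True: same underlying storage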
# 27. select data in PyTorch (similar to NumPy)
x = torch.arange(1,10).reshape(1,3,3)
x[:,1,1] # the result keeps one pair of brackets per ':' used # returns tensor([5])
x #tensor([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
x[0,1,1] # returns tensor(5)
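A couple more indexing examples on the same x (my additions, for illustration):
x[0]        # tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) -- the first (and only) 3x3 matrix
x[:, :, 1]  # tensor([[2, 5, 8]]) -- the second column of each matrix, one bracket per ':'
x[0, 2, 2]  # tensor(9)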
28. two functions for converting between NumPy and PyTorch
import torch
import numpy as np
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array).type(torch.float32) # NumPy's default dtype is float64, so convert to torch.float32 afterwards.
tensor = torch.ones(size=(2,3,4))
array = tensor.numpy() # array.dtype becomes float32. # numpy() only works on the CPU; anything NumPy-related has to happen on the CPU.
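One extra point worth noting (my addition): torch.from_numpy() and tensor.numpy() share memory with their source, so in-place changes show up on both sides.
arr = np.arange(1.0, 8.0)
t = torch.from_numpy(arr)  # shares memory with arr (no .type() copy here)
arr[0] = 100.0
t[0]                       # tensor(100., dtype=torch.float64)
t2 = torch.ones(3)
a2 = t2.numpy()            # shares memory with the CPU tensor t2
t2 += 1
a2                         # array([2., 2., 2.], dtype=float32)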
29. pytorch reproducibility
RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)
x = torch.rand(3,4)
torch.manual_seed(RANDOM_SEED)
y = torch.rand(3,4) # x and y are now exactly identical.
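To confirm this explicitly (my addition), compare the two tensors element by element:
torch.equal(x, y)  # True: every element matches exactly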
30. how to check for a GPU
!nvidia-smi
torch.cuda.is_available()
torch.cuda.device_count()
31. how to actually use the GPU: first store either "cpu" or "cuda" in device, then move tensors to the GPU/CPU.
device = "cuda" if torch.cuda.is_available() else "cpu" # PyTorch device-agnostic code
tensor = torch.tensor([1,2,3])
tensor.device # reports cpu by default; it shows cuda once the tensor is moved there.
tensor_move_to_gpu = tensor.to(device) # tensor.to("cuda")
tensor_move_to_gpu.numpy() # raises an error because the tensor is on the GPU
tensor_move_to_cpu = tensor_move_to_gpu.cpu() # move it back to the CPU.
tensor_move_to_cpu.numpy() # now it works.
Chapter 1. PyTorch Workflow
import torch
from torch import nn # nn contains all of PyTorch’s building blocks for NNs
import matplotlib.pyplot as plt
# check PyTorch
torch.__version__
## data preparing and loading
weight = 0.7
bias = 0.3
# create
start = 0
end = 1
step = 0.02
X = torch.arange(start, end, step).unsqueeze(dim=1) # add a dimension, making X a column vector.
y = weight * X + bias
#create a train/test split # X is a column vector with 50 entries; len(X) = 50.
train_split = int(0.8 * len(X))
X_train, y_train = X[:train_split], y[:train_split]
X_test, y_test = X[train_split:], y[train_split:]
len(X_train), len(y_train), len(X_test), len(y_test)
# visualizing data
def plot_predictions(train_data=X_train, train_labels=y_train, test_data=X_test, test_labels=y_test, predictions=None):
    plt.figure(figsize=(10,7))
    # Plot training data in blue
    plt.scatter(train_data, train_labels, c="b", s=4, label="Training data")
    # Plot test data in green
    plt.scatter(test_data, test_labels, c="g", s=4, label="Testing data")
    if predictions is not None:
        # Plot the predictions if they exist
        plt.scatter(test_data, predictions, c="r", s=4, label="Predictions")
    # show the legend
    plt.legend(prop={"size": 14})
plot_predictions();
# pytorch model for linear regression
class LinearRegressionModel(nn.Module): # every NN module subclasses nn.Module
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(1, requires_grad=True, dtype=torch.float)) # the model starts from random values,
        self.bias = nn.Parameter(torch.randn(1, requires_grad=True, dtype=torch.float)) # the model starts from random values,
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.weights * x + self.bias # linear regression formula
# the weight/bias will be updated via gradient descent.
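To make the gradient-descent comment concrete, here is a rough sketch of one manual update step (my illustration; the actual training loop below uses a loss function and optimizer instead):
demo_model = LinearRegressionModel()
demo_pred = demo_model(X_train)
demo_loss = torch.mean(torch.abs(demo_pred - y_train))  # MAE by hand (nn.L1Loss is used later)
demo_loss.backward()                                    # compute gradients for weights and bias
with torch.no_grad():                                   # update without tracking the update itself
    lr = 0.01
    demo_model.weights -= lr * demo_model.weights.grad
    demo_model.bias -= lr * demo_model.bias.grad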
Cheat sheet: https://pytorch.org/tutorials/beginner/ptcheat.html
### Checking the contents of a PyTorch model
# create random seed. # use the same seed so the random initialization comes out the same every time.
torch.manual_seed(42)
# Create an instance of the model (subclass of nn.Module)
model_0 = LinearRegressionModel()
# Check out the parameter
list(model_0.parameters()) # one way to inspect the model's parameters
model_0.state_dict() # another way to inspect the model's parameters
From 42 onwards:
class LinearRegressionModel(nn.Module): # every NN module subclasses nn.Module
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(1, requires_grad=True, dtype=torch.float)) # the model starts from random values,
        self.bias = nn.Parameter(torch.randn(1, requires_grad=True, dtype=torch.float)) # the model starts from random values,
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.weights * x + self.bias # linear regression formula
# the weight/bias will be updated via gradient descent.
torch.manual_seed(42)
model_0 = LinearRegressionModel()
model_0.state_dict() # list(model_0.parameters()) # inspect the parameters
y_preds = model_0(X_train) # check the predictions
loss_fn = nn.L1Loss() # set the loss function
optimizer = torch.optim.SGD(params=model_0.parameters(), lr=0.0001) # set the optimizer
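A quick sanity check I added (not from the lecture): nn.L1Loss with the default reduction is just the mean absolute error.
with torch.inference_mode():
    demo_preds = model_0(X_train)
torch.allclose(loss_fn(demo_preds, y_train), torch.mean(torch.abs(demo_preds - y_train)))  # True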
# training and testing
torch.manual_seed(42) # because training starts from random values.
epochs = 100
epoch_count = []; loss_values = []; test_loss_values = [];
for epoch in range(epochs):
    model_0.train()
    # 1. forward pass
    y_pred = model_0(X_train)
    # 2. calculate the loss
    loss = loss_fn(y_pred, y_train)
    print(f"Loss: {loss}")
    # 3. optimizer zero grad
    optimizer.zero_grad()
    # 4. perform backpropagation
    loss.backward()
    # 5. optimizer step: update the parameters toward the targets
    optimizer.step()
    ### Test time
    model_0.eval() # turns off settings that are not needed for evaluation/testing,
    # i.e. dropout layers, batch norm layers. Dropout is disabled in eval mode, and BN layers use the statistics computed during training.
    # with torch.no_grad(): # older versions have to use this instead.
    with torch.inference_mode(): # turn off gradient tracking: no gradients are needed during evaluation.
        # 1. do the forward pass
        test_pred = model_0(X_test)
        # 2. calculate the loss
        test_loss = loss_fn(test_pred, y_test)
    # print out what's happening
    if epoch % 10 == 0:
        epoch_count.append(epoch)
        loss_values.append(loss.item())
        test_loss_values.append(test_loss.item())
        print(f"Epoch: {epoch} | Train loss: {loss:.4f} | Test loss: {test_loss:.4f}")
        print(model_0.state_dict())
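Side note (my addition, not from the lecture): outputs produced inside torch.inference_mode() do not track gradients, which is exactly why it is used during evaluation.
with torch.inference_mode():
    demo_out = model_0(X_test)
demo_out.requires_grad  # False: no gradient tracking inside inference_mode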
# plot the predictions made in inference mode
with torch.inference_mode():
    y_preds_new = model_0(X_test)
plot_predictions(predictions=y_preds_new)
# Plot the loss curves
loss_values = torch.tensor(loss_values).numpy()
plt.plot(epoch_count, loss_values, label="Train loss")
plt.plot(epoch_count, test_loss_values, label="Test loss")
plt.title("Training and test loss curves")
plt.ylabel("Loss")
plt.xlabel("Epochs")
plt.legend();
# save model state dict
from pathlib import Path
# 1. create models directory
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)
# 2. Create model save path
MODEL_NAME = "01_pytorch_workflow_model_0.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME
# 3. save the model state dict
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(obj=model_0.state_dict(), f=MODEL_SAVE_PATH) #torch.save(model.state_dict(), PATH)
!ls
# load model state dict
loaded_model_0 = LinearRegressionModel()
loaded_model_0.state_dict() # before loading, the parameters are still randomly initialized.
loaded_model_0.load_state_dict(torch.load(f=MODEL_SAVE_PATH))
# Make some predictions with our loaded model
loaded_model_0.eval()
with torch.inference_mode():
    loaded_model_preds = loaded_model_0(X_test)
loaded_model_preds
# then compare with the original model's predictions:
model_0.eval()
with torch.inference_mode():
    y_preds = model_0(X_test)
y_preds == loaded_model_preds # all True: the predictions are identical.
56. device-agnostic code
import torch
from torch import nn
import matplotlib.pyplot as plt
import numpy as np
torch.__version__
# device-agnostic code -> use the GPU if one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
!nvidia-smi
# suppose we have the data: X_train, y_train, X_test, y_test.
weight = 0.7
bias = 0.3
start = 0
end = 1
step = 0.02
X = torch.arange(start,end,step).unsqueeze(dim=1)
y = weight * X + bias
X[:10], y[:10] # check the data
# train test split
train_split = int(0.8 * len(X))
X_train, y_train= X[:train_split], y[:train_split]
X_test, y_test= X[train_split:], y[train_split:]
len(X_train), len(y_train), len(X_test), len(y_test)
# define plotting function
def plot_predictions(train_data=X_train, train_labels=y_train, test_data=X_test, test_labels=y_test, predictions=None):
    plt.figure(figsize=(10,7))
    # Plot training data in blue
    plt.scatter(train_data, train_labels, c="b", s=4, label="Training data")
    # Plot test data in green
    plt.scatter(test_data, test_labels, c="g", s=4, label="Testing data")
    if predictions is not None:
        # Plot the predictions if they exist
        plt.scatter(test_data, predictions, c="r", s=4, label="Predictions")
    # show the legend
    plt.legend(prop={"size": 14})
# Plot the data
plot_predictions(X_train, y_train, X_test, y_test)
# let's build a linear model.
class LinearRegressionModelV2(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_layer = nn.Linear(in_features=1, out_features=1)
        # this layer takes care of forward(), self.weight and self.bias for us.
        # in_features / out_features: the size of each input/output sample.
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.linear_layer(x)
# set the manual seed
torch.manual_seed(42)
model_1 = LinearRegressionModelV2()
model_1, model_1.state_dict()
# this prints the weight and bias.
#
# torch.nn provides many predefined layers to build models from.
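To see what nn.Linear is doing under the hood, a small check I added (illustrative; x_demo is my own example input): the layer computes x @ W.T + b with its own weight and bias parameters.
W = model_1.linear_layer.weight   # shape (1, 1)
b = model_1.linear_layer.bias     # shape (1,)
x_demo = torch.tensor([[2.0]])
model_1(x_demo)                   # same result as the manual computation below
x_demo @ W.T + b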
# Check the model current device
next(model_1.parameters()).device # device(type='cpu')
# Set the model to use the target device
model_1.to(device)
next(model_1.parameters()).device # device(type='cuda', index=0)
### 6.3 training
Loss function
optimizer
Training loop
Testing loop
# Setup loss function
loss_fn = nn.L1Loss() # same as MAE
# Setup our optimizer
optimizer = torch.optim.SGD(params=model_1.parameters(), lr=0.01)
# Let’s write a training loop
torch.manual_seed(42)
epochs = 200
# Put data on the target device (device agnostic code for data)
X_train = X_train.to(device)
y_train = y_train.to(device)
X_test = X_test.to(device)
y_test = y_test.to(device)
for epoch in range(epochs):
    model_1.train()
    # 1. Forward pass
    y_pred = model_1(X_train) # the data also has to be on the GPU.
    # 2. Calculate the loss
    loss = loss_fn(y_pred, y_train)
    # 3. Optimizer zero grad
    optimizer.zero_grad()
    # 4. Perform backpropagation
    loss.backward()
    # 5. Optimizer step
    optimizer.step()
    # Testing
    model_1.eval()
    with torch.inference_mode():
        test_pred = model_1(X_test)
        test_loss = loss_fn(test_pred, y_test)
    # print out what's happening
    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Train loss: {loss:.4f} | Test loss: {test_loss:.4f}")
6.4 making and evaluating predictions
# Turn model into evaluation mode
model_1.eval()
# Make predictions on the test data
with torch.inference_mode():
    y_preds = model_1(X_test)
y_preds
plot_predictions(predictions=y_preds.cpu()) # matplotlib works on NumPy/CPU, so move the predictions back to the CPU.
6.5 saving & loading a trained model
# save model state dict
from pathlib import Path
# 1. create models directory
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)
# 2. Create model save path
MODEL_NAME = "01_pytorch_workflow_model_1.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME
# 3. save the model state dict
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(obj=model_1.state_dict(), f=MODEL_SAVE_PATH) #torch.save(model.state_dict(), PATH)
!ls
# Load PyTorch model
# Create a new instance of linear regression model V2
loaded_model_1 = LinearRegressionModelV2()
# Load the saved model_1 state_dict
# (before loading, the new instance's parameters are randomly initialized.)
loaded_model_1.load_state_dict(torch.load(f=MODEL_SAVE_PATH))
loaded_model_1.to(device) # put the loaded model on the target device.
next(loaded_model_1.parameters()).device # check that it is on cuda; it should be.
loaded_model_1.state_dict() # check that the weights were loaded correctly
# Make some predictions with our loaded model
loaded_model_1.eval()
with torch.inference_mode():
    loaded_model_preds = loaded_model_1(X_test)
y_preds == loaded_model_preds # all True: the predictions are identical.
# Import tqdm for progress bar
from tqdm.auto import tqdm
# Set the seed and start the timer
torch.manual_seed(42)
train_time_start_on_cpu = timer()
# Set the number of epochs (we'll keep this small for faster training times)
epochs = 3
# Create training and testing loop
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n-------")
    ### Training
    train_loss = 0
    # Add a loop to loop through training batches
    for batch, (X, y) in enumerate(train_dataloader):
        model_0.train()
        # 1. Forward pass
        y_pred = model_0(X)
        # 2. Calculate loss (per batch)
        loss = loss_fn(y_pred, y)
        train_loss += loss # accumulatively add up the loss per epoch
        # 3. Optimizer zero grad
        optimizer.zero_grad()
        # 4. Loss backward
        loss.backward()
        # 5. Optimizer step
        optimizer.step()
        # Print out how many samples have been seen
        if batch % 400 == 0:
            print(f"Looked at {batch * len(X)}/{len(train_dataloader.dataset)} samples")
    # Divide total train loss by length of train dataloader (average loss per batch per epoch)
    train_loss /= len(train_dataloader)
    ### Testing
    # Setup variables for accumulatively adding up loss and accuracy
    test_loss, test_acc = 0, 0
    model_0.eval()
    with torch.inference_mode():
        for X, y in test_dataloader:
            # 1. Forward pass
            test_pred = model_0(X)
            # 2. Calculate loss (accumulatively)
            test_loss += loss_fn(test_pred, y) # accumulatively add up the loss per epoch
            # 3. Calculate accuracy (preds need to be same as y_true)
            test_acc += accuracy_fn(y_true=y, y_pred=test_pred.argmax(dim=1))
        # Calculations on test metrics need to happen inside torch.inference_mode()
        # Divide total test loss by length of test dataloader (per batch)
        test_loss /= len(test_dataloader)
        # Divide total accuracy by length of test dataloader (per batch)
        test_acc /= len(test_dataloader)
    ## Print out what's happening
    print(f"\nTrain loss: {train_loss:.5f} | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%\n")
# Calculate training time
train_time_end_on_cpu = timer()
total_train_time_model_0 = print_train_time(start=train_time_start_on_cpu,
                                            end=train_time_end_on_cpu,
                                            device=str(next(model_0.parameters()).device))
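The loop above relies on timer(), accuracy_fn(), and print_train_time(), which are not defined in these notes. Minimal sketches of what they could look like, based only on how they are called above (my assumptions, not the lecture's exact helpers):
from timeit import default_timer as timer  # timer() used above

def accuracy_fn(y_true, y_pred):
    # percentage of predictions that match the labels
    correct = torch.eq(y_true, y_pred).sum().item()
    return (correct / len(y_pred)) * 100

def print_train_time(start, end, device=None):
    # print and return the elapsed training time in seconds
    total_time = end - start
    print(f"Train time on {device}: {total_time:.3f} seconds")
    return total_time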