
Motivation: let's organize the syntax that comes up all the time in PyTorch.



import torch
print(torch.__version__)
!nvidia-smi				# check the GPU (in a notebook)
torch.tensor(7)				# scalar tensor
TENSOR = torch.tensor([[[1,2,3],[4,5,6],[7,8,9]]])
TENSOR.ndim				# 3
TENSOR.shape				# torch.Size([1, 3, 3])
RT = torch.rand(3,4)
RT = torch.rand(size=(3,4))		# same as above
RIT = torch.rand(size=(3,224,224))	# random image-sized tensor (C, H, W)
RIT.ndim, RIT.shape
zeros = torch.zeros(size=(3,4))
ones = torch.ones(size=(3,4))
ones.dtype				# torch.float32 (the default dtype)
zero_to_nine = torch.arange(0,10)
zero_to_nine = torch.arange(start=0, end=10, step=1)	# same as above
torch.zeros_like(input=zero_to_nine)	# zeros with the same shape as the input

float_32_tensor = torch.tensor([3.0, 4.0, 5.0], dtype=None, device=None, requires_grad=False)	# dtype=None defaults to torch.float32 for float inputs
float_16_tensor = float_32_tensor.type(torch.float16)	# cast to half precision
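Mixing dtypes in one operation is a common source of subtle bugs; on recent PyTorch versions the result is type-promoted. A quick check using the two tensors above:

mixed = float_16_tensor * float_32_tensor
mixed.dtype		# torch.float32 on recent versions (float16 gets promoted)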


18.
some_tensor = torch.rand(3,4)
print(some_tensor)
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Device tensor is on: {some_tensor.device}")

19. 
tensor = torch.tensor([1,2,3])
tensor + 10
tensor * 10
tensor - 10
These operations are performed element-wise.

20~22. matrix multiplication

torch.matmul(tensor_A, tensor_B.T)	# .T is the transpose
torch.mm(tensor_A, tensor_B.T)		# torch.mm can be used instead of matmul
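Why the transpose is needed: matmul requires the inner dimensions to match. A small sketch with assumed (3, 2) example matrices:

tensor_A = torch.rand(3, 2)
tensor_B = torch.rand(3, 2)
# torch.matmul(tensor_A, tensor_B)		# error: (3,2) @ (3,2), inner dimensions 2 and 3 don't match
torch.matmul(tensor_A, tensor_B.T).shape	# (3,2) @ (2,3) -> torch.Size([3, 3])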

23~26. tensor aggregation + max/min of tensors + reshaping/viewing/stacking/squeezing/unsqueezing/permuting tensors

x = torch.arange(1, 10)	# -> tensor([1, 2, 3, 4, 5, 6, 7, 8, 9])
x.argmin()		# returns the index of the smallest value
x.argmax()		# returns the index of the largest value
x.shape			# torch.Size([9])
R = x.reshape(1,9)	# adds a dimension
R = x.reshape(9,1)	# turns x into a column vector
V = x.view(1,9)		# shares the same memory, so changing x also changes V (see the quick check after this block)
S = torch.stack([x,x,x,x], dim=0)	# stacks copies of x as rows
S = torch.stack([x,x,x,x], dim=1)	# stacks copies of x as columns
SQ = R.squeeze()	# removes the size-1 dimensions: (1,9) -> (9)
USQ = x.unsqueeze(dim=0)	# (9) -> (1,9)
USQ = x.unsqueeze(dim=1)	# (9) -> (9,1)
image = torch.rand(size=(244,244,3))
P = torch.permute(image, (2,0,1))	# reorders the dimensions: image becomes (3,244,244)
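A quick check (small sketch) that view really shares memory with the original tensor:

x[0] = 99
print(V)	# tensor([[99, 2, 3, 4, 5, 6, 7, 8, 9]]) -> V changed too, because it shares x's memory
x[0] = 1	# restore the original value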


# 27. selecting data in PyTorch (similar to NumPy indexing)
x = torch.arange(1,10).reshape(1,3,3)
x[:,1,1] 		# each ':' keeps that dimension, so one pair of brackets per ':' remains -> tensor([5])
x 			# tensor([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
x[0,1,1]    		# -> tensor(5)



28. Two functions for converting between NumPy and PyTorch
import torch
import numpy as np
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array).type(torch.float32)	# NumPy's default dtype is float64, so convert to torch.float32 afterwards

tensor = torch.ones(size=(2,3,4))
array = tensor.numpy()		# array.dtype is float32 # .numpy() (and everything NumPy-related) only works with tensors on the CPU


29. pytorch reproducibility

RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)
x = torch.rand(3,4)
torch.manual_seed(RANDOM_SEED)
y = torch.rand(3,4) 		# x and y are now exactly identical
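A small check that the two seeded tensors really match (and, as an aside, CUDA has its own generator that can also be seeded):

print(torch.equal(x, y))		# True -> element-wise identical
# torch.cuda.manual_seed(RANDOM_SEED)	# seeds the GPU random number generator for CUDA ops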


30. How to check for a GPU

!nvidia-smi

torch.cuda.is_available()
torch.cuda.device_count()


31. How the GPU is actually used: first store "cuda" or "cpu" in device, then move tensors between the GPU and the CPU.

device = "cuda" if torch.cuda.is_available() else "cpu" 	# PyTorch device-agnostic code

tensor = torch.tensor([1,2,3])
tensor.device		# "cpu" by default; tensors can also be created directly on "cuda"
tensor_move_to_gpu = tensor.to(device)		# same as tensor.to("cuda")
tensor_move_to_gpu.numpy()		# fails: the tensor lives on the GPU
tensor_move_to_cpu = tensor_move_to_gpu.cpu()	# move it back to the CPU
tensor_move_to_cpu.numpy() 		# now it works


Chapter 1. PyTorch Workflow


import torch
from torch import nn # nn contains all of PyTorch’s building blocks for NNs
import matplotlib.pyplot as plt

# check PyTorch
torch.__version__


## data preparing and loading

weight = 0.7
bias = 0.3

# create
start = 0
end = 1
step = 0.02
X = torch.arange(start, end, step).unsqueeze(dim=1)	# add a dimension -> column vector
y = weight * X + bias


# create a train/test split	# X is a column vector with 50 samples: len(X) = 50
train_split = int(0.8 * len(X))
X_train, y_train = X[:train_split], y[:train_split]
X_test, y_test = X[train_split:], y[train_split:]

len(X_train), len(y_train), len(X_test), len(y_test)


# visualizing data
def plot_predictions(train_data=X_train, train_labels=y_train, test_data=X_test, test_labels=y_test, predictions=None):
	plt.figure(figsize=(10,7))
	# Plot training data in blue
	plt.scatter(train_data, train_labels, c="b", s=4, label="Training data")
	# Plot test data in green
	plt.scatter(test_data, test_labels, c="g", s=4, label="Testing data")
	if predictions is not None:
		# Plot the predictions in red if they exist
		plt.scatter(test_data, predictions, c="r", s=4, label="Predictions")
	# Show the legend
	plt.legend(prop={"size": 14})

plot_predictions();


# PyTorch model for linear regression
class LinearRegressionModel(nn.Module): # every NN model subclasses nn.Module
	def __init__(self):
		super().__init__()
		self.weights = nn.Parameter(torch.randn(1, requires_grad=True, dtype=torch.float)) # the model starts from random parameters
		self.bias = nn.Parameter(torch.randn(1, requires_grad=True, dtype=torch.float))

	def forward(self, x: torch.Tensor) -> torch.Tensor:
		return self.weights * x + self.bias	# linear regression formula
# the weights/bias will be updated toward the true values via gradient descent





cheatsheet: https://pytorch.org/tutorials/beginner/ptcheat.html







### Inspecting the contents of a PyTorch model
# Create a random seed (using the same seed keeps the random initialization identical every run)
torch.manual_seed(42)
# Create an instance of the model (subclass of nn.Module)
model_0 = LinearRegressionModel()
# Check out the parameters
list(model_0.parameters())	# one way to inspect the model's parameters
model_0.state_dict()		# another way to inspect the model's parameters


From 42.

class LinearRegressionModel(nn.Module): # every NN model subclasses nn.Module
	def __init__(self):
		super().__init__()
		self.weights = nn.Parameter(torch.randn(1, requires_grad=True, dtype=torch.float)) # the model starts from random parameters
		self.bias = nn.Parameter(torch.randn(1, requires_grad=True, dtype=torch.float))

	def forward(self, x: torch.Tensor) -> torch.Tensor:
		return self.weights * x + self.bias	# linear regression formula
# the weights/bias will be updated via gradient descent

torch.manual_seed(42)
model_0 = LinearRegressionModel()
model_0.state_dict()	# or list(model_0.parameters()) # inspect the parameters

y_preds = model_0(X_train)	# check the (untrained) predictions
loss_fn = nn.L1Loss()		# set up the loss function (MAE)
optimizer = torch.optim.SGD(params=model_0.parameters(), lr=0.0001) # set up the optimizer


# training and testing
torch.manual_seed(42)	# the optimizer updates randomly initialized parameters, so seed for reproducibility
epochs = 100
epoch_count = []; loss_values = []; test_loss_values = []
for epoch in range(epochs):
	### Training
	model_0.train()
	y_pred = model_0(X_train)		# 1. forward pass
	loss = loss_fn(y_pred, y_train)		# 2. calculate the loss
	optimizer.zero_grad()			# 3. reset the gradients
	loss.backward()				# 4. perform backpropagation
	optimizer.step()			# 5. update the parameters

	### Test time
	model_0.eval()	# turns off settings that are not needed for evaluation/testing,
	# i.e. dropout layers and batch norm layers: dropout is disabled in eval mode, and
	# batch norm uses the statistics accumulated during training.
	# (See the small dropout sketch after this loop.)

	# with torch.no_grad():	# needed on older PyTorch versions
	with torch.inference_mode():	# turn off gradient tracking: no gradients are needed during evaluation
		# 1. do the forward pass
		test_pred = model_0(X_test)
		# 2. calculate the loss
		test_loss = loss_fn(test_pred, y_test)

	# print out what's happening
	if epoch % 10 == 0:
		epoch_count.append(epoch)
		loss_values.append(loss.detach())
		test_loss_values.append(test_loss)
		print(f"Epoch: {epoch} | Train loss: {loss:.4f} | Test loss: {test_loss:.4f}")
		print(model_0.state_dict())
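As a small aside on what model.eval() actually toggles, here is a minimal sketch using a standalone nn.Dropout layer (not part of this regression model, purely for illustration):

drop = nn.Dropout(p=0.5)
t = torch.ones(1, 10)
drop.train()
print(drop(t))		# roughly half the values are zeroed, the rest scaled by 1/(1-p) = 2
drop.eval()
print(drop(t))		# identity: dropout is turned off in eval mode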

# plot predictions made in inference mode
with torch.inference_mode():
	y_preds_new = model_0(X_test)
plot_predictions(predictions=y_preds_new)	


# Plot the loss curves
loss_values = torch.tensor(loss_values).numpy()
plt.plot(epoch_count, loss_values, label="Train loss")
plt.plot(epoch_count, test_loss_values, label="Test loss")
plt.title("Training and test loss curves")
plt.ylabel("Loss")
plt.xlabel("Epochs")
plt.legend();


# save model state dict
from pathlib import Path
# 1. create models directory
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# 2. Create model save path
MODEL_NAME = "01_pytorch_workflow_model_0.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

# 3. save the model state dict
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(obj=model_0.state_dict(), f=MODEL_SAVE_PATH) #torch.save(model.state_dict(), PATH)
!ls

# load model state dict
loaded_model_0 = LinearRegressionModel()	# a fresh instance starts with random parameters
loaded_model_0.load_state_dict(torch.load(f=MODEL_SAVE_PATH))	# load the saved state dict into it

# Make some predictions with our loaded model
loaded_model_0.eval()
with torch.inference_mode():
	loaded_model_preds = loaded_model_0(X_test)

loaded_model_preds

# then compare with the original model's predictions
model_0.eval()
with torch.inference_mode():
	y_preds = model_0(X_test)

y_preds == loaded_model_preds # all True: the predictions are identical


56. device-agnostic code



import torch
from torch import nn
import matplotlib.pyplot as plt
import numpy as np
torch.__version__

# device-agnostic code -> use the GPU if it's available
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
!nvidia-smi

# assume we have the same data as before: X_train, y_train, X_test, y_test
weight = 0.7
bias = 0.3
start = 0
end = 1
step = 0.02
X = torch.arange(start,end,step).unsqueeze(dim=1)
y = weight * X + bias
X[:10], y[:10]	# inspect the data

# train test split
train_split = int(0.8 * len(X))
X_train, y_train= X[:train_split], y[:train_split]
X_test, y_test= X[train_split:], y[train_split:]
len(X_train), len(y_train), len(X_test), len(y_test)

# define plotting function
def plot_predictions(train_data=X_train, train_labels=y_train, test_data=X_test, test_labels=y_test, predictions=None):
	plt.figure(figsize=(10,7))
	# Plot training data in blue
	plt.scatter(train_data, train_labels, c="b", s=4, label="Training data")
	# Plot test data in green
	plt.scatter(test_data, test_labels, c="g", s=4, label="Testing data")
	if predictions is not None:
		# Plot the predictions in red if they exist
		plt.scatter(test_data, predictions, c="r", s=4, label="Predictions")
	# Show the legend
	plt.legend(prop={"size": 14})

# Plot the data
plot_predictions(X_train, y_train, X_test, y_test)

# Build a linear model using nn.Linear
class LinearRegressionModelV2(nn.Module):
	def __init__(self):
		super().__init__()
		self.linear_layer = nn.Linear(in_features=1, out_features=1)
		# this layer creates the weight and bias and applies the linear transform for us
		# in_features / out_features: the size of each input / output sample
		
	def forward(self, x: torch.Tensor) -> torch.Tensor:
		return self.linear_layer(x)

# set the manual seed
torch.manual_seed(42)
model_1 = LinearRegressionModelV2()
model_1, model_1.state_dict()

# this prints the model and its weight and bias
#
# torch.nn provides many predefined layers like this.
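What nn.Linear(1, 1) holds, as a small sketch: a weight and a bias parameter, applied as x @ W.T + b, which is the same computation the manual weights/bias did in LinearRegressionModel (x_sample below is just an assumed example batch):

layer = nn.Linear(in_features=1, out_features=1)
print(layer.state_dict())			# 'weight' (shape [1, 1]) and 'bias' (shape [1])
x_sample = torch.rand(5, 1)			# an example batch of 5 samples
manual = x_sample @ layer.weight.T + layer.bias
print(torch.allclose(layer(x_sample), manual))	# True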


# Check the model current device
next(model_1.parameters()).device		# device(type='cpu')
# Set the model to use the target device
model_1.to(device)
next(model_1.parameters()).device		# device(type='cuda', index=0)




### 6.3 training
What we need: a loss function, an optimizer, a training loop, and a testing loop.

# Setup loss function
loss_fn = nn.L1Loss()		# same as MAE
# Setup our optimizer
optimizer = torch.optim.SGD(params=model_1.parameters(), lr=0.01)

# Let’s write a training loop
torch.manual_seed(42)
epochs = 200

# Put data on the target device (device agnostic code for data)
X_train = X_train.to(device)
y_train = y_train.to(device)
X_test = X_test.to(device)
y_test = y_test.to(device)




for epoch in range(epochs):
	model_1.train()
	
	# 1. Forward pass
	y_pred = model_1(X_train)		# the data must also be moved to the GPU
	
	# 2. Calculate the loss
	loss = loss_fn(y_pred, y_train)

	# 3. optimizer zero grad
	optimizer.zero_grad()

	# 4. Perform backpropagation
	loss.backward()

	# 5. Optimizer step
	optimizer.step()

	# Testing
	model_1.eval()
	with torch.inference_mode():
		test_pred = model_1(X_test)
		test_loss = loss_fn(test_pred, y_test)

	# print out what's happening
	if epoch % 10 == 0:
		print(f"Epoch: {epoch} | Train loss: {loss:.4f} | Test loss: {test_loss:.4f}")





6.4 making and evaluating predictions

# Turn model into evaluation mode
model_1.eval()

# Make predictions on the test data
with torch.inference_mode():
	y_preds = model_1(X_test)
y_preds

plot_predictions(predictions=y_preds.cpu())		# matplotlib works with NumPy, so the predictions must be on the CPU

6.5 saving & loading a trained model


# save model state dict
from pathlib import Path
# 1. create models directory
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# 2. Create model save path
MODEL_NAME = "01_pytorch_workflow_model_1.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

# 3. save the model state dict
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(obj=model_1.state_dict(), f=MODEL_SAVE_PATH) # torch.save(model.state_dict(), PATH)
!ls

# Load PyTorch model
# Create a new instance of linear regression model V2
loaded_model_1 = LinearRegressionModelV2()
# Load the saved model_1 state_dict
# (a fresh instance starts with random parameters until the state dict is loaded)
loaded_model_1.load_state_dict(torch.load(f=MODEL_SAVE_PATH))
loaded_model_1.to(device)	# put the loaded model on the target device
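One caveat (an assumption about a different machine, not needed in this exact flow): torch.load restores tensors onto the device they were saved from, so loading a GPU-saved state dict on a CPU-only machine needs map_location:

# state_dict_cpu = torch.load(f=MODEL_SAVE_PATH, map_location="cpu")	# map the saved tensors onto the CPU
# loaded_model_1.load_state_dict(state_dict_cpu)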


next(loaded_model_1.parameters()).device	# check the device; it should be cuda
loaded_model_1.state_dict()			# check that the weights were loaded correctly

# Make some predictions with our loaded model
loaded_model_1.eval()
with torch.inference_mode():
	loaded_model_preds = loaded_model_1(X_test)

y_preds == loaded_model_preds	# all True: the predictions are identical




# Import tqdm for progress bar
from tqdm.auto import tqdm

# Set the seed and start the timer (timer comes from Python's timeit module)
from timeit import default_timer as timer
torch.manual_seed(42)
train_time_start_on_cpu = timer()

# Set the number of epochs (we'll keep this small for faster training times)
epochs = 3

# Create training and testing loop
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n-------")
    ### Training
    train_loss = 0
    # Add a loop to loop through training batches
    for batch, (X, y) in enumerate(train_dataloader):
        model_0.train() 
        # 1. Forward pass
        y_pred = model_0(X)

        # 2. Calculate loss (per batch)
        loss = loss_fn(y_pred, y)
        train_loss += loss # accumulatively add up the loss per epoch 

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

        # Print out how many samples have been seen
        if batch % 400 == 0:
            print(f"Looked at {batch * len(X)}/{len(train_dataloader.dataset)} samples")

    # Divide total train loss by length of train dataloader (average loss per batch per epoch)
    train_loss /= len(train_dataloader)
    
    ### Testing
    # Setup variables for accumulatively adding up loss and accuracy 
    test_loss, test_acc = 0, 0 
    model_0.eval()
    with torch.inference_mode():
        for X, y in test_dataloader:
            # 1. Forward pass
            test_pred = model_0(X)
           
            # 2. Calculate loss (accumulatively)
            test_loss += loss_fn(test_pred, y) # accumulatively add up the loss per epoch

            # 3. Calculate accuracy (preds need to be same as y_true)
            test_acc += accuracy_fn(y_true=y, y_pred=test_pred.argmax(dim=1))
        
        # Calculations on test metrics need to happen inside torch.inference_mode()
        # Divide total test loss by length of test dataloader (per batch)
        test_loss /= len(test_dataloader)

        # Divide total accuracy by length of test dataloader (per batch)
        test_acc /= len(test_dataloader)

    ## Print out what's happening
    print(f"\nTrain loss: {train_loss:.5f} | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%\n")

# Calculate training time      
train_time_end_on_cpu = timer()
total_train_time_model_0 = print_train_time(start=train_time_start_on_cpu, 
                                           end=train_time_end_on_cpu,
                                           device=str(next(model_0.parameters()).device))
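The loop above assumes a few names that these notes never define: train_dataloader / test_dataloader (torch.utils.data.DataLoader objects over a classification dataset), accuracy_fn, and print_train_time. A minimal sketch of what the two helpers could look like (an assumption, not the exact helper code):

from timeit import default_timer as timer   # the timer() used above

def accuracy_fn(y_true, y_pred):
    # percentage of predictions that match the labels
    correct = torch.eq(y_true, y_pred).sum().item()
    return (correct / len(y_pred)) * 100

def print_train_time(start, end, device=None):
    # print and return the elapsed training time in seconds
    total_time = end - start
    print(f"Train time on {device}: {total_time:.3f} seconds")
    return total_time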
