# First-time setup: create the virtual environment, install packages, freeze versions
py -3.12 -m venv .venv
.\.venv\Scripts\activate
py -m pip install --upgrade pip
pip install torch torchvision torchaudio
pip install jupyter torchinfo matplotlib tqdm
pip freeze > requirements.txt

# Reproducing the same environment later from requirements.txt
ls
py -3.12 -m venv .venv
.\.venv\Scripts\activate
py -m pip install --upgrade pip
pip install -r .\requirements.txt
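A quick sanity check before moving on: print the installed PyTorch version and whether CUDA is visible.

py -c "import torch; print(torch.__version__, torch.cuda.is_available())"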
data_setup.py
from pathlib import Path

from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset


#############################################
# Make Dataset function
#############################################
def create_dataset(
    root: str = "./data/pizza_steak_sushi",
    train: bool = True,
    transform: transforms.Compose = None
) -> Dataset:
    # Point at the train or test split under the dataset root
    if train:
        root = Path(root) / "train"
    else:
        root = Path(root) / "test"
    return datasets.ImageFolder(root=root, transform=transform)


#############################################
# Make DataLoader function
#############################################
def create_dataloader(
    dataset: Dataset,
    batch_size: int = 32,
    shuffle: bool = True
) -> DataLoader:
    return DataLoader(dataset=dataset, batch_size=batch_size, shuffle=shuffle)
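A quick sketch of how these two helpers fit together (assuming the pizza_steak_sushi data already sits under ./data):

from torchvision import transforms
from data_setup import create_dataset, create_dataloader

transform = transforms.Compose([transforms.Resize((64, 64)), transforms.ToTensor()])
train_dataset = create_dataset(train=True, transform=transform)  # reads ./data/pizza_steak_sushi/train
train_dataloader = create_dataloader(dataset=train_dataset, batch_size=32)
X, y = next(iter(train_dataloader))  # X: [32, 3, 64, 64], y: [32]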
engine.py
import torch
from tqdm.auto import tqdm
from typing import Dict, List, Tuple

from utils import EarlyStopper


def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device: torch.device,
               valid_fn=None) -> Tuple[float, float]:
    # Put model in train mode
    model.train()

    # Setup train loss and train metric values
    train_loss, train_valid = 0, 0

    # Loop through data loader data batches
    for X, y in tqdm(dataloader, desc="Train Loop", leave=False):
        # Send data to target device
        X, y = X.to(device), y.to(device)

        # 1. Forward pass
        y_pred = model(X)

        # 2. Calculate and accumulate loss (and the optional metric)
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()
        if valid_fn:
            train_valid += valid_fn(y_pred.argmax(dim=1).cpu().numpy(), y.cpu().numpy())

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

    # Adjust metrics to get average loss and metric per batch
    train_loss = train_loss / len(dataloader)
    train_valid = train_valid / len(dataloader)
    return train_loss, train_valid


def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              device: torch.device,
              valid_fn=None) -> Tuple[float, float]:
    # Put model in eval mode
    model.eval()

    # Setup test loss and test metric values
    test_loss, test_valid = 0, 0

    # Turn on inference context manager
    with torch.inference_mode():
        # Loop through DataLoader batches
        for X, y in tqdm(dataloader, desc="Testing Loop", leave=False):
            # Send data to target device
            X, y = X.to(device), y.to(device)

            # 1. Forward pass
            test_pred_logits = model(X)

            # 2. Calculate and accumulate loss
            loss = loss_fn(test_pred_logits, y)
            test_loss += loss.item()

            # Calculate and accumulate the optional metric
            if valid_fn:
                test_valid += valid_fn(test_pred_logits.argmax(dim=1).cpu().numpy(), y.cpu().numpy())

    # Adjust metrics to get average loss and metric per batch
    test_loss = test_loss / len(dataloader)
    test_valid = test_valid / len(dataloader)
    return test_loss, test_valid


def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device,
          earlystopper: EarlyStopper,
          valid_fn=None) -> Dict[str, List]:
    # Create empty results dictionary
    results = {"train_loss": [],
               "train_valid": [],
               "test_loss": [],
               "test_valid": []}

    # Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs), desc="Epoch Loop", leave=True):
        # Train the model
        train_loss, train_valid = train_step(model=model,
                                             dataloader=train_dataloader,
                                             loss_fn=loss_fn,
                                             optimizer=optimizer,
                                             device=device, valid_fn=valid_fn)
        # Evaluate the model
        test_loss, test_valid = test_step(model=model,
                                          dataloader=test_dataloader,
                                          loss_fn=loss_fn,
                                          device=device, valid_fn=valid_fn)

        # Print out what's happening
        print(
            f"Epoch: {epoch+1} | "
            f"train_loss: {train_loss:.4f} | "
            f"test_loss: {test_loss:.4f}"
        )

        # Update results dictionary
        results["train_loss"].append(train_loss)
        results["test_loss"].append(test_loss)
        if valid_fn:
            results["train_valid"].append(train_valid)
            results["test_valid"].append(test_valid)

        # Stop early if the test loss has not improved for num_trials epochs
        if not earlystopper.is_continuable(model, test_loss):
            print("Early stopping...")
            break

    # Return the filled results at the end of the epochs
    return results
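valid_fn is left pluggable: any callable that takes the predicted labels and the true labels as NumPy arrays and returns a float will work. A minimal accuracy function, as one possible choice:

import numpy as np

def accuracy_fn(y_pred: np.ndarray, y_true: np.ndarray) -> float:
    # Fraction of correct predictions in the batch
    return float((y_pred == y_true).mean())

# usage: train(..., valid_fn=accuracy_fn)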
model_builder.py
from torch import nn

from models.vgg_model import TinyVGG


def create_model(
    color_size: int,
    target_size: int,
    is_trained: bool = False
) -> nn.Module:
    if is_trained:
        return None  # TODO: return a pretrained model here
    return TinyVGG(color_size=color_size, target_size=target_size)
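models/vgg_model.py is not shown in this post. For reference, here is a minimal TinyVGG sketch that matches the constructor call above; the layer widths and padding choice are assumptions, sized for the 64x64 inputs used in main.py:

from torch import nn

class TinyVGG(nn.Module):
    def __init__(self, color_size: int, target_size: int, hidden_size: int = 10):
        super().__init__()
        self.block_1 = nn.Sequential(
            nn.Conv2d(color_size, hidden_size, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(hidden_size, hidden_size, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)  # 64x64 -> 32x32
        )
        self.block_2 = nn.Sequential(
            nn.Conv2d(hidden_size, hidden_size, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(hidden_size, hidden_size, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)  # 32x32 -> 16x16
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(hidden_size * 16 * 16, target_size)
        )

    def forward(self, x):
        return self.classifier(self.block_2(self.block_1(x)))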
utils.py
import torch
import numpy as np


class EarlyStopper(object):
    def __init__(self, num_trials, save_path):
        self.num_trials = num_trials
        self.trial_counter = 0
        self.best_loss = np.inf
        self.save_path = save_path

    def is_continuable(self, model, loss):
        if loss < self.best_loss:  # current loss is lower than the best loss so far
            self.best_loss = loss  # update the best loss
            self.trial_counter = 0  # reset the counter
            torch.save(model, self.save_path)  # save the model with the best loss
            return True
        elif self.trial_counter + 1 < self.num_trials:  # no improvement, but still under the trial limit
            self.trial_counter += 1  # count one more failed trial
            return True
        else:  # no improvement and the trial limit has been reached
            return False

    def get_best_model(self, device):
        # torch.save(model, ...) pickles the whole module, so on PyTorch >= 2.6
        # (where weights_only defaults to True) loading it needs weights_only=False
        return torch.load(self.save_path, weights_only=False).to(device)
main.py
import argparse

import torch
from torchvision import transforms

from data_setup import create_dataloader, create_dataset
from model_builder import create_model
from engine import train
from utils import EarlyStopper


def main(args):
    """
    1. create dataset
    2. create dataloader
    3. create model
    4. training
    """
    # Data preprocessing
    transform = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor()
    ])
    train_dataset = create_dataset(train=True, transform=transform)
    test_dataset = create_dataset(train=False, transform=transform)
    train_dataloader = create_dataloader(dataset=train_dataset, batch_size=args.batch_size)
    test_dataloader = create_dataloader(dataset=test_dataset, batch_size=args.batch_size)

    # Infer channel count and class count from the data itself
    feature, _ = train_dataset[0]
    vgg_model = create_model(color_size=feature.shape[0],
                             target_size=len(train_dataset.classes), is_trained=False)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    vgg_model.to(device)

    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(params=vgg_model.parameters(), lr=args.lr)
    earlystopper = EarlyStopper(num_trials=5, save_path=args.trained_model)

    train(model=vgg_model, train_dataloader=train_dataloader, test_dataloader=test_dataloader,
          optimizer=optimizer, loss_fn=loss_fn, epochs=args.epochs, device=device,
          earlystopper=earlystopper)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch_size", default=32, type=int)
    parser.add_argument("--lr", default=0.01, type=float)
    parser.add_argument("--epochs", default=50, type=int)
    # parser.add_argument("--hidden_size", default=10, type=int)
    parser.add_argument("--trained_model", default="./models/trained_model.pth")
    args = parser.parse_args()

    print("=======================================")
    print(f"epochs: {args.epochs} / batch_size: {args.batch_size}")
    print("=======================================")
    main(args)
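With the five files in place, training starts from the command line; the arguments shown are the parser defaults, so each can be omitted:

py main.py --batch_size 32 --lr 0.01 --epochs 50 --trained_model ./models/trained_model.pth

The ./models folder already exists here (vgg_model.py lives in it), which matters because EarlyStopper saves the best model into that path.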
Reference: Training a Classifier (PyTorch CIFAR-10 tutorial)
https://tutorials.pytorch.kr/beginner/blitz/cifar10_tutorial.html?highlight=cifar