SK Networks Family AI Camp, 10th Cohort

SK Networks AI Camp, Day 34 - PyTorch TensorBoard, Classifier

netsma 2025. 2. 27. 09:09

 

Environment setup - create a virtual environment and install the required packages:

py -3.12 -m venv .venv
.\.venv\Scripts\activate
py -m pip install --upgrade pip
pip install torch torchvision torchaudio
pip install jupyter torch torchvision torchinfo matplotlib tqdm

pip freeze > requirements.txt
ls

Recreating the same environment from requirements.txt (e.g. on another machine):

py -3.12 -m venv .venv
.\.venv\Scripts\activate
py -m pip install --upgrade pip
pip install -r .\requirements.txt

 

data_setup.py

from pathlib import Path

from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset

#############################################
# Dataset creation function
#############################################

def create_dataset(
    root: str = "./data/pizza_steak_sushi",
    train: bool = True,
    transform: transforms.Compose = None
) -> Dataset:
   
    if train:
        root = Path(root) / "train"  
    else:
        root = Path(root) / "test"

    return datasets.ImageFolder(root=root, transform=transform)

#############################################
# DataLoader creation function
#############################################

def create_dataloader(
    dataset: Dataset,
    batch_size: int = 32,
    shuffle: bool = True
) -> DataLoader:

    return DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=shuffle
    )
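
A quick usage sketch for the two helpers above, assuming the default ./data/pizza_steak_sushi folder layout and the same 64x64 transform that main.py uses later (illustrative only, not part of the original script):

from torchvision import transforms
from data_setup import create_dataset, create_dataloader

transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor()
])

train_dataset = create_dataset(train=True, transform=transform)
train_dataloader = create_dataloader(dataset=train_dataset, batch_size=32)

# One batch: images are (batch, channels, height, width), labels are class indices
images, labels = next(iter(train_dataloader))
print(images.shape, labels.shape)  # e.g. torch.Size([32, 3, 64, 64]) torch.Size([32])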

 

engine.py

import torch

from tqdm.auto import tqdm
from typing import Dict, List, Tuple

from utils import EarlyStopper

def train_step(model: torch.nn.Module,
                dataloader: torch.utils.data.DataLoader,
                loss_fn: torch.nn.Module,
                optimizer: torch.optim.Optimizer,
                device: torch.device,
                valid_fn=None) -> Tuple[float, float]:

    # Put model in train mode
    model.train()

    # Setup train loss and train accuracy values
    train_loss, train_valid = 0, 0

    # Loop through data loader data batches
    for _, (X, y) in tqdm(enumerate(dataloader)
                        , desc="Train Loop", leave=False, total=len(dataloader)):
        # Send data to target device
        X, y = X.to(device), y.to(device)

        # 1. Forward pass
        y_pred = model(X)

        # 2. Calculate and accumulate loss
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()

        if valid_fn:
            train_valid += valid_fn(y_pred.argmax(dim=1).cpu().numpy(), y.cpu().numpy())

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

    # Adjust metrics to get average loss and accuracy per batch
    train_loss = train_loss / len(dataloader)
    train_valid = train_valid / len(dataloader)
    return train_loss, train_valid

def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              device: torch.device,
              valid_fn=None) -> Tuple[float, float]:

    # Put model in eval mode
    model.eval()

    # Setup test loss and test accuracy values
    test_loss, test_valid = 0, 0

    # Turn on inference context manager
    with torch.inference_mode():
        # Loop through DataLoader batches
        for _, (X, y) in tqdm(enumerate(dataloader)
                        , desc="Testing Loop", leave=False, total=len(dataloader)):
            # Send data to target device
            X, y = X.to(device), y.to(device)

            # 1. Forward pass
            test_pred_logits = model(X)

            # 2. Calculate and accumulate loss
            loss = loss_fn(test_pred_logits, y)
            test_loss += loss.item()

            # Calculate and accumulate accuracy
            if valid_fn:
                test_valid += valid_fn(test_pred_logits.argmax(dim=1).cpu().numpy(), y.cpu().numpy())

    # Adjust metrics to get average loss and accuracy per batch
    test_loss = test_loss / len(dataloader)
    test_valid = test_valid / len(dataloader)
    return test_loss, test_valid


def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device,
          earlystopper: EarlyStopper,
          valid_fn=None) -> Dict[str, List]:

    # Create empty results dictionary
    results = {"train_loss": [],
                "train_valid": [],
                "test_loss": [],
                "test_valid": []
    }

    # Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs), desc="Epoch Loop", leave=True):
        # Train the model for one epoch
        train_loss, train_valid = train_step(model=model,
                                            dataloader=train_dataloader,
                                            loss_fn=loss_fn,
                                            optimizer=optimizer,
                                            device=device, valid_fn=valid_fn)
        # Evaluate the model on the test set
        test_loss, test_valid = test_step(model=model,
                                        dataloader=test_dataloader,
                                        loss_fn=loss_fn,
                                        device=device, valid_fn=valid_fn)

        # Print out what's happening
        print(
            f"Epoch: {epoch+1} | "
            f"train_loss: {train_loss:.4f} | "
            f"test_loss: {test_loss:.4f} | "
        )

        # Update results dictionary
        results["train_loss"].append(train_loss)
        results["test_loss"].append(test_loss)
        if valid_fn:
            results["train_valid"].append(train_valid)
            results["test_valid"].append(test_valid)

       
        if not earlystopper.is_continuable(model, test_loss):
            print("Early stopping...")
            break

    # Return the filled results at the end of the epochs
    return results
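
valid_fn is an optional metric hook: train_step and test_step call it with the predicted class indices and the true labels (both as NumPy arrays) and average the returned value over all batches. It is never defined in this post, so the following is a minimal sketch of one possible implementation; the name accuracy_fn is my own, not from the original code:

import numpy as np

def accuracy_fn(y_pred: np.ndarray, y_true: np.ndarray) -> float:
    # Fraction of correct predictions in a single batch
    return float((y_pred == y_true).mean())

Passing train(..., valid_fn=accuracy_fn) then fills results["train_valid"] and results["test_valid"] with the per-epoch average accuracies.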


 

model_builder.py

from torch import nn
from models.vgg_model import TinyVGG

def create_model(
        color_size: int,
        target_size: int,
        is_trained: bool=False
) -> nn.Module:
   
    if is_trained:
        return  # TODO: plug in a pre-trained model here

    return TinyVGG(color_size=color_size, target_size=target_size)
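
models/vgg_model.py is not shown in this post. The sketch below is one plausible TinyVGG that matches the constructor call above (color_size input channels, target_size output classes, 64x64 inputs); the hidden size and layer layout are assumptions in the spirit of the usual TinyVGG teaching model, not necessarily the version used in class:

import torch
from torch import nn

class TinyVGG(nn.Module):
    def __init__(self, color_size: int, target_size: int, hidden_size: int = 10):
        super().__init__()
        self.block_1 = nn.Sequential(
            nn.Conv2d(color_size, hidden_size, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(hidden_size, hidden_size, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.block_2 = nn.Sequential(
            nn.Conv2d(hidden_size, hidden_size, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(hidden_size, hidden_size, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            # 64x64 input -> 16x16 feature map after two 2x2 poolings
            nn.Linear(hidden_size * 16 * 16, target_size)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.classifier(self.block_2(self.block_1(x)))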

 

utils.py

import torch
import numpy as np

class EarlyStopper(object):

    def __init__(self, num_trials, save_path):
        self.num_trials = num_trials
        self.trial_counter = 0
        self.best_loss = np.inf
        self.save_path = save_path

    def is_continuable(self, model, loss):
        if loss < self.best_loss:  # current loss improves on the best loss so far
            self.best_loss = loss  # update the best loss
            self.trial_counter = 0  # reset the patience counter
            torch.save(model, self.save_path)  # save the model with the best loss
            return True
        elif self.trial_counter + 1 < self.num_trials:  # no improvement, but patience not yet exhausted
            self.trial_counter += 1  # count one more failed trial
            return True
        else:  # no improvement and patience exhausted -> stop training
            return False

    def get_best_model(self, device):
        # weights_only=False is needed to unpickle a full model object on recent PyTorch versions
        return torch.load(self.save_path, weights_only=False).to(device)
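
Once train() finishes (or stops early), the checkpoint with the lowest test loss is already on disk; a short sketch of restoring it with the class above:

import torch
from utils import EarlyStopper

earlystopper = EarlyStopper(num_trials=5, save_path="./models/trained_model.pth")
# ... run train(..., earlystopper=earlystopper) ...

device = "cuda" if torch.cuda.is_available() else "cpu"
best_model = earlystopper.get_best_model(device)
best_model.eval()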

main.py

import argparse
import torch
from torchvision import transforms
from data_setup import create_dataloader, create_dataset
from model_builder import create_model
from engine import train
from utils import EarlyStopper

def main(args):
    """
    1. create dataset
    2. create dataloader
    3. create model
    3. training
    """

    transform = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor()
    ])  # data preprocessing

    train_dataset = create_dataset(train=True, transform=transform)
    test_dataset = create_dataset(train=False, transform=transform)

    train_dataloader = create_dataloader(dataset=train_dataset, batch_size=args.batch_size)
    test_dataloader = create_dataloader(dataset=test_dataset, batch_size=args.batch_size)

    feature, _ = train_dataset[0]  
    vgg_model = create_model(color_size=feature.shape[0]
                            , target_size=len(train_dataset.classes) , is_trained=False)
   
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    vgg_model.to(device)
   
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        params=vgg_model.parameters(), lr=args.lr)
    earlystopper = EarlyStopper(num_trials=5, save_path=args.trained_model)

    train(model=vgg_model, train_dataloader=train_dataloader, test_dataloader=test_dataloader
        , optimizer=optimizer, loss_fn=loss_fn, epochs=args.epochs, device=device
        , earlystopper=earlystopper)

if __name__=="__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch_size", default=32, type=int)
    parser.add_argument("--lr", default=0.01, type=float)
    parser.add_argument("--epochs", default=50, type=int)
    #parser.add_argument("--hidden_size", default=10, type=int)
    parser.add_argument("--trained_model", default="./models/trained_model.pth")

    args = parser.parse_args()
    print(f"=======================================")
    print(f"epochs:{args.epochs}  / batch_size: {args.batch_size}")
    print(f"=======================================")
   
    main(args)
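
Running the pipeline from the terminal (inside the activated virtual environment). Note that the ./models directory must already exist, because EarlyStopper saves the best checkpoint there:

py main.py --batch_size 32 --lr 0.001 --epochs 50 --trained_model ./models/trained_model.pth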


 

https://tutorials.pytorch.kr/beginner/blitz/cifar10_tutorial.html?highlight=cifar

 

Training a Classifier (분류기 학습하기) - tutorials.pytorch.kr

So far we have seen how to define a neural network, compute a loss, and update the weights. The natural next question is: what about the data? The linked tutorial answers that for images, loading the CIFAR-10 dataset with torchvision and training a small CNN classifier on it.