added new aiunn scripts with first draft for pip project
This commit is contained in:
parent 71da7ed2f1
commit 914d002602
@@ -0,0 +1,14 @@
[build-system]
requires = ["setuptools>=45", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "aiunn"
version = "0.1.0"
description = "A brief description of your package"
readme = "README.md"
requires-python = ">=3.7"
license = {file = "LICENSE"}
authors = [
    {name = "Your Name", email = "your.email@example.com"},
]
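Once the package has been installed locally (for example with an editable pip install), the metadata declared above can be sanity-checked from Python. A minimal sketch, assuming the install succeeded (requires Python 3.8+):

    # Hypothetical check against the metadata declared in pyproject.toml.
    from importlib.metadata import metadata, version

    print(version("aiunn"))                      # expected: 0.1.0
    print(metadata("aiunn")["Requires-Python"])  # expected: >=3.7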
@@ -0,0 +1,5 @@
torch
aiia
pillow
torchvision
scikit-learn
@@ -0,0 +1,25 @@
from setuptools import setup, find_packages

setup(
    name="aiunn",
    version="0.1.0",
    packages=find_packages(where="src"),
    package_dir={"": "src"},
    install_requires=[
        line.strip()
        for line in open("requirements.txt")
        if line.strip() and not line.startswith("#")
    ],
    author="Falko Habel",
    author_email="falko.habel@gmx.de",
    description="Finetuner for image upscaling using AIIA",
    long_description=open("README.md").read(),
    long_description_content_type="text/markdown",
    url="https://github.com/yourusername/aiunn",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.7",
)
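The packages=find_packages(where="src") and package_dir settings assume a src layout. Given the modules referenced by the package's __init__.py below, the intended structure is roughly as follows (module file names are inferred from the imports, not shown explicitly in the diff):

    src/
    └── aiunn/
        ├── __init__.py
        ├── finetune.py
        └── inference.py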
@@ -0,0 +1,6 @@

from .finetune import *
from .inference import UpScaler

__version__ = "0.1.0"
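With the package installed, the API re-exported here can be exercised roughly as follows. This is only a sketch: it assumes a CUDA device, that the saved "AIIA-base-512-upscaler" weights are available, and the image paths are placeholders.

    from PIL import Image
    from aiunn import UpScaler, __version__

    print(__version__)      # 0.1.0
    upscaler = UpScaler()   # defaults to the "AIIA-base-512-upscaler" weights on CUDA
    upscaled = upscaler.upscale(Image.open("photo.jpg"))
    upscaled.save("photo_2x.jpg")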
@@ -7,138 +7,253 @@ from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from aiia.model import AIIABase
from sklearn.model_selection import train_test_split
from typing import Dict, List, Union

# Step 1: Define Custom Dataset Class
class ImageDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]

        # Decode image_512 from bytes
        img_bytes = row['image_512']
        img_stream = io.BytesIO(img_bytes)
        low_res_image = Image.open(img_stream).convert('RGB')

        # Decode image_1024 from bytes
        high_res_bytes = row['image_1024']
        high_stream = io.BytesIO(high_res_bytes)
        high_res_image = Image.open(high_stream).convert('RGB')

        # Apply transformations if specified
        if self.transform:
            low_res_image = self.transform(low_res_image)
            high_res_image = self.transform(high_res_image)

        return {'low_res': low_res_image, 'high_res': high_res_image}

# Step 2: Load and Preprocess Data
# Read the dataset (assuming it's a DataFrame with columns 'image_512' and 'image_1024')
df1 = pd.read_parquet('/root/training_data/vision-dataset/image_upscaler.parquet')
df2 = pd.read_parquet('/root/training_data/vision-dataset/image_vec_upscaler.parquet')

# Combine the two datasets into one DataFrame
df = pd.concat([df1, df2], ignore_index=True)

# Split into training and validation sets
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

# Define preprocessing transforms
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

train_dataset = ImageDataset(train_df, transform=transform)
val_dataset = ImageDataset(val_df, transform=transform)

# Create DataLoaders
batch_size = 2
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

# Step 3: Load Pre-trained Model and Modify for Upscaling
model = AIIABase.load("AIIA-Base-512")

# Freeze original CNN layers to prevent catastrophic forgetting
for param in model.cnn.parameters():
    param.requires_grad = False

# Add upsample module
hidden_size = model.config.hidden_size  # Assuming this is defined in your model's config
model.upsample = torch.nn.Sequential(
    nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
    nn.Conv2d(hidden_size, 3, kernel_size=3, padding=1)
)

# Step 4: Define Loss Function and Optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)  # Adjust learning rate as needed

# Alternatively, if you want to train only the new layers:
params_to_update = []
for name, param in model.named_parameters():
    if 'upsample' in name:
        params_to_update.append(param)
optimizer = torch.optim.Adam(params_to_update, lr=0.001)

# Step 5: Training Loop
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

best_val_loss = float('inf')
num_epochs = 10  # Adjust as needed

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for batch in train_loader:
        low_res = batch['low_res'].to(device)
        high_res = batch['high_res'].to(device)

        # Forward pass
        features = model.cnn(low_res)
        outputs = model.upsample(features)

        loss = criterion(outputs, high_res)

        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

    # Validation Step
    model.eval()
    val_loss = 0.0

    with torch.no_grad():
        for batch in val_loader:
            low_res = batch['low_res'].to(device)
            high_res = batch['high_res'].to(device)

            features = model.cnn(low_res)
            outputs = model.upsample(features)

            loss = criterion(outputs, high_res)
            val_loss += loss.item()

    print(f"Validation Loss: {val_loss:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        model.save("AIIA-base-512-upscaler")
        print("Best model saved!")


class TrainingBase:
    def __init__(self,
                 model_name: str,
                 dataset_paths: Union[List[str], Dict[str, str]],
                 batch_size: int = 32,
                 learning_rate: float = 0.001,
                 num_workers: int = 4,
                 train_ratio: float = 0.8):
        """
        Base class for training models with multiple dataset support

        Args:
            model_name (str): Name of the model to initialize
            dataset_paths (Union[List[str], Dict[str, str]]): Paths to datasets (train and optional validation)
            batch_size (int): Batch size for training
            learning_rate (float): Learning rate for optimizer
            num_workers (int): Number of workers for data loading
            train_ratio (float): Ratio of data to use for training (rest goes to validation)
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.train_ratio = train_ratio

        # Initialize datasets and loaders
        self.dataset_paths = dataset_paths
        self._initialize_datasets()

        # Initialize model and training parameters
        self.model_name = model_name
        self.learning_rate = learning_rate
        self._initialize_model()

    def _initialize_datasets(self):
        """Helper method to initialize datasets"""
        raise NotImplementedError("This method should be implemented in child classes")

    def _initialize_model(self):
        """Helper method to initialize model architecture"""
        raise NotImplementedError("This method should be implemented in child classes")

    def train(self, num_epochs: int = 10):
        """Train the model for specified number of epochs"""
        self.model.to(self.device)

        for epoch in range(num_epochs):
            print(f"Epoch {epoch+1}/{num_epochs}")

            # Train phase
            self._train_epoch()

            # Validation phase
            self._validate_epoch()

            # Save best model based on validation loss
            if self.current_val_loss < self.best_val_loss:
                self.best_val_loss = self.current_val_loss
                self.save_model()
    def _train_epoch(self):
        """Train model for one epoch"""
        raise NotImplementedError("This method should be implemented in child classes")

    def _validate_epoch(self):
        """Validate model performance"""
        raise NotImplementedError("This method should be implemented in child classes")

    def save_model(self):
        """Save current best model"""
        torch.save({
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'best_val_loss': self.best_val_loss
        }, f"{self.model_name}_best.pth")

class Finetuner(TrainingBase):
    def __init__(self,
                 model_name: str = "AIIA-Base-512",
                 dataset_paths: Union[List[str], Dict[str, str]] = None,
                 batch_size: int = 32,
                 learning_rate: float = 0.001,
                 num_workers: int = 4,
                 train_ratio: float = 0.8):
        """
        Specialized trainer for image super resolution tasks

        Args:
            Same as TrainingBase
        """
        super().__init__(model_name, dataset_paths, batch_size, learning_rate, num_workers, train_ratio)

    def _initialize_datasets(self):
        """Initialize image datasets"""
        # Load dataframes from parquet files
        if isinstance(self.dataset_paths, dict):
            df_train = pd.read_parquet(self.dataset_paths['train'])
            df_val = pd.read_parquet(self.dataset_paths['val']) if 'val' in self.dataset_paths else None
        elif isinstance(self.dataset_paths, list):
            df_train = pd.concat([pd.read_parquet(path) for path in self.dataset_paths], ignore_index=True)
            df_val = None
        else:
            raise ValueError("Invalid dataset_paths format")

        # Split into train and validation sets if needed
        if df_val is None:
            df_train, df_val = train_test_split(df_train, test_size=1 - self.train_ratio, random_state=42)

        # Define preprocessing transforms
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        ])

        # Create datasets and dataloaders
        self.train_dataset = ImageDataset(df_train, transform=self.transform)
        self.val_dataset = ImageDataset(df_val, transform=self.transform)

        self.train_loader = DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers
        )

        self.val_loader = DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers
        )

    def _initialize_model(self):
        """Initialize and modify the super resolution model"""
        # Load base model
        self.model = AIIABase.load(self.model_name)

        # Freeze CNN layers
        for param in self.model.cnn.parameters():
            param.requires_grad = False

        # Add upscaling layer
        hidden_size = self.model.config.hidden_size
        self.model.upsample = nn.Sequential(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.Conv2d(hidden_size, 3, kernel_size=3, padding=1)
        )

        # Initialize optimizer and loss function
        # (filter by parameter name so that only the new upsample layers are optimized)
        self.criterion = nn.MSELoss()
        self.optimizer = torch.optim.Adam(
            [param for name, param in self.model.named_parameters() if 'upsample' in name],
            lr=self.learning_rate
        )

        self.best_val_loss = float('inf')

    def _train_epoch(self):
        """Train model for one epoch"""
        self.model.train()
        running_loss = 0.0

        for batch in self.train_loader:
            low_res = batch['low_res'].to(self.device)
            high_res = batch['high_res'].to(self.device)

            # Forward pass
            features = self.model.cnn(low_res)
            outputs = self.model.upsample(features)

            loss = self.criterion(outputs, high_res)

            # Backward pass and optimize
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            running_loss += loss.item()

        epoch_loss = running_loss / len(self.train_loader)
        print(f"Train Loss: {epoch_loss:.4f}")

    def _validate_epoch(self):
        """Validate model performance"""
        self.model.eval()
        val_loss = 0.0

        with torch.no_grad():
            for batch in self.val_loader:
                low_res = batch['low_res'].to(self.device)
                high_res = batch['high_res'].to(self.device)

                features = self.model.cnn(low_res)
                outputs = self.model.upsample(features)

                loss = self.criterion(outputs, high_res)
                val_loss += loss.item()

        avg_val_loss = val_loss / len(self.val_loader)
        print(f"Validation Loss: {avg_val_loss:.4f}")

        # Record the current validation loss so train() can compare it against the best so far
        self.current_val_loss = avg_val_loss

    def __repr__(self):
        return f"Model ({self.model_name}, batch_size={self.batch_size})"


# Example usage:
if __name__ == "__main__":
    finetuner = Finetuner(
        dataset_paths={
            "train": "/root/training_data/vision-dataset/image_upscaler.parquet",
            "val": "/root/training_data/vision-dataset/image_vec_upscaler.parquet"
        },
        batch_size=2,
        learning_rate=0.001
    )

    finetuner.train(num_epochs=10)
@@ -0,0 +1,73 @@
import torch
from PIL import Image
import torchvision.transforms as T
from torch.nn import functional as F
from aiia.model import AIIABase

class UpScaler:
    def __init__(self, model_path="AIIA-base-512-upscaler", device="cuda"):
        self.device = torch.device(device)
        self.model = AIIABase.load(model_path).to(self.device)
        self.model.eval()

        # Preprocessing transforms
        self.preprocess = T.Compose([
            T.Lambda(lambda img: self._pad_to_square(img)),
            T.Resize(512),
            T.ToTensor(),
            T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        ])

    def _pad_to_square(self, pil_img):
        """Pad image to square while maintaining aspect ratio"""
        w, h = pil_img.size
        max_side = max(w, h)
        hp = (max_side - w) // 2
        vp = (max_side - h) // 2
        padding = (hp, vp, max_side - w - hp, max_side - h - vp)
        return T.functional.pad(pil_img, padding, 0, 'constant')

    def _remove_padding(self, tensor, original_size):
        """Remove padding added during preprocessing"""
        _, _, h, w = tensor.shape
        orig_w, orig_h = original_size

        # Calculate scale factor
        scale = 512 / max(orig_w, orig_h)
        new_w = int(orig_w * scale)
        new_h = int(orig_h * scale)

        # Calculate padding offsets
        pad_w = (512 - new_w) // 2
        pad_h = (512 - new_h) // 2

        # Remove padding
        unpad = tensor[:, :, pad_h:pad_h+new_h, pad_w:pad_w+new_w]

        # Resize to target 2x resolution
        return F.interpolate(unpad, size=(orig_h*2, orig_w*2), mode='bilinear', align_corners=False)

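    # Worked example of the arithmetic above (illustrative numbers, assuming an 800x600 input
    # and an output tensor on the 512x512 padded grid): scale = 512 / 800 = 0.64, so
    # new_w = 512, new_h = 384, pad_w = 0 and pad_h = 64; the slice keeps rows 64:448 and
    # columns 0:512, and F.interpolate then upsamples to (orig_h*2, orig_w*2) = (1200, 1600).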
    def upscale(self, input_image):
        # Preprocess
        original_size = input_image.size
        input_tensor = self.preprocess(input_image).unsqueeze(0).to(self.device)

        # Inference
        with torch.no_grad():
            features = self.model.cnn(input_tensor)
            output = self.model.upsample(features)

        # Postprocess
        output = self._remove_padding(output, original_size)

        # Convert to PIL Image
        output = output.squeeze(0).cpu().detach()
        output = (output * 0.5 + 0.5).clamp(0, 1)
        return T.functional.to_pil_image(output)

# Usage example
if __name__ == "__main__":
    upscaler = UpScaler()
    input_image = Image.open("input.jpg")
    output_image = upscaler.upscale(input_image)
    output_image.save("output_2x.jpg")