added new aiunn script with first draft for pip project

2025-01-29 22:30:57 +01:00 · 2025-01-29 22:30:57 +01:00 · 914d002602
parent 71da7ed2f1
commit 914d002602
6 changed files with 337 additions and 99 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,14 @@
 [build-system]
 requires = ["setuptools>=45", "wheel"]
 build-backend = "setuptools.build_meta"
 [project]
 name = "aiunn"
 version = "0.1.0"
 description = "Finetuner for image upscaling using AIIA"
 readme = "README.md"
 requires-python = ">=3.7"
 license = {file = "LICENSE"}
 authors = [
    {name = "Falko Habel", email = "falko.habel@gmx.de"},
 ]
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,5 @@
 torch
 aiia
 pillow
 torchvision
 scikit-learn
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,25 @@
 from setuptools import setup, find_packages


 def _read_requirements(path="requirements.txt"):
    """Return the requirement specifiers listed in *path*.

    Blank lines and comment lines are skipped. The comment test runs on the
    stripped line so that indented ``#`` lines are not passed on as
    requirements. The file is closed before returning.
    """
    with open(path, encoding="utf-8") as handle:
        stripped = (line.strip() for line in handle)
        return [line for line in stripped if line and not line.startswith("#")]


 def _read_text(path):
    """Return the UTF-8 text of *path*, closing the file afterwards."""
    with open(path, encoding="utf-8") as handle:
        return handle.read()


 # Package metadata; sources live under src/ (see package_dir).
 setup(
    name="aiunn",
    version="0.1.0",
    packages=find_packages(where="src"),
    package_dir={"": "src"},
    install_requires=_read_requirements(),
    author="Falko Habel",
    author_email="falko.habel@gmx.de",
    description="Finetuner for image upscaling using AIIA",
    long_description=_read_text("README.md"),
    long_description_content_type="text/markdown",
    url="https://github.com/yourusername/aiunn",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.7",
 )
--- a/src/aiunn/__init__.py
+++ b/src/aiunn/__init__.py
@ -0,0 +1,6 @@
 # Package entry point: re-export every name that the star import of the
 # finetune module exposes, plus the UpScaler inference class.
 from .finetune import *
 from .inference import UpScaler
 # Package version string; setup.py and pyproject.toml declare the same value.
 __version__ = "0.1.0"
--- a/src/aiunn/finetune.py
+++ b/src/aiunn/finetune.py
@ -7,138 +7,253 @@ from torch.utils.data import Dataset, DataLoader
 import torchvision.transforms as transforms
 from aiia.model import AIIABase
 from sklearn.model_selection import train_test_split
 from typing import Dict, List, Union
 # Step 1: Define Custom Dataset Class
 class ImageDataset(Dataset):
    """Dataset of paired low-/high-resolution RGB images decoded from byte columns."""
    def __init__(self, dataframe, transform=None):
        """Store the backing table and the optional per-image transform.

        Args:
            dataframe: Table accessed through ``len()`` and ``.iloc``
                (presumably a pandas DataFrame - confirm with the caller).
            transform: Optional callable applied to both images of a pair.
        """
        self.dataframe = dataframe
        self.transform = transform
-        
+
    def __len__(self):
        """Return the number of rows in the backing table."""
        return len(self.dataframe)
-    
+
    def __getitem__(self, idx):
        """Return ``{'low_res': ..., 'high_res': ...}`` for the row at position ``idx``."""
        row = self.dataframe.iloc[idx]
-        
+
        # Decode image_512 from bytes
        img_bytes = row['image_512']
        img_stream = io.BytesIO(img_bytes)
        low_res_image = Image.open(img_stream).convert('RGB')
-        
+
        # Decode image_1024 from bytes
        high_res_bytes = row['image_1024']
        high_stream = io.BytesIO(high_res_bytes)
        high_res_image = Image.open(high_stream).convert('RGB')
-        
+
        # Apply transformations if specified
        # The same transform object is used for both images of the pair.
        if self.transform:
            low_res_image = self.transform(low_res_image)
            high_res_image = self.transform(high_res_image)
-            
+
        return {'low_res': low_res_image, 'high_res': high_res_image}
 # Step 2: Load and Preprocess Data
 # Read the dataset (assuming it's a DataFrame with columns 'image_512' and 'image_1024')
 # NOTE(review): these two reads execute at import time against hard-coded
 # absolute paths, and no remaining (non-removed) line visible here refers to
 # df1 or df2 - confirm whether they are still needed.
 df1 = pd.read_parquet('/root/training_data/vision-dataset/image_upscaler.parquet')
 df2 = pd.read_parquet('/root/training_data/vision-dataset/image_vec_upscaler.parquet')
-# Combine the two datasets into one DataFrame
+class TrainingBase:
-df = pd.concat([df1, df2], ignore_index=True)
+    def __init__(self,
-
+                 model_name: str,
-# Split into training and validation sets
+                 dataset_paths: Union[List[str], Dict[str, str]],
-train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)
+                 batch_size: int = 32,
-
+                 learning_rate: float = 0.001,
-# Define preprocessing transforms
+                 num_workers: int = 4,
-transform = transforms.Compose([
+                 train_ratio: float = 0.8):
-    transforms.ToTensor(),
+        """
-    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
+        Base class for training models with multiple dataset support
 ])
 train_dataset = ImageDataset(train_df, transform=transform)
 val_dataset = ImageDataset(val_df, transform=transform)
 # Create DataLoaders
 batch_size = 2
 train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
 val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
 # Step 3: Load Pre-trained Model and Modify for Upscaling
 model = AIIABase.load("AIIA-Base-512")
 # Freeze original CNN layers to prevent catastrophic forgetting
 for param in model.cnn.parameters():
    param.requires_grad = False
 # Add upsample module
 hidden_size = model.config.hidden_size  # Assuming this is defined in your model's config
 model.upsample = torch.nn.Sequential(
    nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
    nn.Conv2d(hidden_size, 3, kernel_size=3, padding=1)
 )
 # Step 4: Define Loss Function and Optimizer
 criterion = torch.nn.MSELoss()
 optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)  # Adjust learning rate as needed
 # Alternatively, if you want to train only the new layers:
 params_to_update = []
 for name, param in model.named_parameters():
    if 'upsample' in name:
        params_to_update.append(param)
 optimizer = torch.optim.Adam(params_to_update, lr=0.001)
 # Step 5: Training Loop
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 best_val_loss = float('inf')
 num_epochs = 10  # Adjust as needed
 for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch in train_loader:
        low_res = batch['low_res'].to(device)
        high_res = batch['high_res'].to(device)
-        # Forward pass
+        Args:
-        features = model.cnn(low_res)
+            model_name (str): Name of the model to initialize
-        outputs = model.upsample(features)
+            dataset_paths (Union[List[str], Dict[str, str]]): Paths to datasets (train and optional validation)
            batch_size (int): Batch size for training
            learning_rate (float): Learning rate for optimizer
            num_workers (int): Number of workers for data loading
            train_ratio (float): Ratio of data to use for training (rest goes to validation)
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.batch_size = batch_size
        self.num_workers = num_workers
-        loss = criterion(outputs, high_res)
+        # Initialize datasets and loaders
        self.dataset_paths = dataset_paths
        self._initialize_datasets()
-        # Backward pass and optimize
+        # Initialize model and training parameters
-        optimizer.zero_grad()
+        self.model_name = model_name
-        loss.backward()
+        self.learning_rate = learning_rate
-        optimizer.step()
+        self._initialize_model()
-        running_loss += loss.item()
+    def _initialize_datasets(self):
        """Helper method to initialize datasets"""
        raise NotImplementedError("This method should be implemented in child classes")
-    epoch_loss = running_loss / len(train_loader)
+    def _initialize_model(self):
-    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')
+        """Helper method to initialize model architecture"""
        raise NotImplementedError("This method should be implemented in child classes")
-    # Validation Step
+    def train(self, num_epochs: int = 10):
-    model.eval()
+        """Train the model for specified number of epochs"""
-    val_loss = 0.0
+        self.model.to(self.device)
-    
+        
-    with torch.no_grad():
+        for epoch in range(num_epochs):
-        for batch in val_loader:
+            print(f"Epoch {epoch+1}/{num_epochs}")
            low_res = batch['low_res'].to(device)
            high_res = batch['high_res'].to(device)
-            features = model.cnn(low_res)
+            # Train phase
-            outputs = model.upsample(features)
+            self._train_epoch()
-            loss = criterion(outputs, high_res)
+            # Validation phase
-            val_loss += loss.item()
+            self._validate_epoch()
-
+            
-    print(f"Validation Loss: {val_loss:.4f}")
+            # Save best model based on validation loss
            if self.current_val_loss < self.best_val_loss:
                self.save_model()
-    if val_loss < best_val_loss:
+    def _train_epoch(self):
-        best_val_loss = val_loss
+        """Train model for one epoch"""
-        model.save("AIIA-base-512-upscaler")
+        raise NotImplementedError("This method should be implemented in child classes")
-        print("Best model saved!")
+    
    def _validate_epoch(self):
        """Validate model performance"""
        raise NotImplementedError("This method should be implemented in child classes")
    def save_model(self):
        """Save current best model"""
        torch.save({
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'best_val_loss': self.best_val_loss
        }, f"{self.model_name}_best.pth")
 class Finetuner(TrainingBase):
    def __init__(self,
                 model_name: str = "AIIA-Base-512",
                 dataset_paths: Union[List[str], Dict[str, str]] = None,
                 batch_size: int = 32,
                 learning_rate: float = 0.001,
                 num_workers: int = 4,
                 train_ratio: float = 0.8):
        """
        Specialized trainer for image super resolution tasks
        Args:
            Same as TrainingBase
        """
        super().__init__(model_name, dataset_paths, batch_size, learning_rate, num_workers, train_ratio)
    def _initialize_datasets(self):
        """Initialize image datasets"""
        # Load dataframes from parquet files
        if isinstance(self.dataset_paths, dict):
            df_train = pd.read_parquet(self.dataset_paths['train'])
            df_val = pd.read_parquet(self.dataset_paths['val']) if 'val' in self.dataset_paths else None
        elif isinstance(self.dataset_paths, list):
            df_train = pd.concat([pd.read_parquet(path) for path in self.dataset_paths], ignore_index=True)
            df_val = None
        else:
            raise ValueError("Invalid dataset_paths format")
        # Split into train and validation sets if needed
        if df_val is None:
            df_train, df_val = train_test_split(df_train, test_size=1 - self.train_ratio, random_state=42)
        # Define preprocessing transforms
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        ])
        # Create datasets and dataloaders
        self.train_dataset = ImageDataset(df_train, transform=self.transform)
        self.val_dataset = ImageDataset(df_val, transform=self.transform)
        self.train_loader = DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers
        )
        self.val_loader = DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers
        )
    def _initialize_model(self):
        """Initialize and modify the super resolution model"""
        # Load base model
        self.model = AIIABase.load(self.model_name)
        # Freeze CNN layers
        for param in self.model.cnn.parameters():
            param.requires_grad = False
        # Add upscaling layer
        hidden_size = self.model.config.hidden_size
        self.model.upsample = nn.Sequential(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.Conv2d(hidden_size, 3, kernel_size=3, padding=1)
        )
        # Initialize optimizer and loss function
        self.criterion = nn.MSELoss()
        self.optimizer = torch.optim.Adam(
            [param for param in self.model.parameters() if 'upsample' in str(param)],
            lr=self.learning_rate
        )
        self.best_val_loss = float('inf')
    def _train_epoch(self):
        """Train model for one epoch"""
        self.model.train()
        running_loss = 0.0
        for batch in self.train_loader:
            low_res = batch['low_res'].to(self.device)
            high_res = batch['high_res'].to(self.device)
            # Forward pass
            features = self.model.cnn(low_res)
            outputs = self.model.upsample(features)
            loss = self.criterion(outputs, high_res)
            # Backward pass and optimize
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            running_loss += loss.item()
        epoch_loss = running_loss / len(self.train_loader)
        print(f"Train Loss: {epoch_loss:.4f}")
    def _validate_epoch(self):
        """Validate model performance"""
        self.model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch in self.val_loader:
                low_res = batch['low_res'].to(self.device)
                high_res = batch['high_res'].to(self.device)
                features = self.model.cnn(low_res)
                outputs = self.model.upsample(features)
                loss = self.criterion(outputs, high_res)
                val_loss += loss.item()
        avg_val_loss = val_loss / len(self.val_loader)
        print(f"Validation Loss: {avg_val_loss:.4f}")
        # Update best model
        if avg_val_loss < self.best_val_loss:
            self.best_val_loss = avg_val_loss
    def __repr__(self):
        return f"Model ({self.model_name}, batch_size={self.batch_size})"
 # Example usage:
 if __name__ == "__main__":
    # Finetuner takes its inputs through `dataset_paths`; the dict form names
    # the training file and the optional validation file explicitly.
    finetuner = Finetuner(
        dataset_paths={
            "train": "/root/training_data/vision-dataset/image_upscaler.parquet",
            "val": "/root/training_data/vision-dataset/image_vec_upscaler.parquet",
        },
        batch_size=2,
        learning_rate=0.001
    )
    # The training entry point defined on TrainingBase is `train`.
    finetuner.train(num_epochs=10)
--- a/src/aiunn/inference.py
+++ b/src/aiunn/inference.py
@ -0,0 +1,73 @@
 import torch
 from PIL import Image
 import torchvision.transforms as T
 from torch.nn import functional as F
 from aiia.model import AIIABase
 class UpScaler:
    """Upscale PIL images to twice their original size with a fine-tuned AIIA model."""

    def __init__(self, model_path="AIIA-base-512-upscaler", device="cuda"):
        """Load the model and build the preprocessing pipeline.

        Args:
            model_path: Identifier passed to ``AIIABase.load``.
            device: Torch device to run on. If a CUDA device is requested but
                CUDA is unavailable, the CPU is used instead and a warning is
                emitted.
        """
        if str(device).startswith("cuda") and not torch.cuda.is_available():
            import warnings
            warnings.warn("CUDA is not available; UpScaler is falling back to CPU")
            device = "cpu"
        self.device = torch.device(device)
        self.model = AIIABase.load(model_path).to(self.device)
        self.model.eval()

        # Preprocessing transforms
        self.preprocess = T.Compose([
            T.Lambda(self._pad_to_square),
            T.Resize(512),
            T.ToTensor(),
            T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        ])

    def _pad_to_square(self, pil_img):
        """Pad image to square while maintaining aspect ratio"""
        w, h = pil_img.size
        max_side = max(w, h)
        hp = (max_side - w) // 2
        vp = (max_side - h) // 2
        # (left, top, right, bottom); the right/bottom side takes the odd pixel.
        padding = (hp, vp, max_side - w - hp, max_side - h - vp)
        return T.functional.pad(pil_img, padding, 0, 'constant')

    def _remove_padding(self, tensor, original_size):
        """Remove padding added during preprocessing and resize to 2x the original.

        Args:
            tensor: Model output indexed as (batch, channel, height, width).
            original_size: ``(width, height)`` of the image before padding.

        Returns:
            Tensor with spatial size ``(orig_h * 2, orig_w * 2)``.
        """
        _, _, h, w = tensor.shape
        orig_w, orig_h = original_size

        # Size of the un-padded content inside the square canvas, measured on
        # the tensor's own grid rather than a fixed 512, so the crop stays
        # correct when the model output is larger than its input. Integer
        # arithmetic avoids float truncation; max(1, ...) keeps at least one
        # pixel for extreme aspect ratios.
        longest = max(orig_w, orig_h)
        new_w = max(1, orig_w * w // longest)
        new_h = max(1, orig_h * h // longest)

        # Calculate padding offsets
        pad_w = (w - new_w) // 2
        pad_h = (h - new_h) // 2

        # Remove padding
        unpad = tensor[:, :, pad_h:pad_h + new_h, pad_w:pad_w + new_w]

        # Resize to target 2x resolution
        return F.interpolate(unpad, size=(orig_h * 2, orig_w * 2), mode='bilinear', align_corners=False)

    def upscale(self, input_image):
        """Return a PIL image twice the width and height of ``input_image``."""
        # The normalisation step expects three channels, so other modes
        # (e.g. RGBA, L) are converted first.
        rgb_image = input_image.convert("RGB")

        # Preprocess
        original_size = rgb_image.size
        input_tensor = self.preprocess(rgb_image).unsqueeze(0).to(self.device)

        # Inference
        with torch.no_grad():
            features = self.model.cnn(input_tensor)
            output = self.model.upsample(features)

        # Postprocess
        output = self._remove_padding(output, original_size)

        # Convert to PIL Image; undo the (x - 0.5) / 0.5 normalisation.
        output = output.squeeze(0).cpu().detach()
        output = (output * 0.5 + 0.5).clamp(0, 1)
        return T.functional.to_pil_image(output)
 # Usage example
 if __name__ == "__main__":
    upscaler = UpScaler()
    # The context manager closes the source file once the upscaled copy exists.
    with Image.open("input.jpg") as input_image:
        output_image = upscaler.upscale(input_image)
    output_image.save("output_2x.jpg")