added new aiunn script with first draft for pip project

2025-01-29 22:30:57 +01:00 · 2025-01-29 22:30:57 +01:00 · 914d002602
parent 71da7ed2f1
commit 914d002602
6 changed files with 337 additions and 99 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,14 @@
+[build-system]
+requires = ["setuptools>=45", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "aiunn"
+version = "0.1.0"
+description = "Finetuner for image upscaling using AIIA"
+readme = "README.md"
+requires-python = ">=3.7"
+license = {file = "LICENSE"}
+authors = [
+    {name = "Falko Habel", email = "falko.habel@gmx.de"},
+]
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,5 @@
+torch
+aiia
+pillow
+torchvision
+scikit-learn
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,25 @@
+from setuptools import setup, find_packages
+
+def _read_text(path):
+    """Return the contents of *path* decoded as UTF-8.
+
+    The file is opened in a ``with`` block so the handle is closed as soon as
+    the text has been read.
+    """
+    with open(path, encoding="utf-8") as handle:
+        return handle.read()
+
+
+def _read_requirements(path="requirements.txt"):
+    """Return the requirement specifiers listed in *path*.
+
+    Blank lines and comment lines are dropped. The comment test is applied to
+    the stripped line, so a ``#`` comment preceded by whitespace is skipped
+    as well.
+    """
+    entries = (line.strip() for line in _read_text(path).splitlines())
+    return [entry for entry in entries if entry and not entry.startswith("#")]
+
+
+# Both files are looked up relative to the current working directory.
+setup(
+    name="aiunn",
+    version="0.1.0",
+    packages=find_packages(where="src"),
+    package_dir={"": "src"},
+    install_requires=_read_requirements(),
+    author="Falko Habel",
+    author_email="falko.habel@gmx.de",
+    description="Finetuner for image upscaling using AIIA",
+    long_description=_read_text("README.md"),
+    long_description_content_type="text/markdown",
+    url="https://github.com/yourusername/aiunn",
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+    python_requires=">=3.7",
+)
--- a/src/aiunn/__init__.py
+++ b/src/aiunn/__init__.py
@ -0,0 +1,6 @@
+
+from .finetune import *
+from .inference import UpScaler
+
+__version__ = "0.1.0"
+
--- a/src/aiunn/finetune.py
+++ b/src/aiunn/finetune.py
@ -7,138 +7,253 @@ from torch.utils.data import Dataset, DataLoader
 import torchvision.transforms as transforms
 from aiia.model import AIIABase
 from sklearn.model_selection import train_test_split
+from typing import Dict, List, Union


-# Step 1: Define Custom Dataset Class
 class ImageDataset(Dataset):
+    """Dataset that yields a low-resolution / high-resolution image pair per row.
+
+    Each row of the wrapped dataframe must provide the columns ``image_512``
+    and ``image_1024`` holding encoded image bytes (presumably 512 px and
+    1024 px renditions of the same picture, judging by the column names —
+    confirm against the parquet schema).
+    """
+
     def __init__(self, dataframe, transform=None):
+        """Keep the dataframe and the optional transform applied to both images."""
         self.dataframe = dataframe
         self.transform = transform
-        
+
     def __len__(self):
+        """Return the number of rows, i.e. the number of image pairs."""
         return len(self.dataframe)
-    
+
     def __getitem__(self, idx):
+        """Decode row ``idx`` and return ``{'low_res': ..., 'high_res': ...}``.
+
+        Both images are opened from their byte strings and converted to RGB.
+        When a transform was given, the same transform is applied to each of
+        them; otherwise PIL images are returned.
+        """
+        # Positional lookup: idx is the row position, not the index label.
         row = self.dataframe.iloc[idx]
-        
+
         # Decode image_512 from bytes
         img_bytes = row['image_512']
         img_stream = io.BytesIO(img_bytes)
         low_res_image = Image.open(img_stream).convert('RGB')
-        
+
         # Decode image_1024 from bytes
         high_res_bytes = row['image_1024']
         high_stream = io.BytesIO(high_res_bytes)
         high_res_image = Image.open(high_stream).convert('RGB')
-        
+
         # Apply transformations if specified
         if self.transform:
             low_res_image = self.transform(low_res_image)
             high_res_image = self.transform(high_res_image)
-            
+
         return {'low_res': low_res_image, 'high_res': high_res_image}



-# Step 2: Load and Preprocess Data
-# Read the dataset (assuming it's a DataFrame with columns 'image_512' and 'image_1024')
-df1 = pd.read_parquet('/root/training_data/vision-dataset/image_upscaler.parquet')
-df2 = pd.read_parquet('/root/training_data/vision-dataset/image_vec_upscaler.parquet')

-# Combine the two datasets into one DataFrame
-df = pd.concat([df1, df2], ignore_index=True)
-
-# Split into training and validation sets
-train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)
-
-# Define preprocessing transforms
-transform = transforms.Compose([
-    transforms.ToTensor(),
-    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
-])
-
-train_dataset = ImageDataset(train_df, transform=transform)
-val_dataset = ImageDataset(val_df, transform=transform)
-
-# Create DataLoaders
-batch_size = 2
-train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
-val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
-
-# Step 3: Load Pre-trained Model and Modify for Upscaling
-model = AIIABase.load("AIIA-Base-512")
-
-# Freeze original CNN layers to prevent catastrophic forgetting
-for param in model.cnn.parameters():
-    param.requires_grad = False
-
-# Add upsample module
-hidden_size = model.config.hidden_size  # Assuming this is defined in your model's config
-model.upsample = torch.nn.Sequential(
-    nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
-    nn.Conv2d(hidden_size, 3, kernel_size=3, padding=1)
-)
-
-# Step 4: Define Loss Function and Optimizer
-criterion = torch.nn.MSELoss()
-optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)  # Adjust learning rate as needed
-
-# Alternatively, if you want to train only the new layers:
-params_to_update = []
-for name, param in model.named_parameters():
-    if 'upsample' in name:
-        params_to_update.append(param)
-optimizer = torch.optim.Adam(params_to_update, lr=0.001)
-
-# Step 5: Training Loop
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model.to(device)
-
-best_val_loss = float('inf')
-num_epochs = 10  # Adjust as needed
-
-for epoch in range(num_epochs):
-    model.train()
-    running_loss = 0.0
-    
-    for batch in train_loader:
-        low_res = batch['low_res'].to(device)
-        high_res = batch['high_res'].to(device)
+class TrainingBase:
+    """Skeleton of a training run: dataset setup, model setup, epoch loop and checkpointing.
+
+    Subclasses implement ``_initialize_datasets``, ``_initialize_model``,
+    ``_train_epoch`` and ``_validate_epoch``. ``train`` and ``save_model`` use
+    ``self.model`` and ``self.optimizer``, which this class never creates, so
+    the subclass has to set them (``_initialize_model`` is the hook for that).
+    """
+
+    def __init__(self,
+                 model_name: str,
+                 dataset_paths: Union[List[str], Dict[str, str]],
+                 batch_size: int = 32,
+                 learning_rate: float = 0.001,
+                 num_workers: int = 4,
+                 train_ratio: float = 0.8):
+        """
+        Base class for training models with multiple dataset support
        
-        # Forward pass
-        features = model.cnn(low_res)
-        outputs = model.upsample(features)
+        Args:
+            model_name (str): Name of the model to initialize
+            dataset_paths (Union[List[str], Dict[str, str]]): Paths to datasets (train and optional validation)
+            batch_size (int): Batch size for training
+            learning_rate (float): Learning rate for optimizer
+            num_workers (int): Number of workers for data loading
+            train_ratio (float): Ratio of data to use for training (rest goes to validation)
+        """
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.batch_size = batch_size
+        self.num_workers = num_workers
+        # Stored before the dataset hook runs so that subclasses can size
+        # their validation split from it.
+        self.train_ratio = train_ratio
+        # Bookkeeping read by train(); assigned before the model hook so a
+        # subclass remains free to reset either value there.
+        self.best_val_loss = float('inf')
+        self.current_val_loss = float('inf')
        
-        loss = criterion(outputs, high_res)
+        # Initialize datasets and loaders
+        self.dataset_paths = dataset_paths
+        self._initialize_datasets()
        
-        # Backward pass and optimize
-        optimizer.zero_grad()
-        loss.backward()
-        optimizer.step()
+        # Initialize model and training parameters
+        self.model_name = model_name
+        self.learning_rate = learning_rate
+        self._initialize_model()
        
-        running_loss += loss.item()
+    def _initialize_datasets(self):
+        """Helper method to initialize datasets"""
+        raise NotImplementedError("This method should be implemented in child classes")
    
-    epoch_loss = running_loss / len(train_loader)
-    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')
+    def _initialize_model(self):
+        """Helper method to initialize model architecture"""
+        raise NotImplementedError("This method should be implemented in child classes")
    
-    # Validation Step
-    model.eval()
-    val_loss = 0.0
-    
-    with torch.no_grad():
-        for batch in val_loader:
-            low_res = batch['low_res'].to(device)
-            high_res = batch['high_res'].to(device)
+    def train(self, num_epochs: int = 10):
+        """Train the model for specified number of epochs.
+
+        After each epoch the validation result is compared with the best loss
+        known before that epoch; on improvement ``best_val_loss`` is updated
+        and ``save_model`` is called.
+        """
+        self.model.to(self.device)
+        
+        for epoch in range(num_epochs):
+            print(f"Epoch {epoch+1}/{num_epochs}")
            
-            features = model.cnn(low_res)
-            outputs = model.upsample(features)
+            # Train phase
+            self._train_epoch()
            
-            loss = criterion(outputs, high_res)
-            val_loss += loss.item()
-
-    print(f"Validation Loss: {val_loss:.4f}")
+            # Validation phase
+            # Take the reference value before validating: a subclass may
+            # lower best_val_loss inside _validate_epoch, and a comparison
+            # made afterwards could then never succeed.
+            previous_best = self.best_val_loss
+            self._validate_epoch()
+            
+            # Save best model based on validation loss
+            # The epoch's result may be reported through current_val_loss or
+            # through an already lowered best_val_loss; accept either.
+            epoch_best = min(self.current_val_loss, self.best_val_loss)
+            if epoch_best < previous_best:
+                self.best_val_loss = epoch_best
+                self.save_model()
    
-    if val_loss < best_val_loss:
-        best_val_loss = val_loss
-        model.save("AIIA-base-512-upscaler")
-        print("Best model saved!")
+    def _train_epoch(self):
+        """Train model for one epoch"""
+        raise NotImplementedError("This method should be implemented in child classes")
+    
+    def _validate_epoch(self):
+        """Validate model performance"""
+        raise NotImplementedError("This method should be implemented in child classes")
+    
+    def save_model(self):
+        """Save current best model.
+
+        Writes the model weights, the optimizer state and ``best_val_loss``
+        with ``torch.save`` to ``<model_name>_best.pth`` in the working
+        directory.
+        """
+        torch.save({
+            'model_state_dict': self.model.state_dict(),
+            'optimizer_state_dict': self.optimizer.state_dict(),
+            'best_val_loss': self.best_val_loss
+        }, f"{self.model_name}_best.pth")
+        
+class Finetuner(TrainingBase):
+    """Trainer that adds a 2x upsampling head to an AIIA base model and fits only that head."""
+
+    def __init__(self,
+                 model_name: str = "AIIA-Base-512",
+                 dataset_paths: Union[List[str], Dict[str, str]] = None,
+                 batch_size: int = 32,
+                 learning_rate: float = 0.001,
+                 num_workers: int = 4,
+                 train_ratio: float = 0.8):
+        """
+        Specialized trainer for image super resolution tasks
+
+        Args:
+            model_name (str): Name passed to ``AIIABase.load``
+            dataset_paths (Union[List[str], Dict[str, str]]): Either a dict with a
+                'train' entry and an optional 'val' entry, or a list of parquet
+                paths that are concatenated and then split
+            batch_size (int): Batch size for both data loaders
+            learning_rate (float): Learning rate for the Adam optimizer
+            num_workers (int): Number of workers for data loading
+            train_ratio (float): Share of the data kept for training when no
+                validation file is given
+
+        Raises:
+            ValueError: If ``dataset_paths`` is neither a dict nor a list
+        """
+        # The base constructor calls _initialize_datasets(), which reads
+        # self.train_ratio, so the value has to be on the instance first.
+        self.train_ratio = train_ratio
+        super().__init__(model_name, dataset_paths, batch_size, learning_rate, num_workers, train_ratio)
+
+    def _initialize_datasets(self):
+        """Initialize image datasets"""
+        # Load dataframes from parquet files
+        if isinstance(self.dataset_paths, dict):
+            df_train = pd.read_parquet(self.dataset_paths['train'])
+            df_val = pd.read_parquet(self.dataset_paths['val']) if 'val' in self.dataset_paths else None
+        elif isinstance(self.dataset_paths, list):
+            df_train = pd.concat([pd.read_parquet(path) for path in self.dataset_paths], ignore_index=True)
+            df_val = None
+        else:
+            raise ValueError("Invalid dataset_paths format")
+
+        # Split into train and validation sets if needed
+        if df_val is None:
+            df_train, df_val = train_test_split(df_train, test_size=1 - self.train_ratio, random_state=42)
+
+        # Define preprocessing transforms
+        self.transform = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
+        ])
+
+        # Create datasets and dataloaders
+        self.train_dataset = ImageDataset(df_train, transform=self.transform)
+        self.val_dataset = ImageDataset(df_val, transform=self.transform)
+
+        self.train_loader = DataLoader(
+            self.train_dataset,
+            batch_size=self.batch_size,
+            shuffle=True,
+            num_workers=self.num_workers
+        )
+
+        self.val_loader = DataLoader(
+            self.val_dataset,
+            batch_size=self.batch_size,
+            shuffle=False,
+            num_workers=self.num_workers
+        )
+
+    def _initialize_model(self):
+        """Initialize and modify the super resolution model"""
+        # Load base model
+        self.model = AIIABase.load(self.model_name)
+
+        # Freeze CNN layers
+        for param in self.model.cnn.parameters():
+            param.requires_grad = False
+
+        # Add upscaling layer
+        hidden_size = self.model.config.hidden_size
+        self.model.upsample = nn.Sequential(
+            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
+            nn.Conv2d(hidden_size, 3, kernel_size=3, padding=1)
+        )
+
+        # Initialize optimizer and loss function
+        self.criterion = nn.MSELoss()
+        # A parameter tensor does not know its own name, so the trainable
+        # parameters are taken from the new head module directly.
+        self.optimizer = torch.optim.Adam(
+            self.model.upsample.parameters(),
+            lr=self.learning_rate
+        )
+
+        self.best_val_loss = float('inf')
+        self.current_val_loss = float('inf')
+
+    def _train_epoch(self):
+        """Train model for one epoch"""
+        self.model.train()
+        running_loss = 0.0
+
+        for batch in self.train_loader:
+            low_res = batch['low_res'].to(self.device)
+            high_res = batch['high_res'].to(self.device)
+
+            # Forward pass
+            features = self.model.cnn(low_res)
+            outputs = self.model.upsample(features)
+
+            loss = self.criterion(outputs, high_res)
+
+            # Backward pass and optimize
+            self.optimizer.zero_grad()
+            loss.backward()
+            self.optimizer.step()
+
+            running_loss += loss.item()
+
+        epoch_loss = running_loss / len(self.train_loader)
+        print(f"Train Loss: {epoch_loss:.4f}")
+
+    def _validate_epoch(self):
+        """Validate model performance and publish the result as ``current_val_loss``."""
+        self.model.eval()
+        val_loss = 0.0
+
+        with torch.no_grad():
+            for batch in self.val_loader:
+                low_res = batch['low_res'].to(self.device)
+                high_res = batch['high_res'].to(self.device)
+
+                features = self.model.cnn(low_res)
+                outputs = self.model.upsample(features)
+
+                loss = self.criterion(outputs, high_res)
+                val_loss += loss.item()
+
+        avg_val_loss = val_loss / len(self.val_loader)
+        print(f"Validation Loss: {avg_val_loss:.4f}")
+
+        # best_val_loss is deliberately left alone here: train() compares
+        # this epoch's loss against it, and lowering it first would make
+        # that comparison fail every time.
+        self.current_val_loss = avg_val_loss
+
+    def save_model(self):
+        """Record the latest validation loss as the best one, then write the checkpoint."""
+        # save_model() is invoked by train() when an epoch improved on the
+        # best loss; min() keeps the stored value correct if it is called
+        # at any other time.
+        self.best_val_loss = min(self.best_val_loss, self.current_val_loss)
+        super().save_model()
+
+    def __repr__(self):
+        return f"Model ({self.model_name}, batch_size={self.batch_size})"
+
+
+# Example usage:
+if __name__ == "__main__":
+    # Finetuner receives its parquet locations through ``dataset_paths``; the
+    # dict form names the training file and the validation file explicitly.
+    finetuner = Finetuner(
+        dataset_paths={
+            "train": "/root/training_data/vision-dataset/image_upscaler.parquet",
+            "val": "/root/training_data/vision-dataset/image_vec_upscaler.parquet",
+        },
+        batch_size=2,
+        learning_rate=0.001
+    )
+    
+    # ``train`` is the epoch-loop entry point defined on TrainingBase.
+    finetuner.train(num_epochs=10)
--- a/src/aiunn/inference.py
+++ b/src/aiunn/inference.py
@ -0,0 +1,73 @@
+import torch
+from PIL import Image
+import torchvision.transforms as T
+from torch.nn import functional as F
+from aiia.model import AIIABase
+
+class UpScaler:
+    """Runs a fine-tuned AIIA model on a PIL image and returns it at twice the original size."""
+
+    def __init__(self, model_path="AIIA-base-512-upscaler", device="cuda"):
+        """Load the model and build the preprocessing pipeline.
+
+        Args:
+            model_path: Identifier or path handed to ``AIIABase.load``.
+            device: Torch device to run on. A CUDA device is replaced by the
+                CPU when CUDA is not available on this machine.
+        """
+        # Without this check the default "cuda" fails on CPU-only hosts.
+        if str(device).startswith("cuda") and not torch.cuda.is_available():
+            device = "cpu"
+        self.device = torch.device(device)
+        # NOTE(review): upscale() calls self.model.cnn and self.model.upsample,
+        # so the object returned by AIIABase.load must expose both — confirm
+        # that the stored model includes the upsample head.
+        self.model = AIIABase.load(model_path).to(self.device)
+        self.model.eval()
+
+        # Preprocessing transforms
+        self.preprocess = T.Compose([
+            T.Lambda(self._pad_to_square),
+            T.Resize(512),
+            T.ToTensor(),
+            T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
+        ])
+
+    def _pad_to_square(self, pil_img):
+        """Pad image to square while maintaining aspect ratio.
+
+        The shorter side is padded with zeros, split as evenly as possible
+        between both edges (the extra pixel goes to the right/bottom).
+        """
+        w, h = pil_img.size
+        max_side = max(w, h)
+        hp = (max_side - w) // 2
+        vp = (max_side - h) // 2
+        # Order expected by pad(): left, top, right, bottom.
+        padding = (hp, vp, max_side - w - hp, max_side - h - vp)
+        return T.functional.pad(pil_img, padding, 0, 'constant')
+
+    def _remove_padding(self, tensor, original_size):
+        """Remove padding added during preprocessing.
+
+        Args:
+            tensor: 4-D tensor (its shape is unpacked as ``_, _, h, w``)
+                covering the padded square image.
+            original_size: ``(width, height)`` of the image before padding.
+
+        Returns:
+            The unpadded region, resized to twice the original height and width.
+        """
+        _, _, h, w = tensor.shape
+        orig_w, orig_h = original_size
+        longest_side = max(orig_w, orig_h)
+
+        # Scale against the tensor's actual size instead of a fixed 512: the
+        # network input is 512 px, but the model output need not be (the head
+        # built in finetune.py upsamples by 2 — presumably giving 1024 px,
+        # confirm against the model). For a 512 px tensor the numbers are the
+        # same as with the fixed constant.
+        new_w = max(1, int(orig_w * (w / longest_side)))
+        new_h = max(1, int(orig_h * (h / longest_side)))
+
+        # Calculate padding offsets
+        pad_w = (w - new_w) // 2
+        pad_h = (h - new_h) // 2
+
+        # Remove padding
+        unpad = tensor[:, :, pad_h:pad_h+new_h, pad_w:pad_w+new_w]
+
+        # Resize to target 2x resolution
+        return F.interpolate(unpad, size=(orig_h*2, orig_w*2), mode='bilinear', align_corners=False)
+
+    def upscale(self, input_image):
+        """Return ``input_image`` upscaled to twice its width and height as a PIL image."""
+        # Preprocess
+        # The normalisation step is defined for three channels, so greyscale,
+        # palette and RGBA inputs are converted to RGB first.
+        rgb_image = input_image.convert('RGB')
+        original_size = rgb_image.size
+        input_tensor = self.preprocess(rgb_image).unsqueeze(0).to(self.device)
+
+        # Inference
+        with torch.no_grad():
+            features = self.model.cnn(input_tensor)
+            output = self.model.upsample(features)
+
+        # Postprocess
+        output = self._remove_padding(output, original_size)
+
+        # Convert to PIL Image
+        output = output.squeeze(0).cpu().detach()
+        # Undo Normalize(mean=0.5, std=0.5) and clip to the valid range.
+        output = (output * 0.5 + 0.5).clamp(0, 1)
+        return T.functional.to_pil_image(output)
+
+# Usage example
+if __name__ == "__main__":
+    # Demo: load the default model, upscale one image from disk, save the result.
+    scaler = UpScaler()
+    result = scaler.upscale(Image.open("input.jpg"))
+    result.save("output_2x.jpg")