Merge pull request 'develop' (#4) from develop into main

Reviewed-on: #4
2025-03-01 21:47:16 +00:00 · 2025-03-01 21:47:16 +00:00 · cdf1e19280
parent feea191388 81eaebac5b
commit cdf1e19280
13 changed files with 807 additions and 0 deletions
--- a/example.py
+++ b/example.py
@ -0,0 +1,106 @@
+from aiia import AIIABase
+from aiunn import aiuNN
+from aiunn import aiuNNTrainer
+import pandas as pd
+import io
+import base64
+from PIL import Image, ImageFile
+from torch.utils.data import Dataset
+from torchvision import transforms
+
+
+
+class UpscaleDataset(Dataset):
+    """Paired low-/high-resolution image dataset read from parquet files.
+
+    Every parquet file must expose ``image_512`` and ``image_1024`` columns
+    holding encoded image data, either as raw ``bytes`` or as base64 ``str``.
+    Samples that fail to decode are remembered in ``failed_indices`` and are
+    replaced by the next loadable sample.
+    """
+
+    def __init__(self, parquet_files: list, transform=None, samples_per_file=10_000,
+                 low_res_size=(410, 410), high_res_size=(820, 820)):
+        """
+        Args:
+            parquet_files: Paths of the parquet files to read.
+            transform: Optional callable applied to both PIL images (e.g. ``ToTensor``).
+            samples_per_file: Maximum number of leading rows taken from each file.
+            low_res_size: ``(width, height)`` the low resolution image is resized to.
+            high_res_size: ``(width, height)`` the high resolution image is resized to.
+
+        Raises:
+            ValueError: If any cell is neither ``bytes`` nor ``str``.
+        """
+        # Load a subset from each parquet file and concatenate once; growing a
+        # DataFrame inside the loop re-copies all previously loaded rows each time.
+        frames = [
+            pd.read_parquet(parquet_file, columns=['image_512', 'image_1024']).head(samples_per_file)
+            for parquet_file in parquet_files
+        ]
+        combined_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
+
+        # Validate rows (ensuring each value is bytes or str)
+        self.df = combined_df.apply(self._validate_row, axis=1)
+        self.transform = transform
+        self.low_res_size = low_res_size
+        self.high_res_size = high_res_size
+        self.failed_indices = set()
+
+    def _validate_row(self, row):
+        """Return ``row`` unchanged, raising ``ValueError`` on an unsupported cell type."""
+        for col in ['image_512', 'image_1024']:
+            if not isinstance(row[col], (bytes, str)):
+                raise ValueError(f"Invalid data type in column {col}: {type(row[col])}")
+        return row
+
+    def _decode_image(self, data):
+        """Return raw image bytes; ``str`` input is treated as base64.
+
+        Raises:
+            RuntimeError: If decoding fails or the type is unsupported.
+        """
+        try:
+            if isinstance(data, str):
+                return base64.b64decode(data)
+            elif isinstance(data, bytes):
+                return data
+            raise ValueError(f"Unsupported data type: {type(data)}")
+        except Exception as e:
+            # Chain the cause so the original traceback is not lost.
+            raise RuntimeError(f"Decoding failed: {str(e)}") from e
+
+    @staticmethod
+    def _open_on_black(image_bytes, size):
+        """Decode image bytes, flatten transparency onto black and resize to ``size``."""
+        # Set on every call (not once in __init__) so the flag is applied in
+        # whichever process actually decodes the image.
+        ImageFile.LOAD_TRUNCATED_IMAGES = True
+        # Open image bytes with Pillow and convert to RGBA first
+        rgba = Image.open(io.BytesIO(image_bytes)).convert('RGBA')
+        # Composite over a black background to get a true 3-channel RGB image
+        rgb = Image.new("RGB", rgba.size, (0, 0, 0))
+        rgb.paste(rgba, mask=rgba.split()[3])
+        # Resize the image to reduce VRAM usage
+        return rgb.resize(size, Image.LANCZOS)
+
+    def _load_pair(self, idx):
+        """Load, decode and transform the (low_res, high_res) pair stored at ``idx``."""
+        row = self.df.iloc[idx]
+        low_res = self._open_on_black(self._decode_image(row['image_512']), self.low_res_size)
+        high_res = self._open_on_black(self._decode_image(row['image_1024']), self.high_res_size)
+
+        # If a transform is provided (e.g. conversion to Tensor), apply it
+        if self.transform:
+            low_res = self.transform(low_res)
+            high_res = self.transform(high_res)
+        return low_res, high_res
+
+    def __len__(self):
+        return len(self.df)
+
+    def __getitem__(self, idx):
+        """Return the pair at ``idx``, falling back to the next loadable sample.
+
+        Raises:
+            IndexError: If ``idx`` is outside the dataset (this also lets plain
+                ``for`` iteration over the dataset terminate).
+            RuntimeError: If no sample in the dataset can be loaded.
+        """
+        total = len(self)
+        if not -total <= idx < total:
+            raise IndexError(f"Index {idx} out of range for dataset of size {total}")
+
+        # Walk forward (wrapping around) at most once through the dataset; the
+        # previous unbounded recursion overflowed the stack when every sample failed.
+        for offset in range(total):
+            candidate = (idx + offset) % total
+            if candidate in self.failed_indices:
+                continue
+            try:
+                return self._load_pair(candidate)
+            except Exception as e:
+                # Best effort: report the broken sample, remember it, try the next one.
+                print(f"\nError at index {candidate}: {str(e)}")
+                self.failed_indices.add(candidate)
+
+        raise RuntimeError("No loadable samples left in the dataset")
+
+
+if __name__ == "__main__":
+    # Restore the pretrained AIIA backbone in bf16 and wrap it with the upscaler
+    backbone = AIIABase.load("/root/vision/AIIA/AIIA-base-512", precision="bf16")
+    upscaler = aiuNN(backbone)
+
+    # The trainer builds UpscaleDataset itself from the keyword arguments below
+    trainer = aiuNNTrainer(upscaler, dataset_class=UpscaleDataset)
+
+    parquet_sources = [
+        "/root/training_data/vision-dataset/image_upscaler.parquet",
+        "/root/training_data/vision-dataset/image_vec_upscaler.parquet",
+    ]
+    to_tensor = transforms.Compose([transforms.ToTensor()])
+    trainer.load_data(
+        dataset_params={
+            'parquet_files': parquet_sources,
+            'transform': to_tensor,
+            'samples_per_file': 5000,
+        },
+        batch_size=1,
+    )
+
+    # Run the fine-tuning loop; checkpoints and logs land under "trained_models"
+    trainer.finetune(output_path="trained_models")
--- a/input.jpg
+++ b/input.jpg
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,17 @@
+[build-system]
+requires = ["setuptools>=45", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "aiunn"
+version = "0.1.1"
+description = "Finetuner for image upscaling using AIIA"
+readme = "README.md"
+requires-python = ">=3.10"
+license = {file = "LICENSE"}
+authors = [
+    {name = "Falko Habel", email = "falko.habel@gmx.de"},
+]
+
+[project.urls]
+"Homepage" = "https://gitea.fabelous.app/Machine-Learning/aiuNN"
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,5 @@
+torch
+aiia
+pillow
+torchvision
+scikit-learn
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,14 @@
+from setuptools import setup, find_packages
+
+def _read_requirements(path="requirements.txt"):
+    """Return the requirement specifiers listed in ``path``.
+
+    Blank lines and comment lines are skipped. The comment check runs on the
+    stripped line so indented ``#`` comments are ignored as well, and the file
+    is closed deterministically via the context manager.
+    """
+    with open(path, encoding="utf-8") as handle:
+        stripped_lines = [line.strip() for line in handle]
+    return [line for line in stripped_lines if line and not line.startswith("#")]
+
+
+setup(
+    name="aiunn",
+    version="0.1.1",
+    packages=find_packages(where="src"),
+    package_dir={"": "src"},
+    install_requires=_read_requirements(),
+    python_requires=">=3.10",
+)
--- a/src/aiunn/init.py
+++ b/src/aiunn/init.py
@ -0,0 +1,6 @@
+from .finetune.trainer import aiuNNTrainer
+from .upsampler.aiunn import aiuNN
+from .upsampler.config import aiuNNConfig
+from .inference.inference import aiuNNInference
+
+__version__ = "0.1.1"
--- a/src/aiunn/finetune/init.py
+++ b/src/aiunn/finetune/init.py
@ -0,0 +1,3 @@
+from .trainer import aiuNNTrainer 
+
+__all__ = ["aiuNNTrainer" ]
--- a/src/aiunn/finetune/trainer.py
+++ b/src/aiunn/finetune/trainer.py
@ -0,0 +1,290 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import os
+import csv
+from torch.amp import autocast, GradScaler
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+from torch.utils.checkpoint import checkpoint
+import gc
+import time
+import shutil
+
+
+class EarlyStopping:
+    """Tracks a loss value across epochs and flags when training should stop.
+
+    Attributes:
+        patience: Number of consecutive non-improving calls before ``early_stop`` is set.
+        min_delta: Minimum decrease relative to ``best_loss`` that counts as an improvement.
+        best_loss: Lowest loss accepted as an improvement so far.
+        counter: Consecutive calls without improvement.
+        early_stop: Becomes ``True`` once ``counter`` reaches ``patience``.
+    """
+
+    def __init__(self, patience=3, min_delta=0.001):
+        self.patience, self.min_delta = patience, min_delta
+        self.best_loss = float('inf')
+        self.counter = 0
+        self.early_stop = False
+
+    def __call__(self, epoch_loss):
+        """Record ``epoch_loss``; return ``True`` if it improved on the best loss."""
+        threshold = self.best_loss - self.min_delta
+        if not epoch_loss < threshold:
+            # No sufficient improvement: count it and possibly request a stop
+            self.counter += 1
+            if self.counter >= self.patience:
+                self.early_stop = True
+            return False
+
+        self.best_loss = epoch_loss
+        self.counter = 0
+        return True
+
+class aiuNNTrainer:
+    """Fine-tuning driver for an upscaler model.
+
+    Handles data loading, a mixed-precision training loop with optional
+    activation checkpointing, per-epoch checkpoints, CSV logging and early
+    stopping. The model is expected to expose a ``save(path)`` method.
+    """
+
+    def __init__(self, upscaler_model, dataset_class=None):
+        """
+        Initialize the upscaler trainer
+
+        Args:
+            upscaler_model: The model to fine-tune
+            dataset_class: The dataset class to use for loading data (optional)
+        """
+        self.model = upscaler_model
+        self.dataset_class = dataset_class
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model = self.model.to(self.device, memory_format=torch.channels_last)
+        self.criterion = nn.MSELoss()
+        self.optimizer = None
+        # Gradient scaling is only meaningful on CUDA; disabling it explicitly on
+        # CPU keeps the same behaviour without the "CUDA is not available" warning.
+        self.scaler = GradScaler(enabled=self.device.type == "cuda")
+        self.best_loss = float('inf')
+        self.use_checkpointing = True
+        self.data_loader = None
+        self.validation_loader = None
+        # Populated by _setup_logging() when finetune() starts.
+        self.log_dir = None
+        self.checkpoint_dir = None
+        self.csv_path = None
+
+    def load_data(self, dataset_params=None, batch_size=1, validation_split=0.2, custom_train_dataset=None, custom_val_dataset=None):
+        """
+        Load data using either a custom dataset instance or the dataset class provided at initialization
+
+        Args:
+            dataset_params (dict/list): Parameters to pass to the dataset class constructor.
+                A dict is expanded as keyword arguments; anything else is passed as ``parquet_files``.
+            batch_size (int): Batch size for training
+            validation_split (float): Proportion of data to use for validation, in ``[0, 1)``
+            custom_train_dataset: A pre-instantiated dataset to use for training (optional)
+            custom_val_dataset: A pre-instantiated dataset to use for validation (optional)
+
+        Returns:
+            Tuple ``(train_loader, validation_loader)``; the second item is ``None``
+            when there is no (or an empty) validation set.
+
+        Raises:
+            ValueError: If neither a dataset class nor a custom dataset is available,
+                or if ``validation_split`` is outside ``[0, 1)``.
+        """
+        # If custom datasets are provided directly, use them
+        if custom_train_dataset is not None:
+            train_dataset = custom_train_dataset
+            val_dataset = custom_val_dataset
+        else:
+            # Otherwise instantiate dataset using the class and parameters
+            if self.dataset_class is None:
+                raise ValueError("No dataset class provided. Either provide a dataset class at initialization or custom datasets.")
+            if not 0.0 <= validation_split < 1.0:
+                raise ValueError(f"validation_split must be in [0, 1), got {validation_split}")
+
+            # Create dataset instance
+            if isinstance(dataset_params, dict):
+                dataset_kwargs = dataset_params
+            else:
+                dataset_kwargs = {'parquet_files': dataset_params}
+            dataset = self.dataset_class(**dataset_kwargs)
+
+            # Split into train and validation sets
+            dataset_size = len(dataset)
+            val_size = int(validation_split * dataset_size)
+            train_size = dataset_size - val_size
+
+            train_dataset, val_dataset = torch.utils.data.random_split(
+                dataset, [train_size, val_size]
+            )
+
+        # An empty validation set is treated as "no validation" instead of
+        # building a loader that yields nothing.
+        if val_dataset is not None and len(val_dataset) == 0:
+            val_dataset = None
+
+        # Pinned host memory only helps (and only avoids a warning) with CUDA.
+        pin_memory = self.device.type == "cuda"
+
+        # Create data loaders
+        self.data_loader = DataLoader(
+            train_dataset,
+            batch_size=batch_size,
+            shuffle=True,
+            pin_memory=pin_memory
+        )
+
+        if val_dataset is not None:
+            self.validation_loader = DataLoader(
+                val_dataset,
+                batch_size=batch_size,
+                shuffle=False,
+                pin_memory=pin_memory
+            )
+            print(f"Loaded {len(train_dataset)} training samples and {len(val_dataset)} validation samples")
+        else:
+            self.validation_loader = None
+            print(f"Loaded {len(train_dataset)} training samples (no validation set)")
+
+        return self.data_loader, self.validation_loader
+
+    def _setup_logging(self, output_path):
+        """Set up directory structure for logging and model checkpoints"""
+        timestamp = time.strftime("%Y%m%d-%H%M%S")
+        self.log_dir = os.path.join(output_path, f"training_run_{timestamp}")
+        os.makedirs(self.log_dir, exist_ok=True)
+
+        # Create checkpoint directory
+        self.checkpoint_dir = os.path.join(self.log_dir, "checkpoints")
+        os.makedirs(self.checkpoint_dir, exist_ok=True)
+
+        # Set up CSV logging; the header depends on whether validation is used
+        self.csv_path = os.path.join(self.log_dir, 'training_log.csv')
+        with open(self.csv_path, mode='w', newline='') as file:
+            writer = csv.writer(file)
+            if self.validation_loader:
+                writer.writerow(['Epoch', 'Train Loss', 'Validation Loss', 'Improved'])
+            else:
+                writer.writerow(['Epoch', 'Train Loss', 'Improved'])
+
+    def _evaluate(self):
+        """Evaluate the model on validation data.
+
+        Returns:
+            The SUM of the per-batch losses (the caller divides by the number
+            of batches), or ``0.0`` when no validation loader is set.
+        """
+        if self.validation_loader is None:
+            return 0.0
+
+        self.model.eval()
+        val_loss = 0.0
+
+        with torch.no_grad():
+            for low_res, high_res in tqdm(self.validation_loader, desc="Validating"):
+                low_res = low_res.to(self.device, non_blocking=True).to(memory_format=torch.channels_last)
+                high_res = high_res.to(self.device, non_blocking=True)
+
+                with autocast(device_type=self.device.type):
+                    outputs = self.model(low_res)
+                    loss = self.criterion(outputs, high_res)
+
+                val_loss += loss.item()
+
+                del low_res, high_res, outputs, loss
+
+        # Restore training mode for the next epoch
+        self.model.train()
+        return val_loss
+
+    def _save_checkpoint(self, epoch, is_best=False):
+        """Save the model for ``epoch`` and, if ``is_best``, refresh ``best_model``."""
+        checkpoint_path = os.path.join(self.checkpoint_dir, f"epoch_{epoch}.pt")
+        best_model_path = os.path.join(self.log_dir, "best_model")
+
+        # Save the model checkpoint
+        self.model.save(checkpoint_path)
+
+        # If this is the best model so far, replace the previous best_model
+        if is_best:
+            if os.path.exists(best_model_path):
+                shutil.rmtree(best_model_path)
+            self.model.save(best_model_path)
+            print(f"Saved new best model with loss: {self.best_loss:.6f}")
+
+    def finetune(self, output_path, epochs=10, lr=1e-4, patience=3, min_delta=0.001):
+        """
+        Finetune the upscaler model
+
+        Args:
+            output_path (str): Directory to save models and logs
+            epochs (int): Maximum number of training epochs
+            lr (float): Learning rate
+            patience (int): Early stopping patience
+            min_delta (float): Minimum improvement for early stopping
+
+        Returns:
+            The best (lowest) monitored loss: validation loss when a validation
+            loader exists, otherwise the average training loss.
+
+        Raises:
+            ValueError: If no data has been loaded or the training loader is empty.
+        """
+        # Check if data is loaded
+        if self.data_loader is None:
+            raise ValueError("Data not loaded. Call load_data first.")
+        if len(self.data_loader) == 0:
+            # Would otherwise end in a ZeroDivisionError when averaging the loss
+            raise ValueError("Training data loader is empty; nothing to fine-tune on.")
+
+        # Setup optimizer
+        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)
+
+        # Set up logging
+        self._setup_logging(output_path)
+
+        # Setup early stopping
+        early_stopping = EarlyStopping(patience=patience, min_delta=min_delta)
+
+        # Training loop
+        self.model.train()
+
+        for epoch in range(epochs):
+            # Training phase
+            epoch_loss = 0.0
+            progress_bar = tqdm(self.data_loader, desc=f"Epoch {epoch + 1}/{epochs}")
+
+            for low_res, high_res in progress_bar:
+                # Move data to GPU with channels_last format where possible
+                low_res = low_res.to(self.device, non_blocking=True).to(memory_format=torch.channels_last)
+                high_res = high_res.to(self.device, non_blocking=True)
+
+                self.optimizer.zero_grad()
+
+                with autocast(device_type=self.device.type):
+                    if self.use_checkpointing:
+                        # Ensure the input tensor requires gradient so that checkpointing records the computation graph
+                        low_res.requires_grad_()
+                        # use_reentrant is pinned to the variant this code was written
+                        # for; PyTorch asks for the argument to be passed explicitly.
+                        outputs = checkpoint(self.model, low_res, use_reentrant=True)
+                    else:
+                        outputs = self.model(low_res)
+                    loss = self.criterion(outputs, high_res)
+
+                self.scaler.scale(loss).backward()
+                self.scaler.step(self.optimizer)
+                self.scaler.update()
+
+                batch_loss = loss.item()
+                epoch_loss += batch_loss
+                progress_bar.set_postfix({'loss': batch_loss})
+
+                # Optionally delete variables to free memory
+                del low_res, high_res, outputs, loss
+
+            # Calculate average epoch loss
+            avg_train_loss = epoch_loss / len(self.data_loader)
+
+            # Validation phase (if validation loader exists); otherwise the
+            # training loss is used for improvement tracking
+            if self.validation_loader:
+                val_loss = self._evaluate() / len(self.validation_loader)
+                monitored_loss = val_loss
+                log_row = [epoch + 1, avg_train_loss, val_loss]
+                print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {avg_train_loss:.6f}, Val Loss: {val_loss:.6f}")
+            else:
+                monitored_loss = avg_train_loss
+                log_row = [epoch + 1, avg_train_loss]
+                print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {avg_train_loss:.6f}")
+
+            is_improved = monitored_loss < self.best_loss
+            if is_improved:
+                self.best_loss = monitored_loss
+
+            # Log results
+            log_row.append("Yes" if is_improved else "No")
+            with open(self.csv_path, mode='a', newline='') as file:
+                csv.writer(file).writerow(log_row)
+
+            # Save checkpoint
+            self._save_checkpoint(epoch + 1, is_best=is_improved)
+
+            # Perform garbage collection and clear GPU cache after each epoch
+            gc.collect()
+            torch.cuda.empty_cache()
+
+            # Check early stopping
+            early_stopping(monitored_loss)
+            if early_stopping.early_stop:
+                print(f"Early stopping triggered at epoch {epoch + 1}")
+                break
+
+        return self.best_loss
+
+    def save(self, output_path=None):
+        """
+        Save the model to the specified path
+
+        Args:
+            output_path (str, optional): Path to save the model. If None, the best model
+                written during training is reused when present; otherwise the current
+                model is saved as ``final_model`` inside the training run directory.
+
+        Returns:
+            The path the model is stored at.
+
+        Raises:
+            ValueError: If no path is given and no training run directory exists yet.
+        """
+        if output_path is None and self.log_dir is not None:
+            best_model_path = os.path.join(self.log_dir, "best_model")
+            if os.path.exists(best_model_path):
+                print(f"Best model already saved at {best_model_path}")
+                return best_model_path
+            else:
+                output_path = os.path.join(self.log_dir, "final_model")
+
+        if output_path is None:
+            raise ValueError("No output path specified and no training has been done yet.")
+
+        self.model.save(output_path)
+        print(f"Model saved to {output_path}")
+        return output_path
--- a/src/aiunn/inference/init.py
+++ b/src/aiunn/inference/init.py
@ -0,0 +1,3 @@
+from .inference import aiuNNInference
+
+__all__ = ["aiuNNInference"]
--- a/src/aiunn/inference/inference.py
+++ b/src/aiunn/inference/inference.py
@ -0,0 +1,226 @@
+import os
+import torch
+import numpy as np
+from PIL import Image
+import io
+from typing import Union, Optional, Tuple, List
+from ..upsampler.aiunn import aiuNN
+
+
+class aiuNNInference:
+    """
+    Inference class for aiuNN upsampling model.
+    Handles model loading, image upscaling, and output processing.
+    """
+    def __init__(self, model_path: str, precision: Optional[str] = None, device: Optional[str] = None):
+        """
+        Initialize the inference class by loading the aiuNN model.
+
+        Args:
+            model_path: Path to the saved model directory
+            precision: Optional precision setting ('fp16', 'bf16', or None for default)
+            device: Optional device specification ('cuda', 'cpu', or None for auto-detection)
+        """
+        # Set device
+        if device is None:
+            self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        else:
+            self.device = device
+
+        # Load the model with specified precision
+        self.model = aiuNN.load(model_path, precision=precision)
+        self.model.to(self.device)
+        self.model.eval()
+
+        # Store configuration for reference
+        self.config = self.model.config
+
+    def preprocess_image(self, image: Union[str, Image.Image, np.ndarray, torch.Tensor]) -> torch.Tensor:
+        """
+        Preprocess the input image to match model requirements.
+
+        Args:
+            image: Input image as file path, PIL Image, numpy array, or torch tensor.
+                Arrays may be CHW or HWC (optionally with a leading batch axis);
+                a leading axis of length 3 is taken to mean CHW.
+
+        Returns:
+            Float tensor with a batch dimension, scaled to [0, 1], placed on the
+            inference device and cast to the dtype of the model parameters.
+
+        Raises:
+            TypeError: If ``image`` is none of the supported types.
+        """
+        if isinstance(image, str):
+            # Load from file path; the context manager releases the file handle
+            with Image.open(image) as source:
+                image = source.convert('RGB')
+
+        if isinstance(image, Image.Image):
+            # Force 3 channels so grayscale / palette / RGBA inputs do not break
+            # the HWC -> CHW transpose below.
+            pixels = np.array(image.convert('RGB'))
+            image = torch.from_numpy(pixels.transpose(2, 0, 1)).float()
+        elif isinstance(image, np.ndarray):
+            if image.ndim == 3 and image.shape[0] != 3 and image.shape[-1] == 3:
+                # HWC to CHW format
+                image = image.transpose(2, 0, 1)
+            elif image.ndim == 4 and image.shape[1] != 3 and image.shape[-1] == 3:
+                # NHWC to NCHW format
+                image = image.transpose(0, 3, 1, 2)
+            image = torch.from_numpy(image).float()
+        elif isinstance(image, torch.Tensor):
+            # Integer tensors must become floating point before scaling
+            image = image.float()
+        else:
+            raise TypeError(f"Unsupported image type: {type(image)}")
+
+        # Normalize to [0, 1] range if needed (heuristic: values above 1 mean 0-255 data)
+        if image.numel() > 0 and image.max() > 1.0:
+            image = image / 255.0
+
+        # Add batch dimension if not present
+        if image.dim() == 3:
+            image = image.unsqueeze(0)
+
+        # Match the model's parameter dtype so reduced-precision weights do not
+        # raise an input/weight dtype mismatch.
+        reference = next(self.model.parameters(), None)
+        target_dtype = reference.dtype if reference is not None else image.dtype
+
+        # Move to device
+        return image.to(device=self.device, dtype=target_dtype)
+
+    def postprocess_tensor(self, tensor: torch.Tensor) -> Image.Image:
+        """
+        Convert output tensor to PIL Image.
+
+        Args:
+            tensor: Output tensor from model (a single image, optionally with a
+                leading batch axis of size 1)
+
+        Returns:
+            Processed PIL Image
+        """
+        # Move to CPU and convert to numpy; cast to float32 first because numpy
+        # has no bfloat16 dtype.
+        output = tensor.detach().float().cpu().squeeze(0).numpy()
+
+        # Ensure proper range [0, 255]
+        output = np.clip(output * 255, 0, 255).astype(np.uint8)
+
+        # Convert from CHW to HWC for PIL
+        output = output.transpose(1, 2, 0)
+
+        # Create PIL Image
+        return Image.fromarray(output)
+
+    @torch.no_grad()
+    def upscale(self, image: Union[str, Image.Image, np.ndarray, torch.Tensor]) -> Image.Image:
+        """
+        Upscale an image using the aiuNN model.
+
+        Args:
+            image: Input image to upscale
+
+        Returns:
+            Upscaled image as PIL Image
+        """
+        # Preprocess input
+        input_tensor = self.preprocess_image(image)
+
+        # Run inference
+        output_tensor = self.model(input_tensor)
+
+        # Postprocess output
+        return self.postprocess_tensor(output_tensor)
+
+    def save(self, image: Image.Image, output_path: str, format: Optional[str] = None) -> None:
+        """
+        Save the upscaled image to a file.
+
+        Args:
+            image: PIL Image to save
+            output_path: Path where the image should be saved
+            format: Optional format override (e.g., 'PNG', 'JPEG')
+        """
+        # Create directory if it doesn't exist
+        os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
+
+        # Save the image
+        image.save(output_path, format=format)
+
+    def convert_to_binary(self, image: Image.Image, format: str = 'PNG') -> bytes:
+        """
+        Convert the image to binary data.
+
+        Args:
+            image: PIL Image to convert
+            format: Image format to use for binary conversion
+
+        Returns:
+            Binary representation of the image
+        """
+        # Use BytesIO to convert to binary
+        binary_output = io.BytesIO()
+        image.save(binary_output, format=format)
+
+        # Get the binary data
+        return binary_output.getvalue()
+
+    def process_batch(self,
+                      images: List[Union[str, Image.Image]],
+                      output_dir: Optional[str] = None,
+                      save_format: str = 'PNG',
+                      return_binary: bool = False) -> Union[List[Image.Image], List[bytes], None]:
+        """
+        Process multiple images one after another.
+
+        Args:
+            images: List of input images (paths or PIL Images)
+            output_dir: Optional directory to save results
+            save_format: Format to use when saving images
+            return_binary: Whether to return binary data instead of PIL Images
+
+        Returns:
+            List of processed images or binary data, or None if only saving
+        """
+        results = []
+
+        for i, img in enumerate(images):
+            # Upscale the image
+            upscaled = self.upscale(img)
+
+            # Save if output directory is provided
+            if output_dir:
+                # Reuse the input file name (without extension) when the input is a path
+                if isinstance(img, str):
+                    filename = os.path.basename(img)
+                    base, _ = os.path.splitext(filename)
+                else:
+                    base = f"upscaled_{i}"
+
+                output_path = os.path.join(output_dir, f"{base}.{save_format.lower()}")
+                self.save(upscaled, output_path, format=save_format)
+
+            # Add to results based on return type
+            if return_binary:
+                results.append(self.convert_to_binary(upscaled, format=save_format))
+            else:
+                results.append(upscaled)
+
+        # Results are returned unless the call was purely "save to output_dir"
+        return results if (not output_dir or return_binary or not save_format) else None
+
+
+# Example usage (can be removed)
+if __name__ == "__main__":
+    # Build the inference helper from a saved model directory
+    engine = aiuNNInference("path/to/model", precision="bf16")
+
+    # Single image: upscale, write to disk, then serialise to bytes
+    result = engine.upscale("input_image.jpg")
+    engine.save(result, "output_image.png")
+    encoded = engine.convert_to_binary(result)
+
+    # Several images at once, written into one output folder
+    batch_inputs = ["image1.jpg", "image2.jpg"]
+    engine.process_batch(batch_inputs, output_dir="output_folder", save_format="PNG")
--- a/src/aiunn/upsampler/init.py
+++ b/src/aiunn/upsampler/init.py
@ -0,0 +1,5 @@
+from .aiunn import aiuNN
+from .config import aiuNNConfig
+
+
+__all__ = ["aiuNN", "aiuNNConfig"]
--- a/src/aiunn/upsampler/aiunn.py
+++ b/src/aiunn/upsampler/aiunn.py
@ -0,0 +1,82 @@
+import os
+import torch
+import torch.nn as nn
+import warnings
+from aiia import AIIA, AIIAConfig, AIIABase
+from .config import aiuNNConfig
+import warnings
+
+
+class aiuNN(AIIA):
+    """Upscaler built on an AIIA base model.
+
+    The base model output is upsampled with ``nn.Upsample`` (settings taken
+    from ``aiuNNConfig``) and projected to 3 channels by a 1x1 convolution.
+    """
+
+    def __init__(self, base_model: AIIABase):
+        """
+        Args:
+            base_model: Base model whose output is upsampled; its
+                ``config.hidden_size`` is used as the channel count fed
+                into the RGB projection.
+        """
+        super().__init__(base_model.config)
+        self.base_model = base_model
+
+        # Pass the unified base configuration using the new parameter.
+        self.config = aiuNNConfig(base_config=base_model.config)
+
+        self.upsample = nn.Upsample(
+            scale_factor=self.config.upsample_scale,
+            mode=self.config.upsample_mode,
+            align_corners=self.config.upsample_align_corners
+        )
+        # Conversion layer: change from hidden size channels to 3 channels.
+        self.to_rgb = nn.Conv2d(
+            in_channels=self.base_model.config.hidden_size,
+            out_channels=3,
+            kernel_size=1
+        )
+
+    def forward(self, x):
+        """Run the base model, upsample its output and project it to 3 channels."""
+        x = self.base_model(x)
+        x = self.upsample(x)
+        x = self.to_rgb(x)  # Ensures output has 3 channels.
+        return x
+
+    @staticmethod
+    def _resolve_dtype(precision, device):
+        """Map a precision name to a torch dtype (``None`` means keep as stored).
+
+        Raises:
+            ValueError: If ``precision`` is neither 'fp16', 'bf16' nor None.
+        """
+        if precision is None:
+            return None
+        name = precision.lower()
+        if name == 'fp16':
+            return torch.float16
+        if name == 'bf16':
+            if device == 'cuda' and not torch.cuda.is_bf16_supported():
+                warnings.warn("BF16 is not supported on this GPU. Falling back to FP16.")
+                return torch.float16
+            return torch.bfloat16
+        raise ValueError("Unsupported precision. Use 'fp16', 'bf16', or leave as None.")
+
+    @classmethod
+    def load(cls, path, precision: str = None):
+        """Rebuild an upsampler from a directory holding its config and ``model.pth``.
+
+        Args:
+            path: Directory the model was saved to.
+            precision: 'fp16', 'bf16' or None; the loaded tensors are cast to
+                that dtype before being copied into the model.
+
+        Returns:
+            The reconstructed ``aiuNN`` instance with weights loaded.
+
+        Raises:
+            ValueError: If ``precision`` is not a supported value.
+        """
+        # Load the configuration from disk.
+        config = AIIAConfig.load(path)
+        # Reconstruct the base model from the loaded configuration.
+        base_model = AIIABase(config)
+        # Instantiate the Upsampler using the proper base model.
+        upsampler = cls(base_model)
+
+        device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        dtype = cls._resolve_dtype(precision, device)
+
+        # weights_only=True restricts unpickling to tensors and plain containers,
+        # so a tampered checkpoint cannot execute arbitrary code on load.
+        state_dict = torch.load(
+            os.path.join(path, "model.pth"),
+            map_location=device,
+            weights_only=True
+        )
+        if dtype is not None:
+            # NOTE(review): load_state_dict copies values into the existing
+            # parameters, so this cast presumably only rounds the weights and
+            # leaves the module's own dtype unchanged - confirm whether
+            # converting the module itself was intended.
+            for key, param in state_dict.items():
+                if torch.is_tensor(param):
+                    state_dict[key] = param.to(dtype)
+        upsampler.load_state_dict(state_dict)
+        return upsampler
+
+
+
+if __name__ == "__main__":
+    from aiia import AIIABase, AIIAConfig
+
+    # Round trip: build an upsampler on a default-config base model, save it
+    # (configuration and weights), then restore it through the overridden load().
+    demo_dir = "hehe"
+    fresh_upsampler = aiuNN(AIIABase(AIIAConfig()))
+    fresh_upsampler.save(demo_dir)
+
+    restored = aiuNN.load(demo_dir, precision="bf16")
+    print("Updated configuration:", restored.config.__dict__)
--- a/src/aiunn/upsampler/config.py
+++ b/src/aiunn/upsampler/config.py
@ -0,0 +1,50 @@
+from aiia import AIIAConfig
+
+
+class aiuNNConfig(AIIAConfig):
+    """AIIA configuration extended with the settings of the upsampling layer."""
+
+    def __init__(
+        self,
+        base_config=None,
+        upsample_scale: int = 2,
+        upsample_mode: str = 'bilinear',
+        upsample_align_corners: bool = False,
+        layers=None,
+        **kwargs
+    ):
+        """
+        Args:
+            base_config: Config object exposing ``to_dict()`` or a plain dict whose
+                entries are forwarded to ``AIIAConfig``; other values are ignored.
+            upsample_scale: Scale factor of the upsample layer.
+            upsample_mode: Interpolation mode of the upsample layer.
+            upsample_align_corners: ``align_corners`` flag of the upsample layer.
+            layers: Optional list of layer description dicts; it is copied, never
+                modified in place.
+            **kwargs: Extra ``AIIAConfig`` arguments; they override ``base_config`` entries.
+        """
+        # Start with a single configuration dictionary.
+        config_data = {}
+        if base_config is not None:
+            # If base_config is an object with a to_dict method, use it.
+            if hasattr(base_config, "to_dict"):
+                config_data.update(base_config.to_dict())
+            elif isinstance(base_config, dict):
+                config_data.update(base_config)
+
+        # Update with any additional keyword arguments (if needed).
+        config_data.update(kwargs)
+
+        # Initialize base AIIAConfig with a single merged configuration.
+        super().__init__(**config_data)
+
+        # Upsampler-specific parameters.
+        self.upsample_scale = upsample_scale
+        self.upsample_mode = upsample_mode
+        self.upsample_align_corners = upsample_align_corners
+
+        # Copy the caller's list so add_upsample_layer() cannot mutate an
+        # object the caller still owns (or shares between configs).
+        self.layers = list(layers) if layers is not None else []
+
+        # Add the upsample layer details only once.
+        self.add_upsample_layer()
+
+    def add_upsample_layer(self):
+        """Append the upsample layer description unless one named 'Upsample' exists."""
+        upsample_layer = {
+            'name': 'Upsample',
+            'type': 'nn.Upsample',
+            'scale_factor': self.upsample_scale,
+            'mode': self.upsample_mode,
+            'align_corners': self.upsample_align_corners
+        }
+        # Append the layer only if it isn't already present.
+        if not any(layer.get('name') == 'Upsample' for layer in self.layers):
+            self.layers.append(upsample_layer)