"""Packaging script for the ``aiunn`` distribution (sources live under ``src/``)."""
from pathlib import Path

from setuptools import setup, find_packages

# Resolve data files relative to this script so the build does not depend on
# the current working directory.
_HERE = Path(__file__).resolve().parent


def _read_requirements(path):
    """Return the requirement specifiers found in *path*.

    Blank lines and comment lines are skipped. The comment test is applied to
    the stripped line so that indented ``#`` comments are ignored as well.
    """
    with open(path, encoding="utf-8") as handle:
        stripped_lines = (raw.strip() for raw in handle)
        return [entry for entry in stripped_lines if entry and not entry.startswith("#")]


def _read_text(path):
    """Return the full text of *path*, closing the file afterwards."""
    with open(path, encoding="utf-8") as handle:
        return handle.read()


setup(
    name="aiunn",
    version="0.1.0",
    packages=find_packages(where="src"),
    package_dir={"": "src"},
    install_requires=_read_requirements(_HERE / "requirements.txt"),
    author="Falko Habel",
    author_email="falko.habel@gmx.de",
    description="Finetuner for image upscaling using AIIA",
    long_description=_read_text(_HERE / "README.md"),
    long_description_content_type="text/markdown",
    # NOTE(review): the URL below still looks like a template placeholder.
    url="https://github.com/yourusername/aiunn",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.7",
)
class ImageDataset(Dataset):
    """Dataset of paired low-/high-resolution images stored as encoded bytes.

    Rows are fetched with ``dataframe.iloc[idx]``; each row must provide two
    columns that hold encoded image bytes readable by ``PIL.Image.open``.

    Args:
        dataframe: Table-like object supporting ``len()`` and ``.iloc``.
        transform: Optional callable applied to both images of a pair.
        low_res_column: Column holding the low-resolution image bytes.
        high_res_column: Column holding the high-resolution image bytes.
    """

    def __init__(self, dataframe, transform=None,
                 low_res_column='image_512', high_res_column='image_1024'):
        self.dataframe = dataframe
        self.transform = transform
        self.low_res_column = low_res_column
        self.high_res_column = high_res_column

    def __len__(self):
        """Return the number of image pairs."""
        return len(self.dataframe)

    @staticmethod
    def _decode_rgb(raw_bytes):
        """Decode encoded image bytes into an RGB ``PIL.Image``."""
        return Image.open(io.BytesIO(raw_bytes)).convert('RGB')

    def __getitem__(self, idx):
        """Return ``{'low_res': ..., 'high_res': ...}`` for row ``idx``."""
        row = self.dataframe.iloc[idx]

        low_res_image = self._decode_rgb(row[self.low_res_column])
        high_res_image = self._decode_rgb(row[self.high_res_column])

        # Compare against None explicitly: a valid transform object may
        # define __len__/__bool__ and evaluate as falsy.
        if self.transform is not None:
            low_res_image = self.transform(low_res_image)
            high_res_image = self.transform(high_res_image)

        return {'low_res': low_res_image, 'high_res': high_res_image}
class TrainingBase:
    """Skeleton trainer that drives an epoch loop and checkpoints the best model.

    Subclasses implement ``_initialize_datasets``, ``_initialize_model``,
    ``_train_epoch`` and ``_validate_epoch``. ``_initialize_model`` is expected
    to set ``self.model`` and ``self.optimizer`` (both are used by ``train`` /
    ``save_model``).
    """

    def __init__(self,
                 model_name: str,
                 dataset_paths: Union[List[str], Dict[str, str]],
                 batch_size: int = 32,
                 learning_rate: float = 0.001,
                 num_workers: int = 4,
                 train_ratio: float = 0.8):
        """
        Args:
            model_name (str): Name of the model to initialize
            dataset_paths (Union[List[str], Dict[str, str]]): Paths to datasets (train and optional validation)
            batch_size (int): Batch size for training
            learning_rate (float): Learning rate for optimizer
            num_workers (int): Number of workers for data loading
            train_ratio (float): Ratio of data to use for training (rest goes to validation)
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.batch_size = batch_size
        self.num_workers = num_workers
        # Must be stored before _initialize_datasets(), which may read it.
        self.train_ratio = train_ratio
        self.model_name = model_name
        self.learning_rate = learning_rate

        # Loss bookkeeping used by train() to decide when to checkpoint.
        self.best_val_loss = float('inf')
        self.current_val_loss = None

        # Initialize datasets and loaders
        self.dataset_paths = dataset_paths
        self._initialize_datasets()

        # Initialize model and training parameters
        self._initialize_model()

    def _initialize_datasets(self):
        """Helper method to initialize datasets"""
        raise NotImplementedError("This method should be implemented in child classes")

    def _initialize_model(self):
        """Helper method to initialize model architecture"""
        raise NotImplementedError("This method should be implemented in child classes")

    def train(self, num_epochs: int = 10):
        """Train for ``num_epochs`` epochs, saving whenever validation improves.

        ``_validate_epoch`` may either return the epoch's validation loss or
        store it in ``self.current_val_loss``; both conventions are honoured.
        """
        self.model.to(self.device)

        for epoch in range(num_epochs):
            print(f"Epoch {epoch+1}/{num_epochs}")

            # Train phase
            self._train_epoch()

            # Validation phase
            reported_loss = self._validate_epoch()
            if reported_loss is not None:
                self.current_val_loss = reported_loss

            # Update the best loss *here* (not inside _validate_epoch) so the
            # comparison below can actually succeed and trigger a save.
            if self.current_val_loss is not None and self.current_val_loss < self.best_val_loss:
                self.best_val_loss = self.current_val_loss
                self.save_model()

    def _train_epoch(self):
        """Train model for one epoch"""
        raise NotImplementedError("This method should be implemented in child classes")

    def _validate_epoch(self):
        """Validate model performance"""
        raise NotImplementedError("This method should be implemented in child classes")

    def save_model(self):
        """Write model/optimizer state and the best loss to ``<model_name>_best.pth``."""
        torch.save({
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'best_val_loss': self.best_val_loss
        }, f"{self.model_name}_best.pth")


class Finetuner(TrainingBase):
    """Trainer that fits a 2x upsampling head on top of a frozen ``AIIABase`` CNN."""

    def __init__(self,
                 model_name: str = "AIIA-Base-512",
                 dataset_paths: Union[List[str], Dict[str, str], None] = None,
                 batch_size: int = 32,
                 learning_rate: float = 0.001,
                 num_workers: int = 4,
                 train_ratio: float = 0.8):
        """
        Specialized trainer for image super resolution tasks

        Args:
            Same as TrainingBase
        """
        super().__init__(model_name, dataset_paths, batch_size, learning_rate, num_workers, train_ratio)

    def _initialize_datasets(self):
        """Load parquet data, split it if needed, and build both DataLoaders.

        ``dataset_paths`` may be a dict with a ``'train'`` entry and an
        optional ``'val'`` entry, or a list of parquet files that are
        concatenated and then split according to ``train_ratio``.

        Raises:
            ValueError: If ``dataset_paths`` is neither a dict nor a list, or
                if a split is required and ``train_ratio`` is not in (0, 1).
        """
        paths = self.dataset_paths
        if isinstance(paths, dict):
            df_train = pd.read_parquet(paths['train'])
            df_val = pd.read_parquet(paths['val']) if 'val' in paths else None
        elif isinstance(paths, list):
            df_train = pd.concat([pd.read_parquet(path) for path in paths], ignore_index=True)
            df_val = None
        else:
            raise ValueError("Invalid dataset_paths format")

        # Split into train and validation sets if needed
        if df_val is None:
            if not 0.0 < self.train_ratio < 1.0:
                raise ValueError("train_ratio must be strictly between 0 and 1")
            df_train, df_val = train_test_split(
                df_train, test_size=1 - self.train_ratio, random_state=42
            )

        # Define preprocessing transforms
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        ])

        # Create datasets and dataloaders
        self.train_dataset = ImageDataset(df_train, transform=self.transform)
        self.val_dataset = ImageDataset(df_val, transform=self.transform)

        self.train_loader = DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers
        )

        self.val_loader = DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers
        )

    def _initialize_model(self):
        """Load the base model, freeze its CNN, attach the head and optimizer."""
        # Load base model
        self.model = AIIABase.load(self.model_name)

        # Freeze CNN layers
        for param in self.model.cnn.parameters():
            param.requires_grad = False

        # Add upscaling layer
        hidden_size = self.model.config.hidden_size
        self.model.upsample = torch.nn.Sequential(
            torch.nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            torch.nn.Conv2d(hidden_size, 3, kernel_size=3, padding=1)
        )

        # Initialize optimizer and loss function. Take the head's parameters
        # directly: matching 'upsample' against str(param) inspects the tensor
        # repr, never matches, and leaves the optimizer with no parameters.
        self.criterion = torch.nn.MSELoss()
        self.optimizer = torch.optim.Adam(
            self.model.upsample.parameters(),
            lr=self.learning_rate
        )

    def _train_epoch(self):
        """Run one optimisation pass over ``train_loader``; return the mean loss."""
        self.model.train()
        running_loss = 0.0

        for batch in self.train_loader:
            low_res = batch['low_res'].to(self.device)
            high_res = batch['high_res'].to(self.device)

            # Forward pass
            features = self.model.cnn(low_res)
            outputs = self.model.upsample(features)

            loss = self.criterion(outputs, high_res)

            # Backward pass and optimize
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            running_loss += loss.item()

        # max(..., 1) avoids ZeroDivisionError on an empty loader.
        epoch_loss = running_loss / max(len(self.train_loader), 1)
        print(f"Train Loss: {epoch_loss:.4f}")
        return epoch_loss

    def _validate_epoch(self):
        """Evaluate on ``val_loader``; store and return the mean validation loss."""
        self.model.eval()
        val_loss = 0.0

        with torch.no_grad():
            for batch in self.val_loader:
                low_res = batch['low_res'].to(self.device)
                high_res = batch['high_res'].to(self.device)

                features = self.model.cnn(low_res)
                outputs = self.model.upsample(features)

                loss = self.criterion(outputs, high_res)
                val_loss += loss.item()

        avg_val_loss = val_loss / max(len(self.val_loader), 1)
        print(f"Validation Loss: {avg_val_loss:.4f}")

        # best_val_loss is deliberately left untouched; train() owns that
        # comparison so it can save the checkpoint on improvement.
        self.current_val_loss = avg_val_loss
        return avg_val_loss

    def __repr__(self):
        return f"Model ({self.model_name}, batch_size={self.batch_size})"


# Example usage:
if __name__ == "__main__":
    finetuner = Finetuner(
        dataset_paths={
            'train': "/root/training_data/vision-dataset/image_upscaler.parquet",
            'val': "/root/training_data/vision-dataset/image_vec_upscaler.parquet",
        },
        batch_size=2,
        learning_rate=0.001
    )

    finetuner.train(num_epochs=10)
class UpScaler:
    """Run 2x image upscaling with a model exposing ``cnn`` and ``upsample``.

    Args:
        model_path: Identifier passed to ``AIIABase.load``.
        device: Torch device spec. If a CUDA device is requested but CUDA is
            unavailable, the CPU is used instead and a warning is emitted.
        input_size: Side length the padded square input is resized to.
    """

    def __init__(self, model_path="AIIA-base-512-upscaler", device="cuda", input_size=512):
        import warnings

        requested = torch.device(device)
        if requested.type == "cuda" and not torch.cuda.is_available():
            warnings.warn("CUDA is not available; UpScaler is falling back to CPU")
            requested = torch.device("cpu")

        self.device = requested
        self.input_size = input_size
        # NOTE(review): upscale() needs `self.model.upsample`; confirm that
        # AIIABase.load restores that head for the given model_path.
        self.model = AIIABase.load(model_path).to(self.device)
        self.model.eval()

        # Preprocessing transforms
        self.preprocess = T.Compose([
            T.Lambda(self._pad_to_square),
            T.Resize(self.input_size),
            T.ToTensor(),
            T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        ])

    def _pad_to_square(self, pil_img):
        """Pad image to square while maintaining aspect ratio"""
        w, h = pil_img.size
        max_side = max(w, h)
        hp = (max_side - w) // 2
        vp = (max_side - h) // 2
        padding = (hp, vp, max_side - w - hp, max_side - h - vp)
        return T.functional.pad(pil_img, padding, 0, 'constant')

    def _remove_padding(self, tensor, original_size):
        """Crop away the square padding and resize to twice the original size.

        The crop window is computed in the coordinate system of ``tensor``
        itself (taken from its shape), because the model output is larger than
        the preprocessed input; using input-space offsets would crop the wrong
        region.

        Args:
            tensor: Model output of shape ``(N, C, H, W)``.
            original_size: ``(width, height)`` of the image before padding.

        Returns:
            Tensor of shape ``(N, C, 2 * height, 2 * width)``.
        """
        _, _, out_h, out_w = tensor.shape
        orig_w, orig_h = original_size

        # The padded square had side `longest`; it now spans out_w x out_h.
        longest = max(orig_w, orig_h)
        scale_x = out_w / longest
        scale_y = out_h / longest

        # Size of the real image content inside the output tensor.
        content_w = min(out_w, max(1, round(orig_w * scale_x)))
        content_h = min(out_h, max(1, round(orig_h * scale_y)))

        # Leading padding mirrors _pad_to_square (floor of half the difference).
        offset_x = min(round(((longest - orig_w) // 2) * scale_x), out_w - content_w)
        offset_y = min(round(((longest - orig_h) // 2) * scale_y), out_h - content_h)

        # Remove padding
        unpad = tensor[:, :, offset_y:offset_y + content_h, offset_x:offset_x + content_w]

        # Resize to target 2x resolution
        return F.interpolate(unpad, size=(orig_h * 2, orig_w * 2), mode='bilinear', align_corners=False)

    def upscale(self, input_image):
        """Return a PIL image twice the width and height of ``input_image``."""
        # The 3-channel Normalize requires RGB input (not RGBA, L, P, ...).
        input_image = input_image.convert("RGB")

        # Preprocess
        original_size = input_image.size
        input_tensor = self.preprocess(input_image).unsqueeze(0).to(self.device)

        # Inference
        with torch.no_grad():
            features = self.model.cnn(input_tensor)
            output = self.model.upsample(features)

        # Postprocess
        output = self._remove_padding(output, original_size)

        # Convert to PIL Image (undo the mean/std 0.5 normalisation first)
        output = output.squeeze(0).cpu().detach()
        output = (output * 0.5 + 0.5).clamp(0, 1)
        return T.functional.to_pil_image(output)


# Usage example
if __name__ == "__main__":
    upscaler = UpScaler()
    input_image = Image.open("input.jpg")
    output_image = upscaler.upscale(input_image)
    output_image.save("output_2x.jpg")