From 7287ba543f2c90d96fbf070c0038a300e7fdcebb Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Thu, 9 Jan 2025 21:53:44 +0100 Subject: [PATCH 01/55] first code commi. First protoyp for AIIA Config --- src/aiia/__init__.py | 0 src/aiia/model/config.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 src/aiia/__init__.py create mode 100644 src/aiia/model/config.py diff --git a/src/aiia/__init__.py b/src/aiia/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/aiia/model/config.py b/src/aiia/model/config.py new file mode 100644 index 0000000..42588ae --- /dev/null +++ b/src/aiia/model/config.py @@ -0,0 +1,30 @@ +import json + +class AIIAConfig: + def __init__( + self, + model_name: str = "AIIA", + radius: int = 3, + activation_function: str = "gelu", + hidden_size: int = 128, + num_hidden_layers: int = 2, + num_channels: int = 3, + ): + self.model_name = model_name + self.radius = radius + self.activation_function = activation_function + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_channels = num_channels + + def save(self, file_path): + # Save config to JSON + with open(file_path, 'w') as f: + json.dump(self.__dict__, f) + + @classmethod + def load(cls, file_path): + # Load config from JSON + with open(file_path, 'r') as f: + config_dict = json.load(f) + return cls(**config_dict) \ No newline at end of file From 0ba2a3f23cd978cd6e71fa0e9bb4d7c48812a32f Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sat, 11 Jan 2025 23:37:16 +0100 Subject: [PATCH 02/55] added first dataloader conzept --- src/data/DataLoader.py | 68 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 src/data/DataLoader.py diff --git a/src/data/DataLoader.py b/src/data/DataLoader.py new file mode 100644 index 0000000..fa7a105 --- /dev/null +++ b/src/data/DataLoader.py @@ -0,0 +1,68 @@ +import os +from PIL import Image +import torch +from torch.utils.data import Dataset, DataLoader +from torchvision import transforms +import random +import numpy as np + +class AIIADataLoader: + def __init__(self, data_dir, batch_size=32, val_split=0.2, seed=42): + self.data_dir = data_dir + self.batch_size = batch_size + self.val_split = val_split + self.seed = seed + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Set random seeds for reproducibility + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(seed) + + # Load and split dataset + self.image_paths = self._load_image_paths() + self.train_paths, self.val_paths = self._split_data(self.image_paths) + + # Define transformations + self.transform = transforms.Compose([ + transforms.ToTensor() + ]) + + # Create datasets and dataloaders + self.train_dataset = AIIADataset(self.train_paths, transform=self.transform) + self.val_dataset = AIIADataset(self.val_paths, transform=self.transform) + + self.train_loader = DataLoader(self.train_dataset, batch_size=batch_size, shuffle=True) + self.val_loader = DataLoader(self.val_dataset, batch_size=batch_size, shuffle=False) + + def _load_image_paths(self): + images = [] + for filename in os.listdir(self.data_dir): + if any(filename.endswith(ext) for ext in ['.png', '.jpg', '.jpeg']): + images.append(os.path.join(self.data_dir, filename)) + return images + + def _split_data(self, paths): + n_val = int(len(paths) * self.val_split) + train_paths = paths[n_val:] + val_paths = paths[:n_val] + return train_paths, val_paths + +class AIIADataset(Dataset): + def __init__(self, image_paths, transform=None): + self.image_paths = image_paths + self.transform = transform + + def __len__(self): + return len(self.image_paths) + + def __getitem__(self, idx): + img_path = self.image_paths[idx] + image = Image.open(img_path).convert('RGB') + + if self.transform: + image = self.transform(image) + + return image \ No newline at end of file From 34b4f35e51eab806564e1fa7e47d49cc7b6ee5b2 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sat, 11 Jan 2025 23:39:08 +0100 Subject: [PATCH 03/55] added init File for the DataLoader --- src/data/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/data/__init__.py diff --git a/src/data/__init__.py b/src/data/__init__.py new file mode 100644 index 0000000..e69de29 From 530f499efb48dcb334ceec7f60bff0756d18fac2 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sat, 11 Jan 2025 23:48:31 +0100 Subject: [PATCH 04/55] added "denosing" and "rotation" as pretraining options in the DataLoader --- src/data/DataLoader.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/src/data/DataLoader.py b/src/data/DataLoader.py index fa7a105..5b413cb 100644 --- a/src/data/DataLoader.py +++ b/src/data/DataLoader.py @@ -7,11 +7,12 @@ import random import numpy as np class AIIADataLoader: - def __init__(self, data_dir, batch_size=32, val_split=0.2, seed=42): + def __init__(self, data_dir, batch_size=32, val_split=0.2, seed=42, task='denoising'): self.data_dir = data_dir self.batch_size = batch_size self.val_split = val_split self.seed = seed + self.task = task self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Set random seeds for reproducibility @@ -31,8 +32,8 @@ class AIIADataLoader: ]) # Create datasets and dataloaders - self.train_dataset = AIIADataset(self.train_paths, transform=self.transform) - self.val_dataset = AIIADataset(self.val_paths, transform=self.transform) + self.train_dataset = AIIADataset(self.train_paths, transform=self.transform, task=self.task) + self.val_dataset = AIIADataset(self.val_paths, transform=self.transform, task=self.task) self.train_loader = DataLoader(self.train_dataset, batch_size=batch_size, shuffle=True) self.val_loader = DataLoader(self.val_dataset, batch_size=batch_size, shuffle=False) @@ -51,13 +52,22 @@ class AIIADataLoader: return train_paths, val_paths class AIIADataset(Dataset): - def __init__(self, image_paths, transform=None): + def __init__(self, image_paths, transform=None, task='denoising'): self.image_paths = image_paths self.transform = transform - + self.task = task + if task == 'denoising': + self.noise_transform = transforms.Compose([ + lambda x: x + torch.randn_like(x) * 0.1 # Adjust noise level as needed + ]) + elif task == 'rotation': + self.rotation_angles = [0, 90, 180, 270] + else: + raise ValueError("Unknown task") + def __len__(self): return len(self.image_paths) - + def __getitem__(self, idx): img_path = self.image_paths[idx] image = Image.open(img_path).convert('RGB') @@ -65,4 +75,12 @@ class AIIADataset(Dataset): if self.transform: image = self.transform(image) - return image \ No newline at end of file + if self.task == 'denoising': + noisy_image = self.noise_transform(image) + return noisy_image, image # input: noisy, target: clean + elif self.task == 'rotation': + angle = random.choice(self.rotation_angles) + rotated_image = transforms.functional.rotate(image, angle) + return rotated_image, angle # input: rotated image, target: angle + else: + raise ValueError("Unknown task") \ No newline at end of file From 6757718569909e2df22d467fe4b7903bffc6b0b6 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 12 Jan 2025 20:45:41 +0100 Subject: [PATCH 05/55] improved datahandling --- src/{ => aiia}/data/DataLoader.py | 34 ++++++++++++++----------------- src/aiia/data/__init__.py | 1 + src/data/__init__.py | 0 3 files changed, 16 insertions(+), 19 deletions(-) rename src/{ => aiia}/data/DataLoader.py (73%) create mode 100644 src/aiia/data/__init__.py delete mode 100644 src/data/__init__.py diff --git a/src/data/DataLoader.py b/src/aiia/data/DataLoader.py similarity index 73% rename from src/data/DataLoader.py rename to src/aiia/data/DataLoader.py index 5b413cb..32f9b4f 100644 --- a/src/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -7,12 +7,11 @@ import random import numpy as np class AIIADataLoader: - def __init__(self, data_dir, batch_size=32, val_split=0.2, seed=42, task='denoising'): + def __init__(self, data_dir, batch_size=32, val_split=0.2, seed=42): self.data_dir = data_dir self.batch_size = batch_size self.val_split = val_split self.seed = seed - self.task = task self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Set random seeds for reproducibility @@ -26,30 +25,27 @@ class AIIADataLoader: self.image_paths = self._load_image_paths() self.train_paths, self.val_paths = self._split_data(self.image_paths) + # Split train paths into denoising and rotation subsets + num_train = len(self.train_paths) + mid_index = num_train // 2 + self.denoise_train_paths = self.train_paths[:mid_index] + self.rotation_train_paths = self.train_paths[mid_index:] + # Define transformations self.transform = transforms.Compose([ transforms.ToTensor() ]) - # Create datasets and dataloaders - self.train_dataset = AIIADataset(self.train_paths, transform=self.transform, task=self.task) - self.val_dataset = AIIADataset(self.val_paths, transform=self.transform, task=self.task) + # Create datasets and dataloaders for denoising and rotation + self.denoise_dataset = AIIADataset(self.denoise_train_paths, transform=self.transform) + self.rotation_dataset = AIIADataset(self.rotation_train_paths, transform=self.transform) - self.train_loader = DataLoader(self.train_dataset, batch_size=batch_size, shuffle=True) + self.denoise_loader = DataLoader(self.denoise_dataset, batch_size=batch_size, shuffle=True) + self.rotation_loader = DataLoader(self.rotation_dataset, batch_size=batch_size, shuffle=True) + + # Validation loader + self.val_dataset = AIIADataset(self.val_paths, transform=self.transform) self.val_loader = DataLoader(self.val_dataset, batch_size=batch_size, shuffle=False) - - def _load_image_paths(self): - images = [] - for filename in os.listdir(self.data_dir): - if any(filename.endswith(ext) for ext in ['.png', '.jpg', '.jpeg']): - images.append(os.path.join(self.data_dir, filename)) - return images - - def _split_data(self, paths): - n_val = int(len(paths) * self.val_split) - train_paths = paths[n_val:] - val_paths = paths[:n_val] - return train_paths, val_paths class AIIADataset(Dataset): def __init__(self, image_paths, transform=None, task='denoising'): diff --git a/src/aiia/data/__init__.py b/src/aiia/data/__init__.py new file mode 100644 index 0000000..d1ae9b0 --- /dev/null +++ b/src/aiia/data/__init__.py @@ -0,0 +1 @@ +from .DataLoader import AIIADataLoader \ No newline at end of file diff --git a/src/data/__init__.py b/src/data/__init__.py deleted file mode 100644 index e69de29..0000000 From b371d747fd6a3a3c620b072e1a404e4b3e3073ae Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 12 Jan 2025 20:49:22 +0100 Subject: [PATCH 06/55] models for training --- src/aiia/__init__.py | 4 ++ src/aiia/model/Model.py | 144 +++++++++++++++++++++++++++++++++++++ src/aiia/model/__init__.py | 2 + 3 files changed, 150 insertions(+) create mode 100644 src/aiia/model/Model.py create mode 100644 src/aiia/model/__init__.py diff --git a/src/aiia/__init__.py b/src/aiia/__init__.py index e69de29..25b8128 100644 --- a/src/aiia/__init__.py +++ b/src/aiia/__init__.py @@ -0,0 +1,4 @@ +# Import submodules +from .model import AIIA, AIIAEncoder +from .data import AIIADataLoader +from .model.config import AIIAConfig \ No newline at end of file diff --git a/src/aiia/model/Model.py b/src/aiia/model/Model.py new file mode 100644 index 0000000..ffc1a84 --- /dev/null +++ b/src/aiia/model/Model.py @@ -0,0 +1,144 @@ +import torch +import torch.nn as nn +import json +import os + +from aiia.model.config import AIIAConfig + + +class AIIA(nn.Module): + def __init__(self, config: AIIAConfig): + super(AIIA, self).__init__() + self.patch_size = 2 * config.radius + 1 + input_dim = self.patch_size * self.patch_size * config.num_channels + + # Define layers based on the number of hidden layers in the config + self.layers = nn.ModuleList() + + # First layer: input_dim to hidden_size + self.layers.append(nn.Linear(input_dim, config.hidden_size)) + + # Intermediate hidden layers: hidden_size to hidden_size + for _ in range(config.num_hidden_layers - 1): + self.layers.append(nn.Linear(config.hidden_size, config.hidden_size)) + + # Last layer: hidden_size back to input_dim + self.layers.append(nn.Linear(config.hidden_size, input_dim)) + + # Store the configuration + self.config = config + + def forward(self, x): + for i, layer in enumerate(self.layers): + if i == len(self.layers) - 1: + # No activation function on the last layer + x = layer(x) + else: + # Apply the specified activation function to all but the last layer + x = self.activation_function(x) + return x + + def activation_function(self, x): + if self.config.activation_function == "relu": + return torch.relu(x) + elif self.config.activation_function == "gelu": + return nn.functional.gelu(x) + elif self.config.activation_function == "sigmoid": + return torch.sigmoid(x) + elif self.config.activation_function == "tanh": + return torch.tanh(x) + else: + raise ValueError(f"Unsupported activation function: {self.config.activation_function}") + + def predict(self, input_image, patch_size=None, stride=None): + if patch_size is None: + patch_size = 2 * self.config.radius + 1 + if stride is None: + stride = patch_size // 2 # Overlap by half the patch size + + # Extract patches from the input image + patches = self.extract_patches(input_image, patch_size, stride) + + # Process each patch through the model + with torch.no_grad(): + predictions = [] + for patch in patches: + patch = patch.view(1, -1).to(self.device) + pred = self(patch) + predictions.append(pred.view(patch_size, patch_size, self.config.num_channels).cpu()) + + # Reconstruct the image from the predicted patches + output_image = torch.zeros_like(input_image) + count_map = torch.zeros_like(input_image) + + patch_idx = 0 + for y in range(0, input_image.shape[1] - patch_size + 1, stride): + for x in range(0, input_image.shape[2] - patch_size + 1, stride): + output_image[:, y:y+patch_size, x:x+patch_size] += predictions[patch_idx] + count_map[:, y:y+patch_size, x:x+patch_size] += 1 + patch_idx += 1 + + # Average the overlapping predictions + output_image /= count_map + + return output_image + + @staticmethod + def extract_patches(image, patch_size, stride): + patches = [] + for y in range(0, image.shape[1] - patch_size + 1, stride): + for x in range(0, image.shape[2] - patch_size + 1, stride): + patch = image[:, y:y+patch_size, x:x+patch_size] + patches.append(patch) + return patches + + @staticmethod + def extract_patches(image, patch_size, stride=None): + if stride is None: + stride = patch_size + + C, H, W = image.shape + patches = [] + + for y in range(0, H - patch_size + 1, stride): + for x in range(0, W - patch_size + 1, stride): + patch = image[:, y:y+patch_size, x:x+patch_size] + patches.append(patch) + + return torch.stack(patches) + + def save(self, folderpath: str): + # Ensure the folder exists + os.makedirs(folderpath, exist_ok=True) + + # Save the model state dictionary + model_state_dict = self.state_dict() + + # Serialize and save the configuration as JSON + with open(os.path.join(folderpath, 'config.json'), 'w') as f: + json.dump(self.config.__dict__, f) + + # Save the model state dictionary + torch.save(model_state_dict, os.path.join(folderpath, 'model.pth')) + + def load(self, folderpath: str): + with open(os.path.join(folderpath, 'config.json'), 'r') as f: + config_dict = json.load(f) + + # Assuming Config has a constructor that takes a dictionary + self.config = AIIAConfig(**config_dict) + + # Load the model state dictionary into the current instance + model_state_dict = torch.load(os.path.join(folderpath, 'model.pth')) + self.load_state_dict(model_state_dict) + + return config_dict, model_state_dict + + +class AIIAEncoder(AIIA): + def __init__(self, config): + super().__init__(config) + self.encoder = torch.nn.Sequential(*list(self.layers.children())[:config.encoder_layers]) + + def forward(self, x): + return self.encoder(x) \ No newline at end of file diff --git a/src/aiia/model/__init__.py b/src/aiia/model/__init__.py new file mode 100644 index 0000000..8591251 --- /dev/null +++ b/src/aiia/model/__init__.py @@ -0,0 +1,2 @@ +from .config import AIIAConfig +from .Model import AIIA, AIIAEncoder \ No newline at end of file From cbacd5e03c7649c559c0a294aae81cd20e0b40be Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 13 Jan 2025 11:06:30 +0100 Subject: [PATCH 07/55] imrpoved Dataloader to actually load the images --- src/aiia/data/DataLoader.py | 137 ++++++++++++++++++++---------------- 1 file changed, 76 insertions(+), 61 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index 32f9b4f..d163112 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -1,82 +1,97 @@ import os from PIL import Image import torch -from torch.utils.data import Dataset, DataLoader +from torch.utils.data import DataLoader from torchvision import transforms import random import numpy as np +from sklearn.model_selection import train_test_split + class AIIADataLoader: - def __init__(self, data_dir, batch_size=32, val_split=0.2, seed=42): - self.data_dir = data_dir - self.batch_size = batch_size - self.val_split = val_split - self.seed = seed - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - # Set random seeds for reproducibility - random.seed(seed) - np.random.seed(seed) + def __init__(self, data_dir, batch_size=32, val_split=0.2, seed=42, limit=None): torch.manual_seed(seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed_all(seed) + np.random.seed(seed) + random.seed(seed) - # Load and split dataset - self.image_paths = self._load_image_paths() - self.train_paths, self.val_paths = self._split_data(self.image_paths) + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print(f'Using device: {self.device}') + + image_paths = self._load_image_paths(data_dir, limit=limit) + + train_paths, val_paths = self._split_data(image_paths, val_split=val_split) - # Split train paths into denoising and rotation subsets - num_train = len(self.train_paths) - mid_index = num_train // 2 - self.denoise_train_paths = self.train_paths[:mid_index] - self.rotation_train_paths = self.train_paths[mid_index:] - - # Define transformations - self.transform = transforms.Compose([ + # Create combined dataset for training with both denoise and rotate tasks + train_denoise_paths = [(path, 'denoise') for path in train_paths] + train_rotate_paths = [(path, 'rotate') for path in train_paths] + train_combined = train_denoise_paths + train_rotate_paths + + val_denoise_paths = [(path, 'denoise') for path in val_paths] + val_rotate_paths = [(path, 'rotate') for path in val_paths] + val_combined = val_denoise_paths + val_rotate_paths + + transform = transforms.Compose([ + transforms.Resize((256, 256)), transforms.ToTensor() ]) - - # Create datasets and dataloaders for denoising and rotation - self.denoise_dataset = AIIADataset(self.denoise_train_paths, transform=self.transform) - self.rotation_dataset = AIIADataset(self.rotation_train_paths, transform=self.transform) - - self.denoise_loader = DataLoader(self.denoise_dataset, batch_size=batch_size, shuffle=True) - self.rotation_loader = DataLoader(self.rotation_dataset, batch_size=batch_size, shuffle=True) - - # Validation loader - self.val_dataset = AIIADataset(self.val_paths, transform=self.transform) + + self.train_dataset = AIIADataset(train_combined, transform=transform) + self.val_dataset = AIIADataset(val_combined, transform=transform) + + self.train_loader = DataLoader(self.train_dataset, batch_size=batch_size, shuffle=True) self.val_loader = DataLoader(self.val_dataset, batch_size=batch_size, shuffle=False) -class AIIADataset(Dataset): - def __init__(self, image_paths, transform=None, task='denoising'): - self.image_paths = image_paths + def _load_image_paths(self, data_dir, limit=None): + extensions = ('.png', '.jpeg', '.jpg') + image_paths = [] + for root, dirs, files in os.walk(data_dir): + for file in files: + if file.lower().endswith(extensions): + image_paths.append(os.path.join(root, file)) + image_paths = sorted(list(set(image_paths))) + if limit is not None: + image_paths = image_paths[:limit] + return image_paths + + def _split_data(self, image_paths, val_split=0.2): + train_paths, val_paths = train_test_split( + image_paths, test_size=val_split, random_state=42 + ) + return train_paths, val_paths + + +class AIIADataset(torch.utils.data.Dataset): + def __init__(self, data_paths, transform=None, preload=False): + self.data_paths = data_paths self.transform = transform - self.task = task - if task == 'denoising': - self.noise_transform = transforms.Compose([ - lambda x: x + torch.randn_like(x) * 0.1 # Adjust noise level as needed - ]) - elif task == 'rotation': - self.rotation_angles = [0, 90, 180, 270] - else: - raise ValueError("Unknown task") + self.preload = preload + self.loaded_images = {} + + if self.preload: + for path, task in self.data_paths: + img = Image.open(path).convert('RGB') + self.loaded_images[path] = img def __len__(self): - return len(self.image_paths) + return len(self.data_paths) def __getitem__(self, idx): - img_path = self.image_paths[idx] - image = Image.open(img_path).convert('RGB') - - if self.transform: - image = self.transform(image) - - if self.task == 'denoising': - noisy_image = self.noise_transform(image) - return noisy_image, image # input: noisy, target: clean - elif self.task == 'rotation': - angle = random.choice(self.rotation_angles) - rotated_image = transforms.functional.rotate(image, angle) - return rotated_image, angle # input: rotated image, target: angle + path, task = self.data_paths[idx] + if self.preload: + img = self.loaded_images[path] else: - raise ValueError("Unknown task") \ No newline at end of file + img = Image.open(path).convert('RGB') + + if task == 'denoise': + noise_std = 0.1 + noisy_img = img + torch.randn_like(img) * noise_std + target = img + return noisy_img, target, task + elif task == 'rotate': + angles = [0, 90, 180, 270] + angle = random.choice(angles) + rotated_img = transforms.functional.rotate(img, angle) + target = torch.tensor(angle).long() + return rotated_img, target, task + else: + raise ValueError(f"Unknown task: {task}") \ No newline at end of file From 4c19838daba332842cefc624610205c72d1d144d Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 20 Jan 2025 13:25:36 +0100 Subject: [PATCH 08/55] converted to cnn models --- src/aiia/model/Model.py | 221 +++++++++++++++------------------------ src/aiia/model/config.py | 24 ++++- 2 files changed, 106 insertions(+), 139 deletions(-) diff --git a/src/aiia/model/Model.py b/src/aiia/model/Model.py index ffc1a84..68288c9 100644 --- a/src/aiia/model/Model.py +++ b/src/aiia/model/Model.py @@ -1,144 +1,97 @@ +from config import AIIAConfig +from torch import nn import torch -import torch.nn as nn -import json -import os - -from aiia.model.config import AIIAConfig class AIIA(nn.Module): def __init__(self, config: AIIAConfig): super(AIIA, self).__init__() - self.patch_size = 2 * config.radius + 1 - input_dim = self.patch_size * self.patch_size * config.num_channels - - # Define layers based on the number of hidden layers in the config - self.layers = nn.ModuleList() - - # First layer: input_dim to hidden_size - self.layers.append(nn.Linear(input_dim, config.hidden_size)) - - # Intermediate hidden layers: hidden_size to hidden_size - for _ in range(config.num_hidden_layers - 1): - self.layers.append(nn.Linear(config.hidden_size, config.hidden_size)) - - # Last layer: hidden_size back to input_dim - self.layers.append(nn.Linear(config.hidden_size, input_dim)) - - # Store the configuration self.config = config - def forward(self, x): - for i, layer in enumerate(self.layers): - if i == len(self.layers) - 1: - # No activation function on the last layer - x = layer(x) - else: - # Apply the specified activation function to all but the last layer - x = self.activation_function(x) - return x + def save(self, model_path, config_path): + torch.save(self.state_dict(), model_path) + self.config.save(config_path) - def activation_function(self, x): - if self.config.activation_function == "relu": - return torch.relu(x) - elif self.config.activation_function == "gelu": - return nn.functional.gelu(x) - elif self.config.activation_function == "sigmoid": - return torch.sigmoid(x) - elif self.config.activation_function == "tanh": - return torch.tanh(x) + @classmethod + def load(cls, config_path, model_path): + config = AIIAConfig.load(config_path) + model = cls(config) + model.load_state_dict(torch.load(model_path)) + return model + +class AIIABase(AIIA): + def __init__(self, config: AIIAConfig): + super(AIIABase, self).__init__(config) + layers = [] + in_channels = config.num_channels + for _ in range(config.num_hidden_layers): + layers.extend([ + nn.Conv2d(in_channels, config.hidden_size, kernel_size=config.kernel_size, padding=1), + getattr(nn, config.activation_function)(), + nn.MaxPool2d(kernel_size=2) + ]) + in_channels = config.hidden_size + self.cnn = nn.Sequential(*layers) + + def forward(self, x): + return self.cnn(x) + +class AIIAExpert(AIIA): + def __init__(self, config: AIIAConfig): + super(AIIAExpert, self).__init__(config) + self.base_cnn = AIIABase(config) + + def forward(self, x): + return self.base_cnn(x) + +class AIIAmoe(AIIA): + def __init__(self, config: AIIAConfig, num_experts: int = 3): + super(AIIAmoe, self).__init__(config) + self.experts = nn.ModuleList([AIIAExpert(config) for _ in range(num_experts)]) + self.gate = nn.Sequential( + nn.Linear(config.hidden_size, num_experts), + nn.Softmax(dim=1) + ) + + def forward(self, x): + expert_outputs = torch.stack([expert(x) for expert in self.experts], dim=1) + gate_weights = self.gate(torch.mean(expert_outputs, (2, 3))) + merged_output = torch.sum(expert_outputs * gate_weights.unsqueeze(2).unsqueeze(3), dim=1) + return merged_output + +class AIIAchunked(AIIA): + def __init__(self, config: AIIAConfig, patch_size: int = 16): + super(AIIAchunked, self).__init__(config) + self.patch_size = patch_size + self.base_cnn = AIIABase(config) + + def forward(self, x): + patches = x.unfold(2, self.patch_size, self.patch_size).unfold(3, self.patch_size, self.patch_size) + patches = patches.contiguous().view(patches.size(0), patches.size(1), -1, self.patch_size, self.patch_size) + patch_outputs = [] + for p in torch.split(patches, 1, dim=2): + p = p.squeeze(2) + po = self.base_cnn(p) + patch_outputs.append(po) + combined_output = torch.mean(torch.stack(patch_outputs, dim=0), dim=0) + return combined_output + +class AIIAresursive(AIIA): + def __init__(self, config: AIIAConfig, recursion_depth: int = 2): + super(AIIAresursive, self).__init__(config) + self.recursion_depth = recursion_depth + self.chunked_cnn = AIIAchunked(config) + + def forward(self, x, depth=0): + if depth == self.recursion_depth: + return self.chunked_cnn(x) else: - raise ValueError(f"Unsupported activation function: {self.config.activation_function}") - - def predict(self, input_image, patch_size=None, stride=None): - if patch_size is None: - patch_size = 2 * self.config.radius + 1 - if stride is None: - stride = patch_size // 2 # Overlap by half the patch size - - # Extract patches from the input image - patches = self.extract_patches(input_image, patch_size, stride) - - # Process each patch through the model - with torch.no_grad(): - predictions = [] - for patch in patches: - patch = patch.view(1, -1).to(self.device) - pred = self(patch) - predictions.append(pred.view(patch_size, patch_size, self.config.num_channels).cpu()) - - # Reconstruct the image from the predicted patches - output_image = torch.zeros_like(input_image) - count_map = torch.zeros_like(input_image) - - patch_idx = 0 - for y in range(0, input_image.shape[1] - patch_size + 1, stride): - for x in range(0, input_image.shape[2] - patch_size + 1, stride): - output_image[:, y:y+patch_size, x:x+patch_size] += predictions[patch_idx] - count_map[:, y:y+patch_size, x:x+patch_size] += 1 - patch_idx += 1 - - # Average the overlapping predictions - output_image /= count_map - - return output_image - - @staticmethod - def extract_patches(image, patch_size, stride): - patches = [] - for y in range(0, image.shape[1] - patch_size + 1, stride): - for x in range(0, image.shape[2] - patch_size + 1, stride): - patch = image[:, y:y+patch_size, x:x+patch_size] - patches.append(patch) - return patches - - @staticmethod - def extract_patches(image, patch_size, stride=None): - if stride is None: - stride = patch_size - - C, H, W = image.shape - patches = [] - - for y in range(0, H - patch_size + 1, stride): - for x in range(0, W - patch_size + 1, stride): - patch = image[:, y:y+patch_size, x:x+patch_size] - patches.append(patch) - - return torch.stack(patches) - - def save(self, folderpath: str): - # Ensure the folder exists - os.makedirs(folderpath, exist_ok=True) - - # Save the model state dictionary - model_state_dict = self.state_dict() - - # Serialize and save the configuration as JSON - with open(os.path.join(folderpath, 'config.json'), 'w') as f: - json.dump(self.config.__dict__, f) - - # Save the model state dictionary - torch.save(model_state_dict, os.path.join(folderpath, 'model.pth')) - - def load(self, folderpath: str): - with open(os.path.join(folderpath, 'config.json'), 'r') as f: - config_dict = json.load(f) - - # Assuming Config has a constructor that takes a dictionary - self.config = AIIAConfig(**config_dict) - - # Load the model state dictionary into the current instance - model_state_dict = torch.load(os.path.join(folderpath, 'model.pth')) - self.load_state_dict(model_state_dict) - - return config_dict, model_state_dict - - -class AIIAEncoder(AIIA): - def __init__(self, config): - super().__init__(config) - self.encoder = torch.nn.Sequential(*list(self.layers.children())[:config.encoder_layers]) - - def forward(self, x): - return self.encoder(x) \ No newline at end of file + patches = x.unfold(2, 16, 16).unfold(3, 16, 16) + patches = patches.contiguous().view(patches.size(0), patches.size(1), -1, 16, 16) + processed_patches = [] + for p in torch.split(patches, 1, dim=2): + p = p.squeeze(2) + pp = self.forward(p, depth + 1) + processed_patches.append(pp) + combined_output = torch.mean(torch.stack(processed_patches, dim=0), dim=0) + return combined_output diff --git a/src/aiia/model/config.py b/src/aiia/model/config.py index 42588ae..53ecb56 100644 --- a/src/aiia/model/config.py +++ b/src/aiia/model/config.py @@ -1,30 +1,44 @@ +import torch +import torch.nn as nn import json class AIIAConfig: def __init__( self, model_name: str = "AIIA", - radius: int = 3, + kernel_size: int = 3, activation_function: str = "gelu", hidden_size: int = 128, num_hidden_layers: int = 2, num_channels: int = 3, + learning_rate: float = 5e5 ): self.model_name = model_name - self.radius = radius + self.kernel_size = kernel_size self.activation_function = activation_function self.hidden_size = hidden_size self.num_hidden_layers = num_hidden_layers self.num_channels = num_channels + self.learning_rate = learning_rate + + @property + def activation_function(self): + return self._activation_function + + @activation_function.setter + def activation_function(self, value): + attr = getattr(nn, value, None) + if attr is None or (not callable(attr) and not isinstance(attr, type(nn.Module))): + valid_funcs = [func for func in dir(nn) if callable(getattr(nn, func)) or isinstance(getattr(nn, func), type(nn.Module))] + raise ValueError(f"Invalid activation function: {value}. Choose from: {', '.join(valid_funcs)}") + self._activation_function = value def save(self, file_path): - # Save config to JSON with open(file_path, 'w') as f: json.dump(self.__dict__, f) @classmethod def load(cls, file_path): - # Load config from JSON with open(file_path, 'r') as f: config_dict = json.load(f) - return cls(**config_dict) \ No newline at end of file + return cls(**config_dict) From 99c3ec38c71a14efe2db78ef3533b76a5b54ac55 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Tue, 21 Jan 2025 20:06:53 +0100 Subject: [PATCH 09/55] updated config for cnn --- src/aiia/model/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiia/model/__init__.py b/src/aiia/model/__init__.py index 8591251..5757152 100644 --- a/src/aiia/model/__init__.py +++ b/src/aiia/model/__init__.py @@ -1,2 +1,2 @@ from .config import AIIAConfig -from .Model import AIIA, AIIAEncoder \ No newline at end of file +from .Model import AIIA, AIIABase, AIIAchunked, AIIAExpert, AIIAmoe, AIIAresursive \ No newline at end of file From 106539f48af4b6dbe0fdc76a8ea0dcfd5cabb89c Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Tue, 21 Jan 2025 21:19:37 +0100 Subject: [PATCH 10/55] uupdated DataLoader for new AIIA CNN arch --- src/aiia/data/DataLoader.py | 279 +++++++++++++++++++++++++----------- 1 file changed, 194 insertions(+), 85 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index d163112..d3cb900 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -1,97 +1,206 @@ -import os +import io from PIL import Image import torch from torch.utils.data import DataLoader from torchvision import transforms import random -import numpy as np -from sklearn.model_selection import train_test_split +import re -class AIIADataLoader: - def __init__(self, data_dir, batch_size=32, val_split=0.2, seed=42, limit=None): - torch.manual_seed(seed) - np.random.seed(seed) - random.seed(seed) +class FilePathLoader: + def __init__(self, dataset, file_path_column="file_path", label_column=None): + self.dataset = dataset + self.file_path_column = file_path_column + self.label_column = label_column + self.successful_count = 0 + self.skipped_count = 0 - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - print(f'Using device: {self.device}') - - image_paths = self._load_image_paths(data_dir, limit=limit) - - train_paths, val_paths = self._split_data(image_paths, val_split=val_split) - - # Create combined dataset for training with both denoise and rotate tasks - train_denoise_paths = [(path, 'denoise') for path in train_paths] - train_rotate_paths = [(path, 'rotate') for path in train_paths] - train_combined = train_denoise_paths + train_rotate_paths - - val_denoise_paths = [(path, 'denoise') for path in val_paths] - val_rotate_paths = [(path, 'rotate') for path in val_paths] - val_combined = val_denoise_paths + val_rotate_paths - - transform = transforms.Compose([ - transforms.Resize((256, 256)), - transforms.ToTensor() - ]) - - self.train_dataset = AIIADataset(train_combined, transform=transform) - self.val_dataset = AIIADataset(val_combined, transform=transform) - - self.train_loader = DataLoader(self.train_dataset, batch_size=batch_size, shuffle=True) - self.val_loader = DataLoader(self.val_dataset, batch_size=batch_size, shuffle=False) - - def _load_image_paths(self, data_dir, limit=None): - extensions = ('.png', '.jpeg', '.jpg') - image_paths = [] - for root, dirs, files in os.walk(data_dir): - for file in files: - if file.lower().endswith(extensions): - image_paths.append(os.path.join(root, file)) - image_paths = sorted(list(set(image_paths))) - if limit is not None: - image_paths = image_paths[:limit] - return image_paths - - def _split_data(self, image_paths, val_split=0.2): - train_paths, val_paths = train_test_split( - image_paths, test_size=val_split, random_state=42 - ) - return train_paths, val_paths + if self.file_path_column not in dataset.column_names: + raise ValueError(f"Column '{self.file_path_column}' not found in dataset.") + + def _get_image(self, item): + try: + path = item[self.file_path_column] + image = Image.open(path).convert("RGB") + return image + except Exception as e: + print(f"Error loading image from {path}: {e}") + return None - -class AIIADataset(torch.utils.data.Dataset): - def __init__(self, data_paths, transform=None, preload=False): - self.data_paths = data_paths - self.transform = transform - self.preload = preload - self.loaded_images = {} - - if self.preload: - for path, task in self.data_paths: - img = Image.open(path).convert('RGB') - self.loaded_images[path] = img - - def __len__(self): - return len(self.data_paths) - - def __getitem__(self, idx): - path, task = self.data_paths[idx] - if self.preload: - img = self.loaded_images[path] + def get_item(self, idx): + item = self.dataset[idx] + image = self._get_image(item) + if image is not None: + self.successful_count += 1 + if self.label_column is not None: + label = item.get(self.label_column) + return (image, label) + else: + return (image,) else: - img = Image.open(path).convert('RGB') + self.skipped_count += 1 + return None + + def print_summary(self): + print(f"Successfully converted {self.successful_count} images.") + print(f"Skipped {self.skipped_count} images due to errors.") + +class JPGImageLoader: + def __init__(self, dataset, bytes_column="jpg", label_column=None): + self.dataset = dataset + self.bytes_column = bytes_column + self.label_column = label_column + self.successful_count = 0 + self.skipped_count = 0 - if task == 'denoise': - noise_std = 0.1 - noisy_img = img + torch.randn_like(img) * noise_std - target = img - return noisy_img, target, task - elif task == 'rotate': - angles = [0, 90, 180, 270] - angle = random.choice(angles) - rotated_img = transforms.functional.rotate(img, angle) - target = torch.tensor(angle).long() - return rotated_img, target, task + if self.bytes_column not in dataset.column_names: + raise ValueError(f"Column '{self.bytes_column}' not found in dataset.") + + def _get_image(self, item): + try: + bytes_data = item[self.bytes_column] + img_bytes = io.BytesIO(bytes_data) + image = Image.open(img_bytes).convert("RGB") + return image + except Exception as e: + print(f"Error loading image from bytes: {e}") + return None + + def get_item(self, idx): + item = self.dataset[idx] + image = self._get_image(item) + if image is not None: + self.successful_count += 1 + if self.label_column is not None: + label = item.get(self.label_column) + return (image, label) + else: + return (image,) else: - raise ValueError(f"Unknown task: {task}") \ No newline at end of file + self.skipped_count += 1 + return None + + def print_summary(self): + print(f"Successfully converted {self.successful_count} images.") + print(f"Skipped {self.skipped_count} images due to errors.") + + +class AIIADataLoader(DataLoader): + def __init__(self, dataset, + batch_size=32, + val_split=0.2, + seed=42, + column="file_path", + label_column=None): + super().__init__() + + self.batch_size = batch_size + self.val_split = val_split + self.seed = seed + + # Determine which loader to use based on the dataset's content + # Check if any entry in bytes_column is a bytes or bytestring type + is_bytes_or_bytestring = any( + isinstance(value, (bytes, memoryview)) + for value in dataset[column].dropna().head(1).astype(str) + ) + + if is_bytes_or_bytestring: + self.loader = JPGImageLoader( + dataset, + bytes_column=column, + label_column=label_column + ) + else: + # Check if file_path column contains valid image file paths (at least one entry) + sample_paths = dataset[column].dropna().head(1).astype(str) + + # Regex pattern for matching image file paths (adjust as needed) + filepath_pattern = r'.*(?:/|\\).*\.([jJ][pP][gG]|png|gif)$' + + if any( + re.match(filepath_pattern, path, flags=re.IGNORECASE) + for path in sample_paths + ): + self.loader = FilePathLoader( + dataset, + file_path_column=column, + label_column=label_column + ) + else: + # If neither condition is met, default to JPGImageLoader (assuming bytes are stored as strings) + self.loader = JPGImageLoader( + dataset, + bytes_column=column, + label_column=label_column + ) + + # Get all items + self.items = [self.loader.get_item(idx) for idx in range(len(dataset))] + + # Split into train and validation sets + train_indices, val_indices = self._split_data() + + # Create datasets for training and validation + self.train_dataset = self._create_subset(train_indices) + self.val_dataset = self._create_subset(val_indices) + + def _split_data(self): + if len(self.items) == 0: + return [], [] + + tasks = [item[1] if len(item) > 1 and hasattr(item, '__getitem__') else None for item in self.items] + unique_tasks = list(set(tasks)) if tasks.count(None) < len(tasks) else [] + + train_indices = [] + val_indices = [] + + for task in unique_tasks: + task_indices = [i for i, t in enumerate(tasks) if t == task] + n_val = int(len(task_indices) * self.val_split) + + random.shuffle(task_indices) + + val_indices.extend(task_indices[:n_val]) + train_indices.extend(task_indices[n_val:]) + + return train_indices, val_indices + + def _create_subset(self, indices): + subset_items = [self.items[i] for i in indices] + return AIIADataset(subset_items) + +class AIIADataset(torch.utils.data.Dataset): + def __init__(self, items): + self.items = items + + def __len__(self): + return len(self.items) + + def __getitem__(self, idx): + item = self.items[idx] + if isinstance(item, tuple) and len(item) == 2: + image, label = item + return (image, label) + elif isinstance(item, tuple) and len(item) == 3: + image, task, label = item + # Handle tasks accordingly (e.g., apply different augmentations) + if task == 'denoise': + noise_std = 0.1 + noisy_img = image + torch.randn_like(image) * noise_std + target = image + return (noisy_img, target, task) + elif task == 'rotate': + angles = [0, 90, 180, 270] + angle = random.choice(angles) + rotated_img = transforms.functional.rotate(image, angle) + target = torch.tensor(angle).long() + return (rotated_img, target, task) + else: + raise ValueError(f"Unknown task: {task}") + else: + # Handle single images without labels or tasks + if isinstance(item, Image.Image): + return item + else: + raise ValueError("Invalid item format.") From b87ce68c825a377f53b3458bc7d6368ac7c23890 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Tue, 21 Jan 2025 21:44:16 +0100 Subject: [PATCH 11/55] updated config t add kwrags to support future changes to the config for different models --- src/aiia/model/config.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/aiia/model/config.py b/src/aiia/model/config.py index 53ecb56..274d7a3 100644 --- a/src/aiia/model/config.py +++ b/src/aiia/model/config.py @@ -11,7 +11,8 @@ class AIIAConfig: hidden_size: int = 128, num_hidden_layers: int = 2, num_channels: int = 3, - learning_rate: float = 5e5 + learning_rate: float = 5e-5, + **kwargs ): self.model_name = model_name self.kernel_size = kernel_size @@ -20,6 +21,10 @@ class AIIAConfig: self.num_hidden_layers = num_hidden_layers self.num_channels = num_channels self.learning_rate = learning_rate + + # Store additional keyword arguments as attributes + for key, value in kwargs.items(): + setattr(self, key, value) @property def activation_function(self): @@ -35,10 +40,10 @@ class AIIAConfig: def save(self, file_path): with open(file_path, 'w') as f: - json.dump(self.__dict__, f) + json.dump(vars(self), f) @classmethod def load(cls, file_path): with open(file_path, 'r') as f: config_dict = json.load(f) - return cls(**config_dict) + return cls(**config_dict) \ No newline at end of file From 74973a325bcdcc3dab6125fff12c7155662f21d6 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Wed, 22 Jan 2025 11:19:55 +0100 Subject: [PATCH 12/55] updated models for improved config --- src/aiia/model/Model.py | 72 ++++++++++++++++++++++++++++------------- 1 file changed, 50 insertions(+), 22 deletions(-) diff --git a/src/aiia/model/Model.py b/src/aiia/model/Model.py index 68288c9..9004c0d 100644 --- a/src/aiia/model/Model.py +++ b/src/aiia/model/Model.py @@ -4,9 +4,13 @@ import torch class AIIA(nn.Module): - def __init__(self, config: AIIAConfig): + def __init__(self, config: AIIAConfig, **kwargs): super(AIIA, self).__init__() self.config = config + + # Update the config with any additional keyword arguments + for key, value in kwargs.items(): + setattr(self.config, key, value) def save(self, model_path, config_path): torch.save(self.state_dict(), model_path) @@ -20,67 +24,89 @@ class AIIA(nn.Module): return model class AIIABase(AIIA): - def __init__(self, config: AIIAConfig): - super(AIIABase, self).__init__(config) + def __init__(self, config: AIIAConfig, **kwargs): + super(AIIABase, self).__init__(config, **kwargs) + + # Initialize layers based on updated config layers = [] - in_channels = config.num_channels - for _ in range(config.num_hidden_layers): + in_channels = self.config.num_channels + for _ in range(self.config.num_hidden_layers): layers.extend([ - nn.Conv2d(in_channels, config.hidden_size, kernel_size=config.kernel_size, padding=1), - getattr(nn, config.activation_function)(), + nn.Conv2d(in_channels, self.config.hidden_size, + kernel_size=self.config.kernel_size, padding=1), + getattr(nn, self.config.activation_function)(), nn.MaxPool2d(kernel_size=2) ]) - in_channels = config.hidden_size + in_channels = self.config.hidden_size self.cnn = nn.Sequential(*layers) def forward(self, x): return self.cnn(x) class AIIAExpert(AIIA): - def __init__(self, config: AIIAConfig): - super(AIIAExpert, self).__init__(config) - self.base_cnn = AIIABase(config) + def __init__(self, config: AIIAConfig, **kwargs): + super(AIIAExpert, self).__init__(config, **kwargs) + self.base_cnn = AIIABase(config, **kwargs) def forward(self, x): return self.base_cnn(x) class AIIAmoe(AIIA): - def __init__(self, config: AIIAConfig, num_experts: int = 3): - super(AIIAmoe, self).__init__(config) - self.experts = nn.ModuleList([AIIAExpert(config) for _ in range(num_experts)]) + def __init__(self, config: AIIAConfig, **kwargs): + super(AIIAmoe, self).__init__(config, **kwargs) + + # Get num_experts from updated config + num_Experts = getattr(self.config, 'num_Experts', 3) + self.experts = nn.ModuleList([AIIAExpert(config, **kwargs) for _ in range(num_Experts)]) + + # Update gate based on latest config values self.gate = nn.Sequential( - nn.Linear(config.hidden_size, num_experts), + nn.Linear(self.config.hidden_size, num_Experts), nn.Softmax(dim=1) ) def forward(self, x): expert_outputs = torch.stack([expert(x) for expert in self.experts], dim=1) gate_weights = self.gate(torch.mean(expert_outputs, (2, 3))) - merged_output = torch.sum(expert_outputs * gate_weights.unsqueeze(2).unsqueeze(3), dim=1) + merged_output = torch.sum( + expert_outputs * gate_weights.unsqueeze(2).unsqueeze(3), dim=1 + ) return merged_output class AIIAchunked(AIIA): - def __init__(self, config: AIIAConfig, patch_size: int = 16): - super(AIIAchunked, self).__init__(config) + def __init__(self, config: AIIAConfig, **kwargs): + super(AIIAchunked, self).__init__(config, **kwargs) + + # Get patch_size from updated config + patch_size = getattr(self.config, 'patch_size', 16) self.patch_size = patch_size - self.base_cnn = AIIABase(config) + + # Initialize base CNN with updated config + self.base_cnn = AIIABase(config, **kwargs) def forward(self, x): patches = x.unfold(2, self.patch_size, self.patch_size).unfold(3, self.patch_size, self.patch_size) patches = patches.contiguous().view(patches.size(0), patches.size(1), -1, self.patch_size, self.patch_size) patch_outputs = [] + for p in torch.split(patches, 1, dim=2): p = p.squeeze(2) po = self.base_cnn(p) patch_outputs.append(po) + combined_output = torch.mean(torch.stack(patch_outputs, dim=0), dim=0) return combined_output class AIIAresursive(AIIA): - def __init__(self, config: AIIAConfig, recursion_depth: int = 2): - super(AIIAresursive, self).__init__(config) + def __init__(self, config: AIIAConfig, **kwargs): + super(AIIAresursive, self).__init__(config, **kwargs) + + # Get recursion_depth from updated config + recursion_depth = getattr(self.config, 'recursion_depth', 2) self.recursion_depth = recursion_depth - self.chunked_cnn = AIIAchunked(config) + + # Initialize chunked CNN with updated config + self.chunked_cnn = AIIAchunked(config, **kwargs) def forward(self, x, depth=0): if depth == self.recursion_depth: @@ -89,9 +115,11 @@ class AIIAresursive(AIIA): patches = x.unfold(2, 16, 16).unfold(3, 16, 16) patches = patches.contiguous().view(patches.size(0), patches.size(1), -1, 16, 16) processed_patches = [] + for p in torch.split(patches, 1, dim=2): p = p.squeeze(2) pp = self.forward(p, depth + 1) processed_patches.append(pp) + combined_output = torch.mean(torch.stack(processed_patches, dim=0), dim=0) return combined_output From 26b701fd77844bb3870d8f62f306608aed760a8a Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Wed, 22 Jan 2025 11:20:05 +0100 Subject: [PATCH 13/55] corrected activation function --- src/aiia/model/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiia/model/config.py b/src/aiia/model/config.py index 274d7a3..64e6fa3 100644 --- a/src/aiia/model/config.py +++ b/src/aiia/model/config.py @@ -7,7 +7,7 @@ class AIIAConfig: self, model_name: str = "AIIA", kernel_size: int = 3, - activation_function: str = "gelu", + activation_function: str = "GELU", hidden_size: int = 128, num_hidden_layers: int = 2, num_channels: int = 3, From ab58d352c42fbbfc1cfa6358d9840c2988a11bf5 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Wed, 22 Jan 2025 14:16:56 +0100 Subject: [PATCH 14/55] updated saving and first implementation of new additonal parameter handling --- src/aiia/model/Model.py | 109 ++++++++++++++++++++++++--------------- src/aiia/model/config.py | 16 +++--- 2 files changed, 76 insertions(+), 49 deletions(-) diff --git a/src/aiia/model/Model.py b/src/aiia/model/Model.py index 9004c0d..5bc17bc 100644 --- a/src/aiia/model/Model.py +++ b/src/aiia/model/Model.py @@ -1,7 +1,7 @@ from config import AIIAConfig from torch import nn import torch - +import os class AIIA(nn.Module): def __init__(self, config: AIIAConfig, **kwargs): @@ -12,56 +12,70 @@ class AIIA(nn.Module): for key, value in kwargs.items(): setattr(self.config, key, value) - def save(self, model_path, config_path): - torch.save(self.state_dict(), model_path) - self.config.save(config_path) + def save(self, path: str): + # Create the directory if it doesn't exist + if not os.path.exists(path): + os.makedirs(path, exist_ok=True) + torch.save(self.state_dict(), f"{path}/model.pth") + self.config.save(path) @classmethod - def load(cls, config_path, model_path): - config = AIIAConfig.load(config_path) + def load(cls, path): + config = AIIAConfig.load(path) model = cls(config) - model.load_state_dict(torch.load(model_path)) + model.load_state_dict(torch.load(f"{path}/model.pth")) return model class AIIABase(AIIA): def __init__(self, config: AIIAConfig, **kwargs): - super(AIIABase, self).__init__(config, **kwargs) - - # Initialize layers based on updated config + self.config = config + super(AIIABase, self).__init__(config=config) + # Initialize layers based on configuration layers = [] - in_channels = self.config.num_channels - for _ in range(self.config.num_hidden_layers): + in_channels = config.num_channels + + for _ in range(config.num_hidden_layers): layers.extend([ - nn.Conv2d(in_channels, self.config.hidden_size, - kernel_size=self.config.kernel_size, padding=1), - getattr(nn, self.config.activation_function)(), - nn.MaxPool2d(kernel_size=2) + nn.Conv2d(in_channels, config.hidden_size, + kernel_size=config.kernel_size, padding=1), + getattr(nn, config.activation_function)(), + nn. MaxPool2d(kernel_size=2) ]) - in_channels = self.config.hidden_size + in_channels = config.hidden_size + self.cnn = nn.Sequential(*layers) + def forward(self, x): return self.cnn(x) class AIIAExpert(AIIA): def __init__(self, config: AIIAConfig, **kwargs): - super(AIIAExpert, self).__init__(config, **kwargs) - self.base_cnn = AIIABase(config, **kwargs) + self.config = config + super(AIIAExpert, self).__init__(config=config) + + # Initialize base CNN with configuration + self.base_cnn = AIIABase(self.config, **kwargs) def forward(self, x): - return self.base_cnn(x) + return self. base_cnn(x) class AIIAmoe(AIIA): - def __init__(self, config: AIIAConfig, **kwargs): - super(AIIAmoe, self).__init__(config, **kwargs) + def __init__(self, config: AIIAConfig, num_experts: int = 3, **kwargs): + self.config = config + super(AIIAmoe, self).__init__(config=config) - # Get num_experts from updated config - num_Experts = getattr(self.config, 'num_Experts', 3) - self.experts = nn.ModuleList([AIIAExpert(config, **kwargs) for _ in range(num_Experts)]) + # Update config with new parameters if provided + self.config.num_experts = num_experts - # Update gate based on latest config values - self.gate = nn.Sequential( - nn.Linear(self.config.hidden_size, num_Experts), + # Initialize multiple experts + self.experts = nn.ModuleList([ + AIIAExpert(self.config, **kwargs) for _ in range(num_experts) + ]) + + # Create gating network + self. gate = nn.Sequential( + nn.Linear(self.config.hidden_size, num_experts), nn.Softmax(dim=1) ) @@ -74,15 +88,15 @@ class AIIAmoe(AIIA): return merged_output class AIIAchunked(AIIA): - def __init__(self, config: AIIAConfig, **kwargs): - super(AIIAchunked, self).__init__(config, **kwargs) + def __init__(self, config: AIIAConfig, patch_size: int = 16, **kwargs): + self.config = config + super(AIIAchunked, self).__init__(config=config) - # Get patch_size from updated config - patch_size = getattr(self.config, 'patch_size', 16) - self.patch_size = patch_size + # Update config with new parameters if provided + self.config.patch_size = patch_size - # Initialize base CNN with updated config - self.base_cnn = AIIABase(config, **kwargs) + # Initialize base CNN for processing each patch + self.base_cnn = AIIABase(self.config, **kwargs) def forward(self, x): patches = x.unfold(2, self.patch_size, self.patch_size).unfold(3, self.patch_size, self.patch_size) @@ -98,16 +112,16 @@ class AIIAchunked(AIIA): return combined_output class AIIAresursive(AIIA): - def __init__(self, config: AIIAConfig, **kwargs): - super(AIIAresursive, self).__init__(config, **kwargs) - + def __init__(self, config: AIIAConfig, recursion_depth: int = 3, **kwargs): + # Pass recursion_depth as a kwarg to the config + self.config = config + super().__init__(config, recursion_depth=recursion_depth, **kwargs) # Get recursion_depth from updated config - recursion_depth = getattr(self.config, 'recursion_depth', 2) - self.recursion_depth = recursion_depth - - # Initialize chunked CNN with updated config - self.chunked_cnn = AIIAchunked(config, **kwargs) + self.recursion_depth = getattr(self.config, 'recursion_depth', 2) + # Initialize chunked CNN with updated config + self.chunked_cnn = AIIAchunked(self.config, **kwargs) + def forward(self, x, depth=0): if depth == self.recursion_depth: return self.chunked_cnn(x) @@ -123,3 +137,12 @@ class AIIAresursive(AIIA): combined_output = torch.mean(torch.stack(processed_patches, dim=0), dim=0) return combined_output + + + +config = AIIAConfig() + +model = AIIABase(config) +model = AIIAmoe(config=config, num_experts=5) +model = AIIAresursive(config=config) +model.save("moe") \ No newline at end of file diff --git a/src/aiia/model/config.py b/src/aiia/model/config.py index 64e6fa3..f6c5148 100644 --- a/src/aiia/model/config.py +++ b/src/aiia/model/config.py @@ -1,15 +1,17 @@ import torch import torch.nn as nn import json +import os + class AIIAConfig: def __init__( self, model_name: str = "AIIA", - kernel_size: int = 3, + kernel_size: int = 5, activation_function: str = "GELU", - hidden_size: int = 128, - num_hidden_layers: int = 2, + hidden_size: int = 256, + num_hidden_layers: int = 12, num_channels: int = 3, learning_rate: float = 5e-5, **kwargs @@ -39,11 +41,13 @@ class AIIAConfig: self._activation_function = value def save(self, file_path): - with open(file_path, 'w') as f: - json.dump(vars(self), f) + if not os.path.exists(file_path): + os.makedirs(file_path, exist_ok=True) + with open(f"{file_path}/config.json", 'w') as f: + json.dump(vars(self), f, indent=4) @classmethod def load(cls, file_path): - with open(file_path, 'r') as f: + with open(f"{file_path}/config.json", 'r') as f: config_dict = json.load(f) return cls(**config_dict) \ No newline at end of file From 6e6f4c4a2189ecd2e044a4bac0ce0cce89b54b98 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Wed, 22 Jan 2025 14:23:03 +0100 Subject: [PATCH 15/55] updated models and config to improve parameter handling and adding a copy function to use the same base config for mutliple models --- src/aiia/model/Model.py | 71 ++++++++++++++++++---------------------- src/aiia/model/config.py | 2 +- 2 files changed, 33 insertions(+), 40 deletions(-) diff --git a/src/aiia/model/Model.py b/src/aiia/model/Model.py index 5bc17bc..8f05988 100644 --- a/src/aiia/model/Model.py +++ b/src/aiia/model/Model.py @@ -2,18 +2,19 @@ from config import AIIAConfig from torch import nn import torch import os +import copy # Add this for deep copying class AIIA(nn.Module): def __init__(self, config: AIIAConfig, **kwargs): super(AIIA, self).__init__() - self.config = config + # Create a deep copy of the configuration to avoid sharing + self.config = copy.deepcopy(config) # Update the config with any additional keyword arguments for key, value in kwargs.items(): setattr(self.config, key, value) def save(self, path: str): - # Create the directory if it doesn't exist if not os.path.exists(path): os.makedirs(path, exist_ok=True) torch.save(self.state_dict(), f"{path}/model.pth") @@ -28,42 +29,42 @@ class AIIA(nn.Module): class AIIABase(AIIA): def __init__(self, config: AIIAConfig, **kwargs): - self.config = config - super(AIIABase, self).__init__(config=config) + super().__init__(config=config, **kwargs) + self.config = self.config + # Initialize layers based on configuration layers = [] - in_channels = config.num_channels + in_channels = self.config.num_channels - for _ in range(config.num_hidden_layers): + for _ in range(self.config.num_hidden_layers): layers.extend([ - nn.Conv2d(in_channels, config.hidden_size, - kernel_size=config.kernel_size, padding=1), - getattr(nn, config.activation_function)(), - nn. MaxPool2d(kernel_size=2) - ]) - in_channels = config.hidden_size + nn.Conv2d(in_channels, self.config.hidden_size, + kernel_size=self.config.kernel_size, padding=1), + getattr(nn, self.config.activation_function)(), + nn.MaxPool2d(kernel_size=2) + ]) + in_channels = self.config.hidden_size self.cnn = nn.Sequential(*layers) - def forward(self, x): return self.cnn(x) class AIIAExpert(AIIA): def __init__(self, config: AIIAConfig, **kwargs): - self.config = config - super(AIIAExpert, self).__init__(config=config) + super().__init__(config=config, **kwargs) + self.config = self.config # Initialize base CNN with configuration self.base_cnn = AIIABase(self.config, **kwargs) def forward(self, x): - return self. base_cnn(x) + return self.base_cnn(x) class AIIAmoe(AIIA): def __init__(self, config: AIIAConfig, num_experts: int = 3, **kwargs): - self.config = config - super(AIIAmoe, self).__init__(config=config) + super().__init__(config=config, **kwargs) + self.config = self.config # Update config with new parameters if provided self.config.num_experts = num_experts @@ -71,26 +72,26 @@ class AIIAmoe(AIIA): # Initialize multiple experts self.experts = nn.ModuleList([ AIIAExpert(self.config, **kwargs) for _ in range(num_experts) - ]) + ]) # Create gating network - self. gate = nn.Sequential( + self.gate = nn.Sequential( nn.Linear(self.config.hidden_size, num_experts), nn.Softmax(dim=1) - ) + ) def forward(self, x): expert_outputs = torch.stack([expert(x) for expert in self.experts], dim=1) gate_weights = self.gate(torch.mean(expert_outputs, (2, 3))) merged_output = torch.sum( expert_outputs * gate_weights.unsqueeze(2).unsqueeze(3), dim=1 - ) + ) return merged_output class AIIAchunked(AIIA): def __init__(self, config: AIIAConfig, patch_size: int = 16, **kwargs): - self.config = config - super(AIIAchunked, self).__init__(config=config) + super().__init__(config=config, **kwargs) + self.config = self.config # Update config with new parameters if provided self.config.patch_size = patch_size @@ -113,12 +114,13 @@ class AIIAchunked(AIIA): class AIIAresursive(AIIA): def __init__(self, config: AIIAConfig, recursion_depth: int = 3, **kwargs): + + super().__init__(config=config, **kwargs) + self.config = self.config + # Pass recursion_depth as a kwarg to the config - self.config = config - super().__init__(config, recursion_depth=recursion_depth, **kwargs) - # Get recursion_depth from updated config - self.recursion_depth = getattr(self.config, 'recursion_depth', 2) - + self.config.recursion_depth = recursion_depth + # Initialize chunked CNN with updated config self.chunked_cnn = AIIAchunked(self.config, **kwargs) @@ -136,13 +138,4 @@ class AIIAresursive(AIIA): processed_patches.append(pp) combined_output = torch.mean(torch.stack(processed_patches, dim=0), dim=0) - return combined_output - - - -config = AIIAConfig() - -model = AIIABase(config) -model = AIIAmoe(config=config, num_experts=5) -model = AIIAresursive(config=config) -model.save("moe") \ No newline at end of file + return combined_output \ No newline at end of file diff --git a/src/aiia/model/config.py b/src/aiia/model/config.py index f6c5148..e2ae83e 100644 --- a/src/aiia/model/config.py +++ b/src/aiia/model/config.py @@ -10,7 +10,7 @@ class AIIAConfig: model_name: str = "AIIA", kernel_size: int = 5, activation_function: str = "GELU", - hidden_size: int = 256, + hidden_size: int = 512, num_hidden_layers: int = 12, num_channels: int = 3, learning_rate: float = 5e-5, From 599b8c483592ad6ebc6de5e0085ab7ea3bee280d Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Fri, 24 Jan 2025 18:04:44 +0100 Subject: [PATCH 16/55] working shared model (with way to few params) --- src/aiia/model/Model.py | 88 ++++++++++++++++++------ src/pretrain.py | 149 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 216 insertions(+), 21 deletions(-) create mode 100644 src/pretrain.py diff --git a/src/aiia/model/Model.py b/src/aiia/model/Model.py index 8f05988..594a81c 100644 --- a/src/aiia/model/Model.py +++ b/src/aiia/model/Model.py @@ -27,6 +27,44 @@ class AIIA(nn.Module): model.load_state_dict(torch.load(f"{path}/model.pth")) return model +class AIIABaseShared(AIIA): + """ + Base class with parameter sharing. + All hidden layers share the same weights + """ + def __init__(self, config: AIIAConfig, **kwargs): + super().__init__(config=config, **kwargs) + self.config = copy.deepcopy(config) + + # Update config with new parameters if provided + for key, value in kwargs.items(): + setattr(self.config, key, value) + + # Initialize shared layers + self.conv_layer = nn.Conv2d( + self.config.num_channels, + self.config.hidden_size, + kernel_size=self.config.kernel_size, + padding=1 + ) + self.activation_function = getattr(nn, self.config.activation_function)() + self.max_pool = nn.MaxPool2d(kernel_size=2) + + # Create a Sequential container with shared layers repeated + layers = [] + for _ in range(self.config.num_hidden_layers): + layers.extend([ + self.conv_layer, + self.activation_function, + self.max_pool + ]) + + self.cnn = nn.Sequential(*layers) + + def forward(self, x): + return self.cnn(x) + + class AIIABase(AIIA): def __init__(self, config: AIIAConfig, **kwargs): super().__init__(config=config, **kwargs) @@ -51,53 +89,61 @@ class AIIABase(AIIA): return self.cnn(x) class AIIAExpert(AIIA): - def __init__(self, config: AIIAConfig, **kwargs): - super().__init__(config=config, **kwargs) + def __init__(self, config: AIIAConfig, base_class=AIIABase, **kwargs): + super().__init__(config=config, **kwargs) self.config = self.config - # Initialize base CNN with configuration - self.base_cnn = AIIABase(self.config, **kwargs) - - def forward(self, x): - return self.base_cnn(x) + # Initialize base CNN with configuration and chosen base class + if issubclass(base_class, AIIABase): + self.base_cnn = AIIABase(self.config, **kwargs) + elif issubclass(base_class, AIIABaseShared): + self.base_cnn = AIIABaseShared(self.config, **kwargs) + else: + raise ValueError("Invalid base class") class AIIAmoe(AIIA): - def __init__(self, config: AIIAConfig, num_experts: int = 3, **kwargs): - super().__init__(config=config, **kwargs) + def __init__(self, config: AIIAConfig, num_experts: int = 3, base_class=AIIABase, **kwargs): + super().__init__(config=config, **kwargs) self.config = self.config # Update config with new parameters if provided self.config.num_experts = num_experts - # Initialize multiple experts + # Initialize multiple experts using chosen base class self.experts = nn.ModuleList([ - AIIAExpert(self.config, **kwargs) for _ in range(num_experts) - ]) + AIIAExpert(self.config, base_class=base_class, **kwargs) + for _ in range(self.config.num_experts) + ]) # Create gating network self.gate = nn.Sequential( - nn.Linear(self.config.hidden_size, num_experts), + nn.Linear(self.config.hidden_size, self.config.num_experts), nn.Softmax(dim=1) - ) + ) def forward(self, x): expert_outputs = torch.stack([expert(x) for expert in self.experts], dim=1) gate_weights = self.gate(torch.mean(expert_outputs, (2, 3))) merged_output = torch.sum( expert_outputs * gate_weights.unsqueeze(2).unsqueeze(3), dim=1 - ) + ) return merged_output class AIIAchunked(AIIA): - def __init__(self, config: AIIAConfig, patch_size: int = 16, **kwargs): + def __init__(self, config: AIIAConfig, patch_size: int = 16, base_class=AIIABase, **kwargs): super().__init__(config=config, **kwargs) self.config = self.config # Update config with new parameters if provided self.config.patch_size = patch_size - # Initialize base CNN for processing each patch - self.base_cnn = AIIABase(self.config, **kwargs) + # Initialize base CNN for processing each patch using the specified base class + if issubclass(base_class, AIIABase): + self.base_cnn = AIIABase(self.config, **kwargs) + elif issubclass(base_class, AIIABaseShared): # Add support for AIIABaseShared + self.base_cnn = AIIABaseShared(self.config, **kwargs) + else: + raise ValueError("Invalid base class") def forward(self, x): patches = x.unfold(2, self.patch_size, self.patch_size).unfold(3, self.patch_size, self.patch_size) @@ -112,8 +158,8 @@ class AIIAchunked(AIIA): combined_output = torch.mean(torch.stack(patch_outputs, dim=0), dim=0) return combined_output -class AIIAresursive(AIIA): - def __init__(self, config: AIIAConfig, recursion_depth: int = 3, **kwargs): +class AIIArecursive(AIIA): + def __init__(self, config: AIIAConfig, recursion_depth: int = 3, base_class=AIIABase, **kwargs): super().__init__(config=config, **kwargs) self.config = self.config @@ -122,7 +168,7 @@ class AIIAresursive(AIIA): self.config.recursion_depth = recursion_depth # Initialize chunked CNN with updated config - self.chunked_cnn = AIIAchunked(self.config, **kwargs) + self.chunked_cnn = AIIAchunked(self.config, base_class, **kwargs) def forward(self, x, depth=0): if depth == self.recursion_depth: diff --git a/src/pretrain.py b/src/pretrain.py new file mode 100644 index 0000000..daea216 --- /dev/null +++ b/src/pretrain.py @@ -0,0 +1,149 @@ +import torch +from torch import nn +from torch.utils.data import Dataset, DataLoader +from torchvision import transforms +from PIL import Image +import os +import random +import pandas as pd +from aiia.model.config import AIIAConfig +from aiia.model import AIIABase +from aiia.data.DataLoader import AIIADataLoader + +def pretrain_model(data_path1, data_path2, num_epochs=3): + # Merge the two parquet files + df1 = pd.read_parquet(data_path1) + df2 = pd.read_parquet(data_path2) + merged_df = pd.concat([df1, df2], ignore_index=True) + + # Create a new AIIAConfig instance + config = AIIAConfig( + model_name="AIIA-512x", + hidden_size=512, + num_hidden_layers=12, + kernel_size=5, + learning_rate=5e-5 + ) + + # Initialize the base model + model = AIIABase(config) + + # Create dataset loader with merged data + train_dataset = AIIADataLoader( + merged_df, + batch_size=32, + val_split=0.2, + seed=42, + column="file_path", + label_column=None + ) + + # Create separate dataloaders for training and validation sets + train_dataloader = DataLoader( + train_dataset.train_dataset, + batch_size=train_dataset.batch_size, + shuffle=True, + num_workers=4 + ) + + val_dataloader = DataLoader( + train_dataset.val_ataset, + batch_size=train_dataset.batch_size, + shuffle=False, + num_workers=4 + ) + + # Initialize loss functions and optimizer + criterion_denoise = nn.MSELoss() + criterion_rotate = nn.CrossEntropyLoss() + + optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate) + + device = "cuda" if torch.cuda.is_available() else "cpu" + model.to(device) + + best_val_loss = float('inf') + + for epoch in range(num_epochs): + print(f"\nEpoch {epoch+1}/{num_epochs}") + print("-" * 20) + + # Training phase + model.train() + total_train_loss = 0.0 + denoise_train_loss = 0.0 + rotate_train_loss = 0.0 + + for batch in train_dataloader: + images, targets, tasks = zip(*batch) + + if device == "cuda": + images = [img.cuda() for img in images] + targets = [t.cuda() for t in targets] + + optimizer.zero_grad() + + # Process each sample individually since tasks can vary + outputs = [] + total_loss = 0.0 + for i, (image, target, task) in enumerate(zip(images, targets, tasks)): + output = model(image.unsqueeze(0)) + + if task == 'denoise': + loss = criterion_denoise(output.squeeze(), target) + elif task == 'rotate': + loss = criterion_rotate(output.view(-1, len(set(outputs))), target) + + total_loss += loss + outputs.append(output) + + avg_loss = total_loss / len(images) + avg_loss.backward() + optimizer.step() + + total_train_loss += avg_loss.item() + # Separate losses for reporting (you'd need to track this based on tasks) + + avg_total_train_loss = total_train_loss / len(train_dataloader) + print(f"Training Loss: {avg_total_train_loss:.4f}") + + # Validation phase + model.eval() + with torch.no_grad(): + val_losses = [] + for batch in val_dataloader: + images, targets, tasks = zip(*batch) + + if device == "cuda": + images = [img.cuda() for img in images] + targets = [t.cuda() for t in targets] + + outputs = [] + total_loss = 0.0 + for i, (image, target, task) in enumerate(zip(images, targets, tasks)): + output = model(image.unsqueeze(0)) + + if task == 'denoise': + loss = criterion_denoise(output.squeeze(), target) + elif task == 'rotate': + loss = criterion_rotate(output.view(-1, len(set(outputs))), target) + + total_loss += loss + outputs.append(output) + + avg_val_loss = total_loss / len(images) + val_losses.append(avg_val_loss.item()) + + avg_val_loss = sum(val_losses) / len(val_dataloader) + print(f"Validation Loss: {avg_val_loss:.4f}") + + # Save the best model + if avg_val_loss < best_val_loss: + best_val_loss = avg_val_loss + model.save("BASEv0.1") + print("Best model saved!") + +if __name__ == "__main__": + data_path1 = "/root/training_data/vision-dataset/images_dataset.parquet" + data_path2 = "/root/training_data/vision-dataset/vec_images_dataset.parquet" + pretrain_model(data_path1, data_path2, num_epochs=8) \ No newline at end of file From 8ac31c5bf1ad4bbdd74f55ca0e2f137fe795e1b5 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Fri, 24 Jan 2025 18:23:54 +0100 Subject: [PATCH 17/55] improved shared model to have ~10% of params --- src/aiia/model/Model.py | 83 +++++++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 27 deletions(-) diff --git a/src/aiia/model/Model.py b/src/aiia/model/Model.py index 594a81c..8490c47 100644 --- a/src/aiia/model/Model.py +++ b/src/aiia/model/Model.py @@ -28,41 +28,65 @@ class AIIA(nn.Module): return model class AIIABaseShared(AIIA): - """ - Base class with parameter sharing. - All hidden layers share the same weights - """ - def __init__(self, config: AIIAConfig, **kwargs): + def __init__(self, config: AIIAConfig, num_shared_layers=1, **kwargs): super().__init__(config=config, **kwargs) self.config = copy.deepcopy(config) - + self.config.num_shared_layers = num_shared_layers # Update config with new parameters if provided for key, value in kwargs.items(): setattr(self.config, key, value) - # Initialize shared layers - self.conv_layer = nn.Conv2d( - self.config.num_channels, - self.config.hidden_size, - kernel_size=self.config.kernel_size, - padding=1 - ) + # Shared layers (early stages) use the same kernel + self.shared_layers = nn.ModuleList() + for _ in range(self.config.num_shared_layers): + layer = nn.Conv2d( + self.config.num_channels, + self.config.hidden_size, + kernel_size=self.config.kernel_size, + padding=1 + ) + # Initialize with shared weights if it's the first layer + if len(self.shared_layers) == 0: + self.shared_weights = layer.weight + self.shared_biases = nn.ParameterList([ + nn.Parameter(torch.zeros(self.config.hidden_size)) + for _ in range(self.config.num_shared_layers) + ]) + else: + layer.weight = self.shared_weights + # Assign separate biases + layer.bias = self.shared_biases[len(self.shared_layers)] + self.shared_layers.append(layer) + + # Unique layers (later stages) have their own weights and biases + self.unique_layers = nn.ModuleList() + in_channels = self.config.hidden_size + for _ in range(self.config.num_shared_layers): + self.unique_layers.append( + nn.Conv2d( + in_channels, + self.config.hidden_size, + kernel_size=self.config.kernel_size, + padding=1 + ) + ) + + # Activation and pooling layers self.activation_function = getattr(nn, self.config.activation_function)() - self.max_pool = nn.MaxPool2d(kernel_size=2) - - # Create a Sequential container with shared layers repeated - layers = [] - for _ in range(self.config.num_hidden_layers): - layers.extend([ - self.conv_layer, - self.activation_function, - self.max_pool - ]) - - self.cnn = nn.Sequential(*layers) + self.max_pool = nn.MaxPool2d(self.config.kernel_size) def forward(self, x): - return self.cnn(x) + for layer in self.shared_layers: + x = layer(x) + x = self.activation_function(x) + x = self.max_pool(x) + + for layer in self.unique_layers: + x = layer(x) + x = self.activation_function(x) + x = self.max_pool(x) + + return x class AIIABase(AIIA): @@ -184,4 +208,9 @@ class AIIArecursive(AIIA): processed_patches.append(pp) combined_output = torch.mean(torch.stack(processed_patches, dim=0), dim=0) - return combined_output \ No newline at end of file + return combined_output + +config = AIIAConfig() +model2 = AIIABaseShared(config) + +model2.save("shared") \ No newline at end of file From de3d58f6db555e9cf32396b993c90f92a529b5dd Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Fri, 24 Jan 2025 18:49:25 +0100 Subject: [PATCH 18/55] improved cnn --- src/aiia/model/Model.py | 117 ++++++++++++++++++++++------------------ 1 file changed, 65 insertions(+), 52 deletions(-) diff --git a/src/aiia/model/Model.py b/src/aiia/model/Model.py index 8490c47..a454c8c 100644 --- a/src/aiia/model/Model.py +++ b/src/aiia/model/Model.py @@ -26,68 +26,81 @@ class AIIA(nn.Module): model = cls(config) model.load_state_dict(torch.load(f"{path}/model.pth")) return model - + + class AIIABaseShared(AIIA): - def __init__(self, config: AIIAConfig, num_shared_layers=1, **kwargs): + def __init__(self, config: AIIAConfig, **kwargs): + """ + Initialize the AIIABaseShared model. + + Args: + config (AIIAConfig): Configuration object containing model parameters. + **kwargs: Additional keyword arguments to override configuration settings. + """ super().__init__(config=config, **kwargs) - self.config = copy.deepcopy(config) - self.config.num_shared_layers = num_shared_layers - # Update config with new parameters if provided + + # Update configuration with new parameters if provided + self. config = copy.deepcopy(config) + for key, value in kwargs.items(): setattr(self.config, key, value) + + # Initialize the network components + self._initialize_network() + self._initialize_activation_andPooling() - # Shared layers (early stages) use the same kernel - self.shared_layers = nn.ModuleList() - for _ in range(self.config.num_shared_layers): - layer = nn.Conv2d( - self.config.num_channels, - self.config.hidden_size, - kernel_size=self.config.kernel_size, - padding=1 - ) - # Initialize with shared weights if it's the first layer - if len(self.shared_layers) == 0: - self.shared_weights = layer.weight - self.shared_biases = nn.ParameterList([ - nn.Parameter(torch.zeros(self.config.hidden_size)) - for _ in range(self.config.num_shared_layers) - ]) - else: - layer.weight = self.shared_weights - # Assign separate biases - layer.bias = self.shared_biases[len(self.shared_layers)] - self.shared_layers.append(layer) - - # Unique layers (later stages) have their own weights and biases + def _initialize_network(self): + """Initialize the shared and unique layers of the network.""" + # Create a single shared convolutional layer + self.shared_layer = nn.Conv2d( + in_channels=self.config.num_channels, + out_channels=self.config.hidden_size, + kernel_size=self.config.kernel_size, + padding=1 # Using same padding as defined in config + ) + + # Initialize the unique layers with separate weights and biases self.unique_layers = nn.ModuleList() - in_channels = self.config.hidden_size - for _ in range(self.config.num_shared_layers): - self.unique_layers.append( - nn.Conv2d( - in_channels, - self.config.hidden_size, - kernel_size=self.config.kernel_size, - padding=1 - ) - ) + current_in_channels = self.config.hidden_size + + layer = nn.Conv2d( + in_channels=current_in_channels, + out_channels=self.config.hidden_size, + kernel_size=self.config.kernel_size, + padding=1 # Using same padding as defined in config + ) + + self.unique_layers.append(layer) - # Activation and pooling layers - self.activation_function = getattr(nn, self.config.activation_function)() - self.max_pool = nn.MaxPool2d(self.config.kernel_size) + def _initialize_activation_andPooling(self): + """Initialize activation function and pooling layers.""" + # Get activation function from nn module + self.activation = getattr(nn, self.config.activation_function)() + + # Initialize max pooling layer + self.max_pool = nn.MaxPool2d( + kernel_size=self.config.kernel_size, + padding=1 # Using same padding as in Conv2d layers + ) def forward(self, x): - for layer in self.shared_layers: - x = layer(x) - x = self.activation_function(x) - x = self.max_pool(x) + """Forward pass of the network.""" + # Apply shared layer transformation + out = self.shared_layer(x) - for layer in self.unique_layers: - x = layer(x) - x = self.activation_function(x) - x = self.max_pool(x) - - return x - + # Pass through activation function + out = self.activation(out) + + # Apply max pooling + out = self.max_pool(out) + + # Pass through unique layers + for unique_layer in self.unique_layers: + out = unique_layer(out) + out = self.activation(out) + out = self.max_pool(out) + + return out class AIIABase(AIIA): def __init__(self, config: AIIAConfig, **kwargs): From 32daaadddd58f61ba78d5e609428ce6a6a8ee6f9 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 13:05:24 +0100 Subject: [PATCH 19/55] updated pretrain method --- src/pretrain.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index daea216..fea60a0 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -34,7 +34,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): batch_size=32, val_split=0.2, seed=42, - column="file_path", + column="image_bytes", label_column=None ) @@ -71,8 +71,6 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): # Training phase model.train() total_train_loss = 0.0 - denoise_train_loss = 0.0 - rotate_train_loss = 0.0 for batch in train_dataloader: images, targets, tasks = zip(*batch) @@ -144,6 +142,6 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): print("Best model saved!") if __name__ == "__main__": - data_path1 = "/root/training_data/vision-dataset/images_dataset.parquet" + data_path1 = "/root/training_data/vision-dataset/images_checkpoint.parquet" data_path2 = "/root/training_data/vision-dataset/vec_images_dataset.parquet" pretrain_model(data_path1, data_path2, num_epochs=8) \ No newline at end of file From e5a56181604a77034e3bd12a28ef1c9773f9bb7f Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 13:09:23 +0100 Subject: [PATCH 20/55] correct copying --- src/aiia/model/Model.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/aiia/model/Model.py b/src/aiia/model/Model.py index a454c8c..fafab52 100644 --- a/src/aiia/model/Model.py +++ b/src/aiia/model/Model.py @@ -1,8 +1,9 @@ -from config import AIIAConfig +from .config import AIIAConfig from torch import nn import torch import os -import copy # Add this for deep copying +import copy + class AIIA(nn.Module): def __init__(self, config: AIIAConfig, **kwargs): From 59b2784e924c9d6329c702ce6e52797c3ab88b33 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 13:10:24 +0100 Subject: [PATCH 21/55] fixed spelling error --- src/aiia/model/Model.py | 7 +------ src/aiia/model/__init__.py | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/aiia/model/Model.py b/src/aiia/model/Model.py index fafab52..a9584fa 100644 --- a/src/aiia/model/Model.py +++ b/src/aiia/model/Model.py @@ -222,9 +222,4 @@ class AIIArecursive(AIIA): processed_patches.append(pp) combined_output = torch.mean(torch.stack(processed_patches, dim=0), dim=0) - return combined_output - -config = AIIAConfig() -model2 = AIIABaseShared(config) - -model2.save("shared") \ No newline at end of file + return combined_output \ No newline at end of file diff --git a/src/aiia/model/__init__.py b/src/aiia/model/__init__.py index 5757152..66cec7e 100644 --- a/src/aiia/model/__init__.py +++ b/src/aiia/model/__init__.py @@ -1,2 +1,2 @@ from .config import AIIAConfig -from .Model import AIIA, AIIABase, AIIAchunked, AIIAExpert, AIIAmoe, AIIAresursive \ No newline at end of file +from .Model import AIIA, AIIABase, AIIAchunked, AIIAExpert, AIIAmoe, AIIArecursive \ No newline at end of file From 338ac5dee59a7603c775ca39ec0f0c71a6aa217c Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 13:26:01 +0100 Subject: [PATCH 22/55] corrected imports --- src/aiia/__init__.py | 3 +-- src/aiia/model/__init__.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/aiia/__init__.py b/src/aiia/__init__.py index 25b8128..71acf48 100644 --- a/src/aiia/__init__.py +++ b/src/aiia/__init__.py @@ -1,4 +1,3 @@ -# Import submodules -from .model import AIIA, AIIAEncoder +from .model import AIIA, AIIABase, AIIAchunked, AIIAExpert, AIIAmoe, AIIArecursive, AIIABaseShared from .data import AIIADataLoader from .model.config import AIIAConfig \ No newline at end of file diff --git a/src/aiia/model/__init__.py b/src/aiia/model/__init__.py index 66cec7e..0e6a459 100644 --- a/src/aiia/model/__init__.py +++ b/src/aiia/model/__init__.py @@ -1,2 +1,2 @@ from .config import AIIAConfig -from .Model import AIIA, AIIABase, AIIAchunked, AIIAExpert, AIIAmoe, AIIArecursive \ No newline at end of file +from .Model import AIIA, AIIABase, AIIAchunked, AIIAExpert, AIIAmoe, AIIArecursive, AIIABaseShared \ No newline at end of file From 00168af32de4706abef73a3f6cc7e4f165c3da60 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 13:48:52 +0100 Subject: [PATCH 23/55] corrected init --- src/aiia/data/DataLoader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index d3cb900..bcfe28a 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -92,7 +92,7 @@ class AIIADataLoader(DataLoader): seed=42, column="file_path", label_column=None): - super().__init__() + super().__init__(dataset) self.batch_size = batch_size self.val_split = val_split From b7dc835a86aff011bf6c72d51613df620d734568 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 16:20:33 +0100 Subject: [PATCH 24/55] fixed dataloading --- src/aiia/data/DataLoader.py | 32 ++++++++++++++++++-------------- src/pretrain.py | 30 +++++++++--------------------- 2 files changed, 27 insertions(+), 35 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index bcfe28a..45daa7d 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -85,21 +85,19 @@ class JPGImageLoader: print(f"Skipped {self.skipped_count} images due to errors.") -class AIIADataLoader(DataLoader): +class AIIADataLoader: def __init__(self, dataset, batch_size=32, val_split=0.2, seed=42, column="file_path", - label_column=None): - super().__init__(dataset) - + label_column=None, + **dataloader_kwargs): self.batch_size = batch_size self.val_split = val_split self.seed = seed + random.seed(seed) - # Determine which loader to use based on the dataset's content - # Check if any entry in bytes_column is a bytes or bytestring type is_bytes_or_bytestring = any( isinstance(value, (bytes, memoryview)) for value in dataset[column].dropna().head(1).astype(str) @@ -112,10 +110,8 @@ class AIIADataLoader(DataLoader): label_column=label_column ) else: - # Check if file_path column contains valid image file paths (at least one entry) sample_paths = dataset[column].dropna().head(1).astype(str) - # Regex pattern for matching image file paths (adjust as needed) filepath_pattern = r'.*(?:/|\\).*\.([jJ][pP][gG]|png|gif)$' if any( @@ -128,23 +124,33 @@ class AIIADataLoader(DataLoader): label_column=label_column ) else: - # If neither condition is met, default to JPGImageLoader (assuming bytes are stored as strings) self.loader = JPGImageLoader( dataset, bytes_column=column, label_column=label_column ) - # Get all items self.items = [self.loader.get_item(idx) for idx in range(len(dataset))] - # Split into train and validation sets train_indices, val_indices = self._split_data() - # Create datasets for training and validation self.train_dataset = self._create_subset(train_indices) self.val_dataset = self._create_subset(val_indices) + self.train_loader = DataLoader( + self.train_dataset, + batch_size=batch_size, + shuffle=True, + **dataloader_kwargs + ) + + self.val_loader = DataLoader( + self.val_dataset, + batch_size=batch_size, + shuffle=False, + **dataloader_kwargs + ) + def _split_data(self): if len(self.items) == 0: return [], [] @@ -184,7 +190,6 @@ class AIIADataset(torch.utils.data.Dataset): return (image, label) elif isinstance(item, tuple) and len(item) == 3: image, task, label = item - # Handle tasks accordingly (e.g., apply different augmentations) if task == 'denoise': noise_std = 0.1 noisy_img = image + torch.randn_like(image) * noise_std @@ -199,7 +204,6 @@ class AIIADataset(torch.utils.data.Dataset): else: raise ValueError(f"Unknown task: {task}") else: - # Handle single images without labels or tasks if isinstance(item, Image.Image): return item else: diff --git a/src/pretrain.py b/src/pretrain.py index fea60a0..6fc9922 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -29,29 +29,17 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): model = AIIABase(config) # Create dataset loader with merged data - train_dataset = AIIADataLoader( + aiia_loader = AIIADataLoader( merged_df, batch_size=32, val_split=0.2, seed=42, - column="image_bytes", - label_column=None + column="image_bytes" ) - # Create separate dataloaders for training and validation sets - train_dataloader = DataLoader( - train_dataset.train_dataset, - batch_size=train_dataset.batch_size, - shuffle=True, - num_workers=4 - ) - - val_dataloader = DataLoader( - train_dataset.val_ataset, - batch_size=train_dataset.batch_size, - shuffle=False, - num_workers=4 - ) + # Access the train and validation loaders + train_loader = aiia_loader.train_loader + val_loader = aiia_loader.val_loader # Initialize loss functions and optimizer criterion_denoise = nn.MSELoss() @@ -72,7 +60,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): model.train() total_train_loss = 0.0 - for batch in train_dataloader: + for batch in train_loader: images, targets, tasks = zip(*batch) if device == "cuda": @@ -102,14 +90,14 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): total_train_loss += avg_loss.item() # Separate losses for reporting (you'd need to track this based on tasks) - avg_total_train_loss = total_train_loss / len(train_dataloader) + avg_total_train_loss = total_train_loss / len(train_loader) print(f"Training Loss: {avg_total_train_loss:.4f}") # Validation phase model.eval() with torch.no_grad(): val_losses = [] - for batch in val_dataloader: + for batch in val_loader: images, targets, tasks = zip(*batch) if device == "cuda": @@ -132,7 +120,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): avg_val_loss = total_loss / len(images) val_losses.append(avg_val_loss.item()) - avg_val_loss = sum(val_losses) / len(val_dataloader) + avg_val_loss = sum(val_losses) / len(val_loader) print(f"Validation Loss: {avg_val_loss:.4f}") # Save the best model From 9124221346f2476221ee4b39b9ecf2de396e56a2 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 17:26:13 +0100 Subject: [PATCH 25/55] correct loading files --- src/aiia/data/DataLoader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index 45daa7d..51da94e 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -53,7 +53,7 @@ class JPGImageLoader: self.successful_count = 0 self.skipped_count = 0 - if self.bytes_column not in dataset.column_names: + if self.bytes_column not in dataset.columns: raise ValueError(f"Column '{self.bytes_column}' not found in dataset.") def _get_image(self, item): From b5da2e477d62bdef7e1e74b59d35e6fb3da31dbf Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 18:15:05 +0100 Subject: [PATCH 26/55] correct allogcation for indexes --- src/aiia/data/DataLoader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index 51da94e..d80b2c7 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -28,7 +28,7 @@ class FilePathLoader: return None def get_item(self, idx): - item = self.dataset[idx] + item = self.dataset.iloc[idx] image = self._get_image(item) if image is not None: self.successful_count += 1 @@ -67,7 +67,7 @@ class JPGImageLoader: return None def get_item(self, idx): - item = self.dataset[idx] + item = self.dataset.iloc[idx] image = self._get_image(item) if image is not None: self.successful_count += 1 From 51da6b5aa2764614e981fc6e99fe5606d30375c8 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 18:19:54 +0100 Subject: [PATCH 27/55] corrected loading --- src/aiia/data/DataLoader.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index d80b2c7..1813749 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -5,7 +5,7 @@ from torch.utils.data import DataLoader from torchvision import transforms import random import re - +import base64 class FilePathLoader: def __init__(self, dataset, file_path_column="file_path", label_column=None): @@ -55,16 +55,26 @@ class JPGImageLoader: if self.bytes_column not in dataset.columns: raise ValueError(f"Column '{self.bytes_column}' not found in dataset.") - + def _get_image(self, item): try: - bytes_data = item[self.bytes_column] + # Retrieve the string data + data = item[self.bytes_column] + + # Check if the data is a string, and decode it + if isinstance(data, str): + bytes_data = base64.b64decode(data) # Adjust decoding as per your data encoding format + else: + bytes_data = data + + # Load the bytes into a BytesIO object and open the image img_bytes = io.BytesIO(bytes_data) image = Image.open(img_bytes).convert("RGB") return image except Exception as e: print(f"Error loading image from bytes: {e}") return None + def get_item(self, idx): item = self.dataset.iloc[idx] From e0abdb9d39ef782ed5eecbbcf167b323c038beae Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 18:40:19 +0100 Subject: [PATCH 28/55] fixed datasplitting --- src/aiia/data/DataLoader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index 1813749..98b882b 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -165,7 +165,8 @@ class AIIADataLoader: if len(self.items) == 0: return [], [] - tasks = [item[1] if len(item) > 1 and hasattr(item, '__getitem__') else None for item in self.items] + tasks = [item[1] for item in self.items if len(item) > 1 and hasattr(item, '__getitem__') and item[1] is not None] + unique_tasks = list(set(tasks)) if tasks.count(None) < len(tasks) else [] train_indices = [] From a8cd9b00e5823c8593ec534d56c1022035c6597f Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 18:41:14 +0100 Subject: [PATCH 29/55] limit data loadng to 10k --- src/pretrain.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index 6fc9922..09e9856 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -12,8 +12,8 @@ from aiia.data.DataLoader import AIIADataLoader def pretrain_model(data_path1, data_path2, num_epochs=3): # Merge the two parquet files - df1 = pd.read_parquet(data_path1) - df2 = pd.read_parquet(data_path2) + df1 = pd.read_parquet(data_path1).head(10000) + df2 = pd.read_parquet(data_path2).head(10000) merged_df = pd.concat([df1, df2], ignore_index=True) # Create a new AIIAConfig instance From 7a1eb8bd3098360035b835d83a5a8a7dfe091bca Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 21:58:13 +0100 Subject: [PATCH 30/55] updated loader --- src/aiia/data/DataLoader.py | 102 ++++++++++++------------------------ src/pretrain.py | 14 +---- 2 files changed, 36 insertions(+), 80 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index 98b882b..223d146 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -58,23 +58,22 @@ class JPGImageLoader: def _get_image(self, item): try: - # Retrieve the string data data = item[self.bytes_column] - # Check if the data is a string, and decode it - if isinstance(data, str): - bytes_data = base64.b64decode(data) # Adjust decoding as per your data encoding format + if isinstance(data, str) and data.startswith("b'"): + cleaned_data = data[2:-1].encode('latin1').decode('unicode-escape').encode('latin1') + bytes_data = cleaned_data + elif isinstance(data, str): + bytes_data = base64.b64decode(data) else: bytes_data = data - # Load the bytes into a BytesIO object and open the image img_bytes = io.BytesIO(bytes_data) image = Image.open(img_bytes).convert("RGB") return image except Exception as e: print(f"Error loading image from bytes: {e}") return None - def get_item(self, idx): item = self.dataset.iloc[idx] @@ -93,94 +92,61 @@ class JPGImageLoader: def print_summary(self): print(f"Successfully converted {self.successful_count} images.") print(f"Skipped {self.skipped_count} images due to errors.") - class AIIADataLoader: - def __init__(self, dataset, - batch_size=32, - val_split=0.2, - seed=42, - column="file_path", - label_column=None, - **dataloader_kwargs): + def __init__(self, dataset, batch_size=32, val_split=0.2, seed=42, column="file_path", label_column=None, **dataloader_kwargs): self.batch_size = batch_size self.val_split = val_split self.seed = seed random.seed(seed) - is_bytes_or_bytestring = any( - isinstance(value, (bytes, memoryview)) - for value in dataset[column].dropna().head(1).astype(str) + sample_value = dataset[column].iloc[0] + is_bytes_or_bytestring = isinstance(sample_value, (bytes, str)) and ( + isinstance(sample_value, bytes) or + sample_value.startswith("b'") or + sample_value.startswith(('b"', 'data:image')) ) if is_bytes_or_bytestring: - self.loader = JPGImageLoader( - dataset, - bytes_column=column, - label_column=label_column - ) + self.loader = JPGImageLoader(dataset, bytes_column=column, label_column=label_column) else: sample_paths = dataset[column].dropna().head(1).astype(str) + filepath_pattern = r'.*(?:/|\\).*\.([jJ][pP][gG]|[pP][nN][gG]|[gG][iI][fF])$' - filepath_pattern = r'.*(?:/|\\).*\.([jJ][pP][gG]|png|gif)$' - - if any( - re.match(filepath_pattern, path, flags=re.IGNORECASE) - for path in sample_paths - ): - self.loader = FilePathLoader( - dataset, - file_path_column=column, - label_column=label_column - ) + if any(re.match(filepath_pattern, path, flags=re.IGNORECASE) for path in sample_paths): + self.loader = FilePathLoader(dataset, file_path_column=column, label_column=label_column) else: - self.loader = JPGImageLoader( - dataset, - bytes_column=column, - label_column=label_column - ) + self.loader = JPGImageLoader(dataset, bytes_column=column, label_column=label_column) - self.items = [self.loader.get_item(idx) for idx in range(len(dataset))] + self.items = [] + for idx in range(len(dataset)): + item = self.loader.get_item(idx) + if item is not None: + self.items.append(item) + if not self.items: + raise ValueError("No valid items were loaded from the dataset") + train_indices, val_indices = self._split_data() self.train_dataset = self._create_subset(train_indices) self.val_dataset = self._create_subset(val_indices) - self.train_loader = DataLoader( - self.train_dataset, - batch_size=batch_size, - shuffle=True, - **dataloader_kwargs - ) - - self.val_loader = DataLoader( - self.val_dataset, - batch_size=batch_size, - shuffle=False, - **dataloader_kwargs - ) + self.train_loader = DataLoader(self.train_dataset, batch_size=batch_size, shuffle=True, **dataloader_kwargs) + self.val_loader = DataLoader(self.val_dataset, batch_size=batch_size, shuffle=False, **dataloader_kwargs) def _split_data(self): if len(self.items) == 0: - return [], [] + raise ValueError("No items to split") - tasks = [item[1] for item in self.items if len(item) > 1 and hasattr(item, '__getitem__') and item[1] is not None] - - unique_tasks = list(set(tasks)) if tasks.count(None) < len(tasks) else [] + num_samples = len(self.items) + indices = list(range(num_samples)) + random.shuffle(indices) - train_indices = [] - val_indices = [] + split_idx = int((1 - self.val_split) * num_samples) + train_indices = indices[:split_idx] + val_indices = indices[split_idx:] - for task in unique_tasks: - task_indices = [i for i, t in enumerate(tasks) if t == task] - n_val = int(len(task_indices) * self.val_split) - - random.shuffle(task_indices) - - val_indices.extend(task_indices[:n_val]) - train_indices.extend(task_indices[n_val:]) - return train_indices, val_indices def _create_subset(self, indices): @@ -218,4 +184,4 @@ class AIIADataset(torch.utils.data.Dataset): if isinstance(item, Image.Image): return item else: - raise ValueError("Invalid item format.") + raise ValueError("Invalid item format.") \ No newline at end of file diff --git a/src/pretrain.py b/src/pretrain.py index 09e9856..78ce63a 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -18,24 +18,14 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): # Create a new AIIAConfig instance config = AIIAConfig( - model_name="AIIA-512x", - hidden_size=512, - num_hidden_layers=12, - kernel_size=5, - learning_rate=5e-5 + model_name="AIIA-Base-512x20k", ) # Initialize the base model model = AIIABase(config) # Create dataset loader with merged data - aiia_loader = AIIADataLoader( - merged_df, - batch_size=32, - val_split=0.2, - seed=42, - column="image_bytes" - ) + aiia_loader = AIIADataLoader(merged_df, column="image_bytes", batch_size=32) # Access the train and validation loaders train_loader = aiia_loader.train_loader From cae3fa7fb30ca96208a3b6de2eaea461fd313cab Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 22:08:59 +0100 Subject: [PATCH 31/55] proper image transformation --- src/aiia/data/DataLoader.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index 223d146..1f5d75a 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -152,10 +152,13 @@ class AIIADataLoader: def _create_subset(self, indices): subset_items = [self.items[i] for i in indices] return AIIADataset(subset_items) - + class AIIADataset(torch.utils.data.Dataset): def __init__(self, items): self.items = items + self.transform = transforms.Compose([ + transforms.ToTensor() + ]) def __len__(self): return len(self.items) @@ -164,9 +167,14 @@ class AIIADataset(torch.utils.data.Dataset): item = self.items[idx] if isinstance(item, tuple) and len(item) == 2: image, label = item + # Convert PIL image to tensor + image = self.transform(image) return (image, label) elif isinstance(item, tuple) and len(item) == 3: image, task, label = item + # Convert PIL image to tensor first + image = self.transform(image) + if task == 'denoise': noise_std = 0.1 noisy_img = image + torch.randn_like(image) * noise_std @@ -182,6 +190,7 @@ class AIIADataset(torch.utils.data.Dataset): raise ValueError(f"Unknown task: {task}") else: if isinstance(item, Image.Image): - return item + # Convert single PIL image to tensor + return self.transform(item) else: - raise ValueError("Invalid item format.") \ No newline at end of file + raise ValueError("Invalid item format.") From 3f6e6514a9770523f248b5874c0e8a264e695a62 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 22:17:37 +0100 Subject: [PATCH 32/55] rgba conversion to rgb --- src/aiia/data/DataLoader.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index 1f5d75a..78f4eb5 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -21,7 +21,13 @@ class FilePathLoader: def _get_image(self, item): try: path = item[self.file_path_column] - image = Image.open(path).convert("RGB") + image = Image.open(path) + if image.mode == 'RGBA': + background = Image.new('RGB', image.size, (0, 0, 0)) + background.paste(image, mask=image.split()[3]) + image = background + elif image.mode != 'RGB': + image = image.convert('RGB') return image except Exception as e: print(f"Error loading image from {path}: {e}") @@ -69,7 +75,13 @@ class JPGImageLoader: bytes_data = data img_bytes = io.BytesIO(bytes_data) - image = Image.open(img_bytes).convert("RGB") + image = Image.open(img_bytes) + if image.mode == 'RGBA': + background = Image.new('RGB', image.size, (0, 0, 0)) + background.paste(image, mask=image.split()[3]) + image = background + elif image.mode != 'RGB': + image = image.convert('RGB') return image except Exception as e: print(f"Error loading image from bytes: {e}") @@ -167,12 +179,10 @@ class AIIADataset(torch.utils.data.Dataset): item = self.items[idx] if isinstance(item, tuple) and len(item) == 2: image, label = item - # Convert PIL image to tensor image = self.transform(image) return (image, label) elif isinstance(item, tuple) and len(item) == 3: image, task, label = item - # Convert PIL image to tensor first image = self.transform(image) if task == 'denoise': @@ -190,7 +200,6 @@ class AIIADataset(torch.utils.data.Dataset): raise ValueError(f"Unknown task: {task}") else: if isinstance(item, Image.Image): - # Convert single PIL image to tensor return self.transform(item) else: raise ValueError("Invalid item format.") From 7c4aef09789a14f42f42b616f0bebca2aaddba76 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 22:48:29 +0100 Subject: [PATCH 33/55] updated dataloader to work with tupels --- src/aiia/data/DataLoader.py | 44 ++++++++++------- src/pretrain.py | 94 ++++++++++++++----------------------- 2 files changed, 61 insertions(+), 77 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index 78f4eb5..567954f 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -106,10 +106,11 @@ class JPGImageLoader: print(f"Skipped {self.skipped_count} images due to errors.") class AIIADataLoader: - def __init__(self, dataset, batch_size=32, val_split=0.2, seed=42, column="file_path", label_column=None, **dataloader_kwargs): + def __init__(self, dataset, batch_size=32, val_split=0.2, seed=42, column="file_path", label_column=None, pretraining=False, **dataloader_kwargs): self.batch_size = batch_size self.val_split = val_split self.seed = seed + self.pretraining = pretraining random.seed(seed) sample_value = dataset[column].iloc[0] @@ -134,7 +135,12 @@ class AIIADataLoader: for idx in range(len(dataset)): item = self.loader.get_item(idx) if item is not None: - self.items.append(item) + if self.pretraining: + img = item[0] if isinstance(item, tuple) else item + self.items.append((img, 'denoise', img)) + self.items.append((img, 'rotate', 0)) + else: + self.items.append(item) if not self.items: raise ValueError("No valid items were loaded from the dataset") @@ -163,12 +169,14 @@ class AIIADataLoader: def _create_subset(self, indices): subset_items = [self.items[i] for i in indices] - return AIIADataset(subset_items) + return AIIADataset(subset_items, pretraining=self.pretraining) class AIIADataset(torch.utils.data.Dataset): - def __init__(self, items): + def __init__(self, items, pretraining=False): self.items = items + self.pretraining = pretraining self.transform = transforms.Compose([ + transforms.Resize((224, 224)), transforms.ToTensor() ]) @@ -177,29 +185,29 @@ class AIIADataset(torch.utils.data.Dataset): def __getitem__(self, idx): item = self.items[idx] - if isinstance(item, tuple) and len(item) == 2: - image, label = item - image = self.transform(image) - return (image, label) - elif isinstance(item, tuple) and len(item) == 3: + + if self.pretraining: image, task, label = item image = self.transform(image) if task == 'denoise': noise_std = 0.1 noisy_img = image + torch.randn_like(image) * noise_std - target = image - return (noisy_img, target, task) + target = image.clone() + return noisy_img, target, task elif task == 'rotate': angles = [0, 90, 180, 270] angle = random.choice(angles) rotated_img = transforms.functional.rotate(image, angle) - target = torch.tensor(angle).long() - return (rotated_img, target, task) - else: - raise ValueError(f"Unknown task: {task}") + target = torch.tensor(angle / 90).long() + return rotated_img, target, task else: - if isinstance(item, Image.Image): - return self.transform(item) + if isinstance(item, tuple) and len(item) == 2: + image, label = item + image = self.transform(image) + return image, label else: - raise ValueError("Invalid item format.") + if isinstance(item, Image.Image): + return self.transform(item) + else: + return self.transform(item[0]) diff --git a/src/pretrain.py b/src/pretrain.py index 78ce63a..8436d51 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -1,37 +1,26 @@ import torch from torch import nn -from torch.utils.data import Dataset, DataLoader -from torchvision import transforms -from PIL import Image -import os -import random import pandas as pd from aiia.model.config import AIIAConfig from aiia.model import AIIABase from aiia.data.DataLoader import AIIADataLoader def pretrain_model(data_path1, data_path2, num_epochs=3): - # Merge the two parquet files df1 = pd.read_parquet(data_path1).head(10000) df2 = pd.read_parquet(data_path2).head(10000) merged_df = pd.concat([df1, df2], ignore_index=True) - # Create a new AIIAConfig instance config = AIIAConfig( model_name="AIIA-Base-512x20k", ) - # Initialize the base model model = AIIABase(config) - # Create dataset loader with merged data - aiia_loader = AIIADataLoader(merged_df, column="image_bytes", batch_size=32) + aiia_loader = AIIADataLoader(merged_df, column="image_bytes", batch_size=32, pretraining=True) - # Access the train and validation loaders train_loader = aiia_loader.train_loader val_loader = aiia_loader.val_loader - # Initialize loss functions and optimizer criterion_denoise = nn.MSELoss() criterion_rotate = nn.CrossEntropyLoss() @@ -46,74 +35,61 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): print(f"\nEpoch {epoch+1}/{num_epochs}") print("-" * 20) - # Training phase model.train() total_train_loss = 0.0 + denoise_losses = [] + rotate_losses = [] for batch in train_loader: - images, targets, tasks = zip(*batch) - - if device == "cuda": - images = [img.cuda() for img in images] - targets = [t.cuda() for t in targets] + noisy_imgs, targets, tasks = batch + noisy_imgs = noisy_imgs.to(device) + targets = targets.to(device) optimizer.zero_grad() - # Process each sample individually since tasks can vary - outputs = [] - total_loss = 0.0 - for i, (image, target, task) in enumerate(zip(images, targets, tasks)): - output = model(image.unsqueeze(0)) - + outputs = model(noisy_imgs) + task_losses = [] + for i, task in enumerate(tasks): if task == 'denoise': - loss = criterion_denoise(output.squeeze(), target) - elif task == 'rotate': - loss = criterion_rotate(output.view(-1, len(set(outputs))), target) - - total_loss += loss - outputs.append(output) - - avg_loss = total_loss / len(images) - avg_loss.backward() + loss = criterion_denoise(outputs[i], targets[i]) + denoise_losses.append(loss.item()) + else: + loss = criterion_rotate(outputs[i].unsqueeze(0), targets[i].unsqueeze(0)) + rotate_losses.append(loss.item()) + task_losses.append(loss) + + batch_loss = sum(task_losses) / len(task_losses) + batch_loss.backward() optimizer.step() - total_train_loss += avg_loss.item() - # Separate losses for reporting (you'd need to track this based on tasks) - + total_train_loss += batch_loss.item() avg_total_train_loss = total_train_loss / len(train_loader) print(f"Training Loss: {avg_total_train_loss:.4f}") - # Validation phase model.eval() with torch.no_grad(): val_losses = [] for batch in val_loader: - images, targets, tasks = zip(*batch) + noisy_imgs, targets, tasks = batch - if device == "cuda": - images = [img.cuda() for img in images] - targets = [t.cuda() for t in targets] - - outputs = [] - total_loss = 0.0 - for i, (image, target, task) in enumerate(zip(images, targets, tasks)): - output = model(image.unsqueeze(0)) - + noisy_imgs = noisy_imgs.to(device) + targets = targets.to(device) + + outputs = model(noisy_imgs) + + task_losses = [] + for i, task in enumerate(tasks): if task == 'denoise': - loss = criterion_denoise(output.squeeze(), target) - elif task == 'rotate': - loss = criterion_rotate(output.view(-1, len(set(outputs))), target) - - total_loss += loss - outputs.append(output) - - avg_val_loss = total_loss / len(images) - val_losses.append(avg_val_loss.item()) - + loss = criterion_denoise(outputs[i], targets[i]) + else: + loss = criterion_rotate(outputs[i].unsqueeze(0), targets[i].unsqueeze(0)) + task_losses.append(loss) + + batch_loss = sum(task_losses) / len(task_losses) + val_losses.append(batch_loss.item()) avg_val_loss = sum(val_losses) / len(val_loader) print(f"Validation Loss: {avg_val_loss:.4f}") - # Save the best model if avg_val_loss < best_val_loss: best_val_loss = avg_val_loss model.save("BASEv0.1") @@ -122,4 +98,4 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): if __name__ == "__main__": data_path1 = "/root/training_data/vision-dataset/images_checkpoint.parquet" data_path2 = "/root/training_data/vision-dataset/vec_images_dataset.parquet" - pretrain_model(data_path1, data_path2, num_epochs=8) \ No newline at end of file + pretrain_model(data_path1, data_path2, num_epochs=3) \ No newline at end of file From 8a809269e5125d45a8daa93fd058a22f81890699 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 23:04:49 +0100 Subject: [PATCH 34/55] fix inline error --- src/aiia/data/DataLoader.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index 567954f..cbb22a6 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -134,17 +134,18 @@ class AIIADataLoader: self.items = [] for idx in range(len(dataset)): item = self.loader.get_item(idx) - if item is not None: + if item is not None: # Only add valid items if self.pretraining: img = item[0] if isinstance(item, tuple) else item self.items.append((img, 'denoise', img)) self.items.append((img, 'rotate', 0)) - else: + else: self.items.append(item) if not self.items: raise ValueError("No valid items were loaded from the dataset") + train_indices, val_indices = self._split_data() self.train_dataset = self._create_subset(train_indices) From 2b55f02b5054a29ff898a66d9e54fc2590040cc9 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 23:08:56 +0100 Subject: [PATCH 35/55] eorr handling because we have a tensor misshaping --- src/aiia/data/DataLoader.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index cbb22a6..4ba5032 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -189,7 +189,12 @@ class AIIADataset(torch.utils.data.Dataset): if self.pretraining: image, task, label = item + if not isinstance(image, Image.Image): + raise ValueError(f"Invalid image at index {idx}") + image = self.transform(image) + if image.shape != (3, 224, 224): + raise ValueError(f"Invalid image shape at index {idx}: {image.shape}") if task == 'denoise': noise_std = 0.1 @@ -202,13 +207,22 @@ class AIIADataset(torch.utils.data.Dataset): rotated_img = transforms.functional.rotate(image, angle) target = torch.tensor(angle / 90).long() return rotated_img, target, task + else: + raise ValueError(f"Invalid task at index {idx}: {task}") else: if isinstance(item, tuple) and len(item) == 2: image, label = item + if not isinstance(image, Image.Image): + raise ValueError(f"Invalid image at index {idx}") image = self.transform(image) + if image.shape != (3, 224, 224): + raise ValueError(f"Invalid image shape at index {idx}: {image.shape}") return image, label else: if isinstance(item, Image.Image): - return self.transform(item) + image = self.transform(item) else: - return self.transform(item[0]) + image = self.transform(item[0]) + if image.shape != (3, 224, 224): + raise ValueError(f"Invalid image shape at index {idx}: {image.shape}") + return image From b501ae8317b9fbd18e48fb280eb1421cc0fbc5db Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 23:20:15 +0100 Subject: [PATCH 36/55] new pretraining script --- src/pretrain.py | 79 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 61 insertions(+), 18 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index 8436d51..4d98538 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -1,29 +1,65 @@ import torch -from torch import nn +from torch import nn, utils import pandas as pd from aiia.model.config import AIIAConfig from aiia.model import AIIABase from aiia.data.DataLoader import AIIADataLoader +import os +import copy def pretrain_model(data_path1, data_path2, num_epochs=3): + # Read and merge datasets df1 = pd.read_parquet(data_path1).head(10000) df2 = pd.read_parquet(data_path2).head(10000) merged_df = pd.concat([df1, df2], ignore_index=True) + # Model configuration config = AIIAConfig( model_name="AIIA-Base-512x20k", ) + # Initialize model and data loader model = AIIABase(config) + + # Define a custom collate function to handle preprocessing and skip bad samples + def safe_collate(batch): + processed_batch = [] + for sample in batch: + try: + # Process each sample here (e.g., decode image, preprocess, etc.) + # Replace with actual preprocessing steps + processed_sample = { + 'image': torch.randn(3, 224, 224), # Example tensor + 'target': torch.randint(0, 10, (1,)), # Example target + 'task': 'denoise' # Example task + } + processed_batch.append(processed_sample) + except Exception as e: + print(f"Skipping sample due to error: {e}") + if not processed_batch: + return None # Skip batch if all samples are invalid - aiia_loader = AIIADataLoader(merged_df, column="image_bytes", batch_size=32, pretraining=True) + # Stack tensors for the batch + images = torch.stack([x['image'] for x in processed_batch]) + targets = torch.stack([x['target'] for x in processed_batch]) + tasks = [x['task'] for x in processed_batch] + + return (images, targets, tasks) + + aiia_loader = AIIADataLoader( + merged_df, + column="image_bytes", + batch_size=32, + pretraining=True, + collate_fn=safe_collate + ) train_loader = aiia_loader.train_loader val_loader = aiia_loader.val_loader + # Define loss functions and optimizer criterion_denoise = nn.MSELoss() criterion_rotate = nn.CrossEntropyLoss() - optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate) device = "cuda" if torch.cuda.is_available() else "cpu" @@ -35,49 +71,55 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): print(f"\nEpoch {epoch+1}/{num_epochs}") print("-" * 20) + # Training phase model.train() total_train_loss = 0.0 - denoise_losses = [] - rotate_losses = [] - + for batch in train_loader: + if batch is None: + continue # Skip empty batches + noisy_imgs, targets, tasks = batch - noisy_imgs = noisy_imgs.to(device) targets = targets.to(device) + optimizer.zero_grad() - + outputs = model(noisy_imgs) task_losses = [] + for i, task in enumerate(tasks): if task == 'denoise': loss = criterion_denoise(outputs[i], targets[i]) - denoise_losses.append(loss.item()) else: loss = criterion_rotate(outputs[i].unsqueeze(0), targets[i].unsqueeze(0)) - rotate_losses.append(loss.item()) task_losses.append(loss) batch_loss = sum(task_losses) / len(task_losses) batch_loss.backward() optimizer.step() - + total_train_loss += batch_loss.item() + avg_total_train_loss = total_train_loss / len(train_loader) print(f"Training Loss: {avg_total_train_loss:.4f}") + # Validation phase model.eval() + val_loss = 0.0 + with torch.no_grad(): - val_losses = [] for batch in val_loader: + if batch is None: + continue + noisy_imgs, targets, tasks = batch - noisy_imgs = noisy_imgs.to(device) targets = targets.to(device) outputs = model(noisy_imgs) - task_losses = [] + for i, task in enumerate(tasks): if task == 'denoise': loss = criterion_denoise(outputs[i], targets[i]) @@ -86,10 +128,11 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): task_losses.append(loss) batch_loss = sum(task_losses) / len(task_losses) - val_losses.append(batch_loss.item()) - avg_val_loss = sum(val_losses) / len(val_loader) - print(f"Validation Loss: {avg_val_loss:.4f}") - + val_loss += batch_loss.item() + + avg_val_loss = val_loss / len(val_loader) + print(f"Validation Loss: {avg_val_loss:.4f}") + if avg_val_loss < best_val_loss: best_val_loss = avg_val_loss model.save("BASEv0.1") From 29f0d86ff7c9360437ae36500142115174e034af Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 23:26:42 +0100 Subject: [PATCH 37/55] kernel size as large as channel size --- src/aiia/model/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiia/model/config.py b/src/aiia/model/config.py index e2ae83e..02bc709 100644 --- a/src/aiia/model/config.py +++ b/src/aiia/model/config.py @@ -8,7 +8,7 @@ class AIIAConfig: def __init__( self, model_name: str = "AIIA", - kernel_size: int = 5, + kernel_size: int = 3, activation_function: str = "GELU", hidden_size: int = 512, num_hidden_layers: int = 12, From 3749ba9c5fa74889044f78d6e7ded226c558a954 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 08:39:42 +0100 Subject: [PATCH 38/55] updated base models MaxPool2D --- src/aiia/model/Model.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/aiia/model/Model.py b/src/aiia/model/Model.py index a9584fa..771caf8 100644 --- a/src/aiia/model/Model.py +++ b/src/aiia/model/Model.py @@ -80,8 +80,9 @@ class AIIABaseShared(AIIA): # Initialize max pooling layer self.max_pool = nn.MaxPool2d( - kernel_size=self.config.kernel_size, - padding=1 # Using same padding as in Conv2d layers + kernel_size=1, + stride=1, + padding=1 ) def forward(self, x): @@ -117,7 +118,7 @@ class AIIABase(AIIA): nn.Conv2d(in_channels, self.config.hidden_size, kernel_size=self.config.kernel_size, padding=1), getattr(nn, self.config.activation_function)(), - nn.MaxPool2d(kernel_size=2) + nn.MaxPool2d(kernel_size=1, stride=1) ]) in_channels = self.config.hidden_size From d205346741a7d0a71000eee9fe4bc848dccf82cb Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 08:44:46 +0100 Subject: [PATCH 39/55] downsized trainingdata from 20k to 1k --- src/pretrain.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index 4d98538..505b032 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -9,13 +9,13 @@ import copy def pretrain_model(data_path1, data_path2, num_epochs=3): # Read and merge datasets - df1 = pd.read_parquet(data_path1).head(10000) - df2 = pd.read_parquet(data_path2).head(10000) + df1 = pd.read_parquet(data_path1).head(5000) + df2 = pd.read_parquet(data_path2).head(5000) merged_df = pd.concat([df1, df2], ignore_index=True) # Model configuration config = AIIAConfig( - model_name="AIIA-Base-512x20k", + model_name="AIIA-Base-512x10k", ) # Initialize model and data loader From 32526c3c30eb9a32532c35f8441763fa0caec1a0 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 08:53:01 +0100 Subject: [PATCH 40/55] 5k image --- src/pretrain.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index 505b032..c6e7705 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -9,13 +9,13 @@ import copy def pretrain_model(data_path1, data_path2, num_epochs=3): # Read and merge datasets - df1 = pd.read_parquet(data_path1).head(5000) - df2 = pd.read_parquet(data_path2).head(5000) + df1 = pd.read_parquet(data_path1).head(2500) + df2 = pd.read_parquet(data_path2).head(2500) merged_df = pd.concat([df1, df2], ignore_index=True) # Model configuration config = AIIAConfig( - model_name="AIIA-Base-512x10k", + model_name="AIIA-Base-512x5k", ) # Initialize model and data loader From 8dad1d7150f673af41bc510de97b27ef93ab9b85 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 09:02:22 +0100 Subject: [PATCH 41/55] downszied batchsize --- src/pretrain.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index c6e7705..38663e3 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -9,13 +9,13 @@ import copy def pretrain_model(data_path1, data_path2, num_epochs=3): # Read and merge datasets - df1 = pd.read_parquet(data_path1).head(2500) - df2 = pd.read_parquet(data_path2).head(2500) + df1 = pd.read_parquet(data_path1).head(10000) + df2 = pd.read_parquet(data_path2).head(10000) merged_df = pd.concat([df1, df2], ignore_index=True) # Model configuration config = AIIAConfig( - model_name="AIIA-Base-512x5k", + model_name="AIIA-Base-512x20k", ) # Initialize model and data loader @@ -49,7 +49,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): aiia_loader = AIIADataLoader( merged_df, column="image_bytes", - batch_size=32, + batch_size=8, pretraining=True, collate_fn=safe_collate ) From b546f4ee27db9c4bfc6e8cf58940b2f44839f0f2 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 09:08:39 +0100 Subject: [PATCH 42/55] furhter batchsizing downsizing --- src/pretrain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pretrain.py b/src/pretrain.py index 38663e3..31dce37 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -49,7 +49,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): aiia_loader = AIIADataLoader( merged_df, column="image_bytes", - batch_size=8, + batch_size=4, pretraining=True, collate_fn=safe_collate ) From 6c146f2767774ed0571475e1b47e6fdbf5ad81dc Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 09:13:22 +0100 Subject: [PATCH 43/55] added progressbar for batches --- src/pretrain.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index 31dce37..45aac3f 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -4,8 +4,8 @@ import pandas as pd from aiia.model.config import AIIAConfig from aiia.model import AIIABase from aiia.data.DataLoader import AIIADataLoader -import os -import copy +from tqdm import tqdm + def pretrain_model(data_path1, data_path2, num_epochs=3): # Read and merge datasets @@ -49,7 +49,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): aiia_loader = AIIADataLoader( merged_df, column="image_bytes", - batch_size=4, + batch_size=2, pretraining=True, collate_fn=safe_collate ) @@ -75,7 +75,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): model.train() total_train_loss = 0.0 - for batch in train_loader: + for batch in tqdm(train_loader): if batch is None: continue # Skip empty batches From 91a568731faa68bdcaa4eef474aaaac05ed51c68 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 09:26:08 +0100 Subject: [PATCH 44/55] removed placeholder collate function --- src/pretrain.py | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index 45aac3f..c4bc1a7 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -26,22 +26,22 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): processed_batch = [] for sample in batch: try: - # Process each sample here (e.g., decode image, preprocess, etc.) - # Replace with actual preprocessing steps - processed_sample = { - 'image': torch.randn(3, 224, 224), # Example tensor - 'target': torch.randint(0, 10, (1,)), # Example target - 'task': 'denoise' # Example task - } - processed_batch.append(processed_sample) + noisy_img, target, task = sample + processed_batch.append({ + 'image': noisy_img, + 'target': target, + 'task': task + }) except Exception as e: print(f"Skipping sample due to error: {e}") + continue + if not processed_batch: return None # Skip batch if all samples are invalid # Stack tensors for the batch images = torch.stack([x['image'] for x in processed_batch]) - targets = torch.stack([x['target'] for x in processed_batch]) + targets = [x['target'] for x in processed_batch] # Don't stack targets yet tasks = [x['task'] for x in processed_batch] return (images, targets, tasks) @@ -81,18 +81,21 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): noisy_imgs, targets, tasks = batch noisy_imgs = noisy_imgs.to(device) - targets = targets.to(device) optimizer.zero_grad() outputs = model(noisy_imgs) task_losses = [] - for i, task in enumerate(tasks): + for i, (output, target, task) in enumerate(zip(outputs, targets, tasks)): if task == 'denoise': - loss = criterion_denoise(outputs[i], targets[i]) - else: - loss = criterion_rotate(outputs[i].unsqueeze(0), targets[i].unsqueeze(0)) + target = target.to(device) # Move target to device + # Ensure output and target have same shape + loss = criterion_denoise(output, target) + else: # rotate task + target = target.to(device) # Move target to device + # For rotation task, output should be [batch_size, num_classes] + loss = criterion_rotate(output.view(1, -1), target.view(-1)) task_losses.append(loss) batch_loss = sum(task_losses) / len(task_losses) @@ -101,8 +104,8 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): total_train_loss += batch_loss.item() - avg_total_train_loss = total_train_loss / len(train_loader) - print(f"Training Loss: {avg_total_train_loss:.4f}") + avg_train_loss = total_train_loss / len(train_loader) + print(f"Training Loss: {avg_train_loss:.4f}") # Validation phase model.eval() @@ -115,16 +118,17 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): noisy_imgs, targets, tasks = batch noisy_imgs = noisy_imgs.to(device) - targets = targets.to(device) outputs = model(noisy_imgs) task_losses = [] - for i, task in enumerate(tasks): + for i, (output, target, task) in enumerate(zip(outputs, targets, tasks)): if task == 'denoise': - loss = criterion_denoise(outputs[i], targets[i]) + target = target.to(device) + loss = criterion_denoise(output, target) else: - loss = criterion_rotate(outputs[i].unsqueeze(0), targets[i].unsqueeze(0)) + target = target.to(device) + loss = criterion_rotate(output.view(1, -1), target.view(-1)) task_losses.append(loss) batch_loss = sum(task_losses) / len(task_losses) From 13cf1897aece547a101562db59d7c2e92d459594 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 09:32:20 +0100 Subject: [PATCH 45/55] handle both pretraining methods --- src/pretrain.py | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index c4bc1a7..690bd9e 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -6,7 +6,6 @@ from aiia.model import AIIABase from aiia.data.DataLoader import AIIADataLoader from tqdm import tqdm - def pretrain_model(data_path1, data_path2, num_epochs=3): # Read and merge datasets df1 = pd.read_parquet(data_path1).head(10000) @@ -21,7 +20,6 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): # Initialize model and data loader model = AIIABase(config) - # Define a custom collate function to handle preprocessing and skip bad samples def safe_collate(batch): processed_batch = [] for sample in batch: @@ -37,11 +35,11 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): continue if not processed_batch: - return None # Skip batch if all samples are invalid + return None # Stack tensors for the batch images = torch.stack([x['image'] for x in processed_batch]) - targets = [x['target'] for x in processed_batch] # Don't stack targets yet + targets = torch.stack([x['target'] for x in processed_batch]) # Stack targets tasks = [x['task'] for x in processed_batch] return (images, targets, tasks) @@ -57,7 +55,6 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): train_loader = aiia_loader.train_loader val_loader = aiia_loader.val_loader - # Define loss functions and optimizer criterion_denoise = nn.MSELoss() criterion_rotate = nn.CrossEntropyLoss() optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate) @@ -77,25 +74,29 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): for batch in tqdm(train_loader): if batch is None: - continue # Skip empty batches + continue noisy_imgs, targets, tasks = batch + batch_size = noisy_imgs.size(0) noisy_imgs = noisy_imgs.to(device) + targets = targets.to(device) optimizer.zero_grad() + # Get model outputs and reshape if necessary outputs = model(noisy_imgs) - task_losses = [] - for i, (output, target, task) in enumerate(zip(outputs, targets, tasks)): + task_losses = [] + for i, task in enumerate(tasks): if task == 'denoise': - target = target.to(device) # Move target to device - # Ensure output and target have same shape + # Ensure output matches target shape for denoising + output = outputs[i].view(3, 224, 224) # Reshape to match image dimensions + target = targets[i] loss = criterion_denoise(output, target) else: # rotate task - target = target.to(device) # Move target to device - # For rotation task, output should be [batch_size, num_classes] - loss = criterion_rotate(output.view(1, -1), target.view(-1)) + output = outputs[i].view(-1) # Flatten output for rotation prediction + target = targets[i].long() # Convert target to long for classification + loss = criterion_rotate(output.unsqueeze(0), target.unsqueeze(0)) task_losses.append(loss) batch_loss = sum(task_losses) / len(task_losses) @@ -118,17 +119,20 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): noisy_imgs, targets, tasks = batch noisy_imgs = noisy_imgs.to(device) + targets = targets.to(device) outputs = model(noisy_imgs) - task_losses = [] - for i, (output, target, task) in enumerate(zip(outputs, targets, tasks)): + task_losses = [] + for i, task in enumerate(tasks): if task == 'denoise': - target = target.to(device) + output = outputs[i].view(3, 224, 224) + target = targets[i] loss = criterion_denoise(output, target) else: - target = target.to(device) - loss = criterion_rotate(output.view(1, -1), target.view(-1)) + output = outputs[i].view(-1) + target = targets[i].long() + loss = criterion_rotate(output.unsqueeze(0), target.unsqueeze(0)) task_losses.append(loss) batch_loss = sum(task_losses) / len(task_losses) From b6b63851caa9ef582cbbeccb8c9a6b652353c8e0 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 10:15:00 +0100 Subject: [PATCH 46/55] addeed tasks for both denosing and rotation --- src/pretrain.py | 147 +++++++++++++++++++++++++++++------------------- 1 file changed, 88 insertions(+), 59 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index 690bd9e..201c03f 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -6,6 +6,7 @@ from aiia.model import AIIABase from aiia.data.DataLoader import AIIADataLoader from tqdm import tqdm + def pretrain_model(data_path1, data_path2, num_epochs=3): # Read and merge datasets df1 = pd.read_parquet(data_path1).head(10000) @@ -21,28 +22,49 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): model = AIIABase(config) def safe_collate(batch): - processed_batch = [] + denoise_batch = [] + rotate_batch = [] + for sample in batch: try: noisy_img, target, task = sample - processed_batch.append({ - 'image': noisy_img, - 'target': target, - 'task': task - }) + if task == 'denoise': + denoise_batch.append({ + 'image': noisy_img, + 'target': target, + 'task': task + }) + else: # rotate task + rotate_batch.append({ + 'image': noisy_img, + 'target': target, + 'task': task + }) except Exception as e: print(f"Skipping sample due to error: {e}") continue - if not processed_batch: + if not denoise_batch and not rotate_batch: return None - # Stack tensors for the batch - images = torch.stack([x['image'] for x in processed_batch]) - targets = torch.stack([x['target'] for x in processed_batch]) # Stack targets - tasks = [x['task'] for x in processed_batch] + batch_data = { + 'denoise': None, + 'rotate': None + } - return (images, targets, tasks) + # Process denoise batch + if denoise_batch: + images = torch.stack([x['image'] for x in denoise_batch]) + targets = torch.stack([x['target'] for x in denoise_batch]) + batch_data['denoise'] = (images, targets) + + # Process rotate batch + if rotate_batch: + images = torch.stack([x['image'] for x in rotate_batch]) + targets = torch.stack([x['target'] for x in rotate_batch]) + batch_data['rotate'] = (images, targets) + + return batch_data aiia_loader = AIIADataLoader( merged_df, @@ -71,74 +93,81 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): # Training phase model.train() total_train_loss = 0.0 + batch_count = 0 - for batch in tqdm(train_loader): - if batch is None: + for batch_data in tqdm(train_loader): + if batch_data is None: continue - noisy_imgs, targets, tasks = batch - batch_size = noisy_imgs.size(0) - noisy_imgs = noisy_imgs.to(device) - targets = targets.to(device) - optimizer.zero_grad() + batch_loss = 0 - # Get model outputs and reshape if necessary - outputs = model(noisy_imgs) + # Handle denoise task + if batch_data['denoise'] is not None: + noisy_imgs, targets = batch_data['denoise'] + noisy_imgs = noisy_imgs.to(device) + targets = targets.to(device) + + outputs = model(noisy_imgs) + loss = criterion_denoise(outputs, targets) + batch_loss += loss - task_losses = [] - for i, task in enumerate(tasks): - if task == 'denoise': - # Ensure output matches target shape for denoising - output = outputs[i].view(3, 224, 224) # Reshape to match image dimensions - target = targets[i] - loss = criterion_denoise(output, target) - else: # rotate task - output = outputs[i].view(-1) # Flatten output for rotation prediction - target = targets[i].long() # Convert target to long for classification - loss = criterion_rotate(output.unsqueeze(0), target.unsqueeze(0)) - task_losses.append(loss) + # Handle rotate task + if batch_data['rotate'] is not None: + imgs, targets = batch_data['rotate'] + imgs = imgs.to(device) + targets = targets.long().to(device) + + outputs = model(imgs) + loss = criterion_rotate(outputs, targets) + batch_loss += loss - batch_loss = sum(task_losses) / len(task_losses) - batch_loss.backward() - optimizer.step() - - total_train_loss += batch_loss.item() + if batch_loss > 0: + batch_loss.backward() + optimizer.step() + total_train_loss += batch_loss.item() + batch_count += 1 - avg_train_loss = total_train_loss / len(train_loader) + avg_train_loss = total_train_loss / max(batch_count, 1) print(f"Training Loss: {avg_train_loss:.4f}") # Validation phase model.eval() val_loss = 0.0 + val_batch_count = 0 with torch.no_grad(): - for batch in val_loader: - if batch is None: + for batch_data in val_loader: + if batch_data is None: continue - noisy_imgs, targets, tasks = batch - noisy_imgs = noisy_imgs.to(device) - targets = targets.to(device) + batch_loss = 0 - outputs = model(noisy_imgs) + # Handle denoise task + if batch_data['denoise'] is not None: + noisy_imgs, targets = batch_data['denoise'] + noisy_imgs = noisy_imgs.to(device) + targets = targets.to(device) + + outputs = model(noisy_imgs) + loss = criterion_denoise(outputs, targets) + batch_loss += loss - task_losses = [] - for i, task in enumerate(tasks): - if task == 'denoise': - output = outputs[i].view(3, 224, 224) - target = targets[i] - loss = criterion_denoise(output, target) - else: - output = outputs[i].view(-1) - target = targets[i].long() - loss = criterion_rotate(output.unsqueeze(0), target.unsqueeze(0)) - task_losses.append(loss) + # Handle rotate task + if batch_data['rotate'] is not None: + imgs, targets = batch_data['rotate'] + imgs = imgs.to(device) + targets = targets.long().to(device) + + outputs = model(imgs) + loss = criterion_rotate(outputs, targets) + batch_loss += loss - batch_loss = sum(task_losses) / len(task_losses) - val_loss += batch_loss.item() + if batch_loss > 0: + val_loss += batch_loss.item() + val_batch_count += 1 - avg_val_loss = val_loss / len(val_loader) + avg_val_loss = val_loss / max(val_batch_count, 1) print(f"Validation Loss: {avg_val_loss:.4f}") if avg_val_loss < best_val_loss: From fe4d6b5b22fed2c52cbaf3dcab7444300d81b6fc Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 10:26:28 +0100 Subject: [PATCH 47/55] corrected viewing and some prints --- src/pretrain.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/pretrain.py b/src/pretrain.py index 201c03f..0792f09 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -6,7 +6,6 @@ from aiia.model import AIIABase from aiia.data.DataLoader import AIIADataLoader from tqdm import tqdm - def pretrain_model(data_path1, data_path2, num_epochs=3): # Read and merge datasets df1 = pd.read_parquet(data_path1).head(10000) @@ -108,7 +107,19 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): noisy_imgs = noisy_imgs.to(device) targets = targets.to(device) + # Print shapes for debugging + print(f"\nDenoising task shapes:") + print(f"Input shape: {noisy_imgs.shape}") + print(f"Target shape: {targets.shape}") + outputs = model(noisy_imgs) + print(f"Raw output shape: {outputs.shape}") + + # Reshape output to match target dimensions + batch_size = targets.size(0) + outputs = outputs.view(batch_size, 3, 224, 224) + print(f"Reshaped output shape: {outputs.shape}") + loss = criterion_denoise(outputs, targets) batch_loss += loss @@ -118,7 +129,18 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): imgs = imgs.to(device) targets = targets.long().to(device) + # Print shapes for debugging + print(f"\nRotation task shapes:") + print(f"Input shape: {imgs.shape}") + print(f"Target shape: {targets.shape}") + outputs = model(imgs) + print(f"Raw output shape: {outputs.shape}") + + # Reshape output for rotation classification + outputs = outputs.view(targets.size(0), -1) # Flatten to [batch_size, features] + print(f"Reshaped output shape: {outputs.shape}") + loss = criterion_rotate(outputs, targets) batch_loss += loss @@ -150,6 +172,8 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): targets = targets.to(device) outputs = model(noisy_imgs) + batch_size = targets.size(0) + outputs = outputs.view(batch_size, 3, 224, 224) loss = criterion_denoise(outputs, targets) batch_loss += loss @@ -160,6 +184,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): targets = targets.long().to(device) outputs = model(imgs) + outputs = outputs.view(targets.size(0), -1) loss = criterion_rotate(outputs, targets) batch_loss += loss From 58baf0ad3c9b212000fbbb4372120161d9ac9950 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 10:43:59 +0100 Subject: [PATCH 48/55] overall improvement --- src/pretrain.py | 78 +++++++++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index 0792f09..55b4650 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -6,6 +6,18 @@ from aiia.model import AIIABase from aiia.data.DataLoader import AIIADataLoader from tqdm import tqdm +class ProjectionHead(nn.Module): + def __init__(self): + super().__init__() + self.conv_denoise = nn.Conv2d(512, 3, kernel_size=1) + self.conv_rotate = nn.Conv2d(512, 4, kernel_size=1) # 4 classes for 0, 90, 180, 270 degrees + + def forward(self, x, task='denoise'): + if task == 'denoise': + return self.conv_denoise(x) + else: + return self.conv_rotate(x).mean(dim=(2, 3)) # Global average pooling for rotation task + def pretrain_model(data_path1, data_path2, num_epochs=3): # Read and merge datasets df1 = pd.read_parquet(data_path1).head(10000) @@ -17,9 +29,14 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): model_name="AIIA-Base-512x20k", ) - # Initialize model and data loader + # Initialize model and projection head model = AIIABase(config) + projection_head = ProjectionHead() + device = "cuda" if torch.cuda.is_available() else "cpu" + model.to(device) + projection_head.to(device) + def safe_collate(batch): denoise_batch = [] rotate_batch = [] @@ -51,13 +68,11 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): 'rotate': None } - # Process denoise batch if denoise_batch: images = torch.stack([x['image'] for x in denoise_batch]) targets = torch.stack([x['target'] for x in denoise_batch]) batch_data['denoise'] = (images, targets) - # Process rotate batch if rotate_batch: images = torch.stack([x['image'] for x in rotate_batch]) targets = torch.stack([x['target'] for x in rotate_batch]) @@ -78,10 +93,12 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): criterion_denoise = nn.MSELoss() criterion_rotate = nn.CrossEntropyLoss() - optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate) - - device = "cuda" if torch.cuda.is_available() else "cpu" - model.to(device) + + # Update optimizer to include projection head parameters + optimizer = torch.optim.AdamW( + list(model.parameters()) + list(projection_head.parameters()), + lr=config.learning_rate + ) best_val_loss = float('inf') @@ -91,6 +108,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): # Training phase model.train() + projection_head.train() total_train_loss = 0.0 batch_count = 0 @@ -107,18 +125,16 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): noisy_imgs = noisy_imgs.to(device) targets = targets.to(device) - # Print shapes for debugging + # Get features from base model + features = model(noisy_imgs) + # Project features back to image space + outputs = projection_head(features, task='denoise') + print(f"\nDenoising task shapes:") print(f"Input shape: {noisy_imgs.shape}") print(f"Target shape: {targets.shape}") - - outputs = model(noisy_imgs) - print(f"Raw output shape: {outputs.shape}") - - # Reshape output to match target dimensions - batch_size = targets.size(0) - outputs = outputs.view(batch_size, 3, 224, 224) - print(f"Reshaped output shape: {outputs.shape}") + print(f"Features shape: {features.shape}") + print(f"Output shape: {outputs.shape}") loss = criterion_denoise(outputs, targets) batch_loss += loss @@ -129,17 +145,16 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): imgs = imgs.to(device) targets = targets.long().to(device) - # Print shapes for debugging + # Get features from base model + features = model(imgs) + # Project features to rotation predictions + outputs = projection_head(features, task='rotate') + print(f"\nRotation task shapes:") print(f"Input shape: {imgs.shape}") print(f"Target shape: {targets.shape}") - - outputs = model(imgs) - print(f"Raw output shape: {outputs.shape}") - - # Reshape output for rotation classification - outputs = outputs.view(targets.size(0), -1) # Flatten to [batch_size, features] - print(f"Reshaped output shape: {outputs.shape}") + print(f"Features shape: {features.shape}") + print(f"Output shape: {outputs.shape}") loss = criterion_rotate(outputs, targets) batch_loss += loss @@ -155,6 +170,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): # Validation phase model.eval() + projection_head.eval() val_loss = 0.0 val_batch_count = 0 @@ -165,26 +181,23 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): batch_loss = 0 - # Handle denoise task if batch_data['denoise'] is not None: noisy_imgs, targets = batch_data['denoise'] noisy_imgs = noisy_imgs.to(device) targets = targets.to(device) - outputs = model(noisy_imgs) - batch_size = targets.size(0) - outputs = outputs.view(batch_size, 3, 224, 224) + features = model(noisy_imgs) + outputs = projection_head(features, task='denoise') loss = criterion_denoise(outputs, targets) batch_loss += loss - # Handle rotate task if batch_data['rotate'] is not None: imgs, targets = batch_data['rotate'] imgs = imgs.to(device) targets = targets.long().to(device) - outputs = model(imgs) - outputs = outputs.view(targets.size(0), -1) + features = model(imgs) + outputs = projection_head(features, task='rotate') loss = criterion_rotate(outputs, targets) batch_loss += loss @@ -197,7 +210,8 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): if avg_val_loss < best_val_loss: best_val_loss = avg_val_loss - model.save("BASEv0.1") + # Save both model and projection head + model.save("AIIA-base-512") print("Best model saved!") if __name__ == "__main__": From 8d08bfc14c4572020933ddf6cf710356c8eaf3db Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 10:56:16 +0100 Subject: [PATCH 49/55] removed rint statemetns and added csv saving --- src/pretrain.py | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index 55b4650..6e4c05c 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -1,5 +1,6 @@ import torch -from torch import nn, utils +from torch import nn +import csv import pandas as pd from aiia.model.config import AIIAConfig from aiia.model import AIIABase @@ -101,7 +102,8 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): ) best_val_loss = float('inf') - + train_losses = [] + val_losses = [] for epoch in range(num_epochs): print(f"\nEpoch {epoch+1}/{num_epochs}") print("-" * 20) @@ -128,14 +130,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): # Get features from base model features = model(noisy_imgs) # Project features back to image space - outputs = projection_head(features, task='denoise') - - print(f"\nDenoising task shapes:") - print(f"Input shape: {noisy_imgs.shape}") - print(f"Target shape: {targets.shape}") - print(f"Features shape: {features.shape}") - print(f"Output shape: {outputs.shape}") - + outputs = projection_head(features, task='denoise') loss = criterion_denoise(outputs, targets) batch_loss += loss @@ -150,12 +145,6 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): # Project features to rotation predictions outputs = projection_head(features, task='rotate') - print(f"\nRotation task shapes:") - print(f"Input shape: {imgs.shape}") - print(f"Target shape: {targets.shape}") - print(f"Features shape: {features.shape}") - print(f"Output shape: {outputs.shape}") - loss = criterion_rotate(outputs, targets) batch_loss += loss @@ -166,6 +155,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): batch_count += 1 avg_train_loss = total_train_loss / max(batch_count, 1) + train_losses.append(avg_train_loss) print(f"Training Loss: {avg_train_loss:.4f}") # Validation phase @@ -206,6 +196,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): val_batch_count += 1 avg_val_loss = val_loss / max(val_batch_count, 1) + val_losses.append(avg_val_loss) print(f"Validation Loss: {avg_val_loss:.4f}") if avg_val_loss < best_val_loss: @@ -214,6 +205,21 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): model.save("AIIA-base-512") print("Best model saved!") + # Prepare the data to be written to the CSV file + data = list(zip(range(1, len(train_losses) + 1), train_losses, val_losses)) + + # Specify the CSV file name + csv_file = 'losses.csv' + + # Write the data to the CSV file + with open(csv_file, mode='w', newline='') as file: + writer = csv.writer(file) + # Write the header + writer.writerow(['Epoch', 'Train Loss', 'Validation Loss']) + # Write the data + writer.writerows(data) + print(f"Data has been written to {csv_file}") + if __name__ == "__main__": data_path1 = "/root/training_data/vision-dataset/images_checkpoint.parquet" data_path2 = "/root/training_data/vision-dataset/vec_images_dataset.parquet" From 102d9520437ff031bc994548ac9eefbe4c79b71e Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 11:06:42 +0100 Subject: [PATCH 50/55] increased epochs --- src/pretrain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pretrain.py b/src/pretrain.py index 6e4c05c..02e4e7f 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -223,4 +223,4 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): if __name__ == "__main__": data_path1 = "/root/training_data/vision-dataset/images_checkpoint.parquet" data_path2 = "/root/training_data/vision-dataset/vec_images_dataset.parquet" - pretrain_model(data_path1, data_path2, num_epochs=3) \ No newline at end of file + pretrain_model(data_path1, data_path2, num_epochs=10) \ No newline at end of file From 1e665c4604b8a1f11e152b286e03c39b29cc76cf Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Tue, 28 Jan 2025 10:58:33 +0100 Subject: [PATCH 51/55] added first pip install version 0.1 --- MANIFEST.in | 4 ++++ pyproject.toml | 8 ++++++++ requirements.txt | 5 +++++ setup.cfg | 26 ++++++++++++++++++++++++++ setup.py | 25 +++++++++++++++++++++++++ src/aiia/__init__.py | 8 +++++--- src/aiia/data/__init__.py | 4 +++- src/aiia/model/Model.py | 6 +++++- src/aiia/model/__init__.py | 21 ++++++++++++++++++++- 9 files changed, 101 insertions(+), 6 deletions(-) create mode 100644 MANIFEST.in create mode 100644 pyproject.toml create mode 100644 requirements.txt create mode 100644 setup.cfg create mode 100644 setup.py diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..6925c8f --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,4 @@ +include LICENSE +include README.md +include requirements.txt +recursive-include src/aiia * \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a8bdbe9 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,8 @@ +[build-system] +requires = ["setuptools>=42", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.black] +line-length = 88 +target-version = ['py37'] +include = '\.pyi?$' \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..06e8438 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +torch>=4.5.0 +numpy +tqdm +pytest +pillow \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..fb45363 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,26 @@ +[metadata] +name = aiia +version = 0.1.0 +author = Your Name +author_email = falko.habel@gmx.de +description = AIIA deep learning model implementation +long_description = file: README.md +long_description_content_type = text/markdown +url = https://gitea.fabelous.app/Maschine-Learning/AIIA.git +classifiers = + Programming Language :: Python :: 3 + License :: OSI Approved :: MIT License + Operating System :: OS Independent + +[options] +package_dir = + = src +packages = find: +python_requires = >=3.7 +install_requires = + torch>=1.8.0 + numpy>=1.19.0 + tqdm>=4.62.0 + +[options.packages.find] +where = src \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..0eb6be6 --- /dev/null +++ b/setup.py @@ -0,0 +1,25 @@ +from setuptools import setup, find_packages + +setup( + name="aiia", + version="0.1.0", + packages=find_packages(where="src"), + package_dir={"": "src"}, + install_requires=[ + "torch>=1.8.0", + "numpy>=1.19.0", + "tqdm>=4.62.0", + ], + author="Falko Habel", + author_email="falko.habel@gmx.de", + description="AIIA deep learning model implementation", + long_description=open("README.md").read(), + long_description_content_type="text/markdown", + url="https://gitea.fabelous.app/Maschine-Learning/AIIA.git", + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: Creative Commons Attribution-NonCommercial 4.0 International", + "Operating System :: OS Independent", + ], + python_requires=">=3.10", +) diff --git a/src/aiia/__init__.py b/src/aiia/__init__.py index 71acf48..6dbc27a 100644 --- a/src/aiia/__init__.py +++ b/src/aiia/__init__.py @@ -1,3 +1,5 @@ -from .model import AIIA, AIIABase, AIIAchunked, AIIAExpert, AIIAmoe, AIIArecursive, AIIABaseShared -from .data import AIIADataLoader -from .model.config import AIIAConfig \ No newline at end of file +from .model.Model import AIIABase, AIIABaseShared, AIIAchunked, AIIAExpert, AIIAmoe, AIIA, AIIArecursive +from .model.config import AIIAConfig +from .data.DataLoader import DataLoader + +__version__ = "0.1.0" diff --git a/src/aiia/data/__init__.py b/src/aiia/data/__init__.py index d1ae9b0..5e8a93c 100644 --- a/src/aiia/data/__init__.py +++ b/src/aiia/data/__init__.py @@ -1 +1,3 @@ -from .DataLoader import AIIADataLoader \ No newline at end of file +from .DataLoader import AIIADataLoader + +__all__ = ["AIIADataLoader"] diff --git a/src/aiia/model/Model.py b/src/aiia/model/Model.py index 771caf8..f0e65ff 100644 --- a/src/aiia/model/Model.py +++ b/src/aiia/model/Model.py @@ -223,4 +223,8 @@ class AIIArecursive(AIIA): processed_patches.append(pp) combined_output = torch.mean(torch.stack(processed_patches, dim=0), dim=0) - return combined_output \ No newline at end of file + return combined_output + +config = AIIAConfig() +model = AIIAmoe(config, num_experts=5) +model.save("test") \ No newline at end of file diff --git a/src/aiia/model/__init__.py b/src/aiia/model/__init__.py index 0e6a459..f68a42a 100644 --- a/src/aiia/model/__init__.py +++ b/src/aiia/model/__init__.py @@ -1,2 +1,21 @@ +from .Model import ( + AIIA, + AIIABase, + AIIABaseShared, + AIIAchunked, + AIIAExpert, + AIIAmoe, + AIIArecursive +) from .config import AIIAConfig -from .Model import AIIA, AIIABase, AIIAchunked, AIIAExpert, AIIAmoe, AIIArecursive, AIIABaseShared \ No newline at end of file + +__all__ = [ + "AIIA", + "AIIABase", + "AIIABaseShared", + "AIIAchunked", + "AIIAExpert", + "AIIAmoe", + "AIIArecursive", + "AIIAConfig" +] \ No newline at end of file From 7de7eef0810bd134819335d3938da04fcbcba53d Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Tue, 28 Jan 2025 11:16:09 +0100 Subject: [PATCH 52/55] updated pretraing to create a extra class for Pretraining --- README.md | 17 +++ src/aiia/__init__.py | 2 + src/aiia/pretrain/__init__.py | 3 + src/aiia/pretrain/pretrainer.py | 219 +++++++++++++++++++++++++++++++ src/pretrain.py | 226 -------------------------------- 5 files changed, 241 insertions(+), 226 deletions(-) create mode 100644 src/aiia/pretrain/__init__.py create mode 100644 src/aiia/pretrain/pretrainer.py delete mode 100644 src/pretrain.py diff --git a/README.md b/README.md index 0d888a0..830f111 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,19 @@ # AIIA + +## Example Usage: +```Python +if __name__ == "__main__": + data_path1 = "/root/training_data/vision-dataset/images_checkpoint.parquet" + data_path2 = "/root/training_data/vision-dataset/vec_images_dataset.parquet" + + from aiia.model import AIIABase + from aiia.model.config import AIIAConfig + from aiia.pretrain import Pretrainer + + config = AIIAConfig(model_name="AIIA-Base-512x20k") + model = AIIABase(config) + + pretrainer = Pretrainer(model, learning_rate=1e-4) + pretrainer.train(data_path1, data_path2, num_epochs=10) +``` \ No newline at end of file diff --git a/src/aiia/__init__.py b/src/aiia/__init__.py index 6dbc27a..6a27146 100644 --- a/src/aiia/__init__.py +++ b/src/aiia/__init__.py @@ -1,5 +1,7 @@ from .model.Model import AIIABase, AIIABaseShared, AIIAchunked, AIIAExpert, AIIAmoe, AIIA, AIIArecursive from .model.config import AIIAConfig from .data.DataLoader import DataLoader +from .pretrain.pretrainer import Pretrainer, ProjectionHead + __version__ = "0.1.0" diff --git a/src/aiia/pretrain/__init__.py b/src/aiia/pretrain/__init__.py new file mode 100644 index 0000000..c45cbc4 --- /dev/null +++ b/src/aiia/pretrain/__init__.py @@ -0,0 +1,3 @@ +from .pretrainer import Pretrainer, ProjectionHead + +__all__ = ["Pretrainer", "ProjectionHead"] \ No newline at end of file diff --git a/src/aiia/pretrain/pretrainer.py b/src/aiia/pretrain/pretrainer.py new file mode 100644 index 0000000..b540db0 --- /dev/null +++ b/src/aiia/pretrain/pretrainer.py @@ -0,0 +1,219 @@ +import torch +from torch import nn +import csv +import pandas as pd +from tqdm import tqdm +from ..model.Model import AIIA +from ..data.DataLoader import AIIADataLoader + +class ProjectionHead(nn.Module): + def __init__(self): + super().__init__() + self.conv_denoise = nn.Conv2d(512, 3, kernel_size=1) + self.conv_rotate = nn.Conv2d(512, 4, kernel_size=1) # 4 classes for 0, 90, 180, 270 degrees + + def forward(self, x, task='denoise'): + if task == 'denoise': + return self.conv_denoise(x) + else: + return self.conv_rotate(x).mean(dim=(2, 3)) # Global average pooling for rotation task + +class Pretrainer: + def __init__(self, model: AIIA, learning_rate=1e-4): + """ + Initialize the pretrainer with a model. + + Args: + model (AIIA): The model instance to pretrain + learning_rate (float): Learning rate for optimization + """ + self.device = "cuda" if torch.cuda.is_available() else "cpu" + self.model = model.to(self.device) + self.projection_head = ProjectionHead().to(self.device) + self.optimizer = torch.optim.AdamW( + list(self.model.parameters()) + list(self.projection_head.parameters()), + lr=learning_rate + ) + self.train_losses = [] + self.val_losses = [] + + @staticmethod + def safe_collate(batch): + """Safely collate batch data handling both denoise and rotate tasks.""" + denoise_batch = [] + rotate_batch = [] + + for sample in batch: + try: + noisy_img, target, task = sample + if task == 'denoise': + denoise_batch.append({ + 'image': noisy_img, + 'target': target, + 'task': task + }) + else: # rotate task + rotate_batch.append({ + 'image': noisy_img, + 'target': target, + 'task': task + }) + except Exception as e: + print(f"Skipping sample due to error: {e}") + continue + + if not denoise_batch and not rotate_batch: + return None + + batch_data = { + 'denoise': None, + 'rotate': None + } + + if denoise_batch: + images = torch.stack([x['image'] for x in denoise_batch]) + targets = torch.stack([x['target'] for x in denoise_batch]) + batch_data['denoise'] = (images, targets) + + if rotate_batch: + images = torch.stack([x['image'] for x in rotate_batch]) + targets = torch.stack([x['target'] for x in rotate_batch]) + batch_data['rotate'] = (images, targets) + + return batch_data + + def _process_batch(self, batch_data, criterion_denoise, criterion_rotate, training=True): + """Process a single batch of data.""" + batch_loss = 0 + + if batch_data['denoise'] is not None: + noisy_imgs, targets = batch_data['denoise'] + noisy_imgs = noisy_imgs.to(self.device) + targets = targets.to(self.device) + + features = self.model(noisy_imgs) + outputs = self.projection_head(features, task='denoise') + loss = criterion_denoise(outputs, targets) + batch_loss += loss + + if batch_data['rotate'] is not None: + imgs, targets = batch_data['rotate'] + imgs = imgs.to(self.device) + targets = targets.long().to(self.device) + + features = self.model(imgs) + outputs = self.projection_head(features, task='rotate') + loss = criterion_rotate(outputs, targets) + batch_loss += loss + + return batch_loss + + def train(self, data_path1, data_path2, num_epochs=3, batch_size=2, sample_size=10000): + """ + Train the model using the specified datasets. + + Args: + data_path1 (str): Path to first dataset + data_path2 (str): Path to second dataset + num_epochs (int): Number of training epochs + batch_size (int): Batch size for training + sample_size (int): Number of samples to use from each dataset + """ + # Read and merge datasets + df1 = pd.read_parquet(data_path1).head(sample_size) + df2 = pd.read_parquet(data_path2).head(sample_size) + merged_df = pd.concat([df1, df2], ignore_index=True) + + # Initialize data loader + aiia_loader = AIIADataLoader( + merged_df, + column="image_bytes", + batch_size=batch_size, + pretraining=True, + collate_fn=self.safe_collate + ) + + criterion_denoise = nn.MSELoss() + criterion_rotate = nn.CrossEntropyLoss() + best_val_loss = float('inf') + + for epoch in range(num_epochs): + print(f"\nEpoch {epoch+1}/{num_epochs}") + print("-" * 20) + + # Training phase + self.model.train() + self.projection_head.train() + total_train_loss = 0.0 + batch_count = 0 + + for batch_data in tqdm(aiia_loader.train_loader): + if batch_data is None: + continue + + self.optimizer.zero_grad() + batch_loss = self._process_batch(batch_data, criterion_denoise, criterion_rotate) + + if batch_loss > 0: + batch_loss.backward() + self.optimizer.step() + total_train_loss += batch_loss.item() + batch_count += 1 + + avg_train_loss = total_train_loss / max(batch_count, 1) + self.train_losses.append(avg_train_loss) + print(f"Training Loss: {avg_train_loss:.4f}") + + # Validation phase + self.model.eval() + self.projection_head.eval() + val_loss = self._validate(aiia_loader.val_loader, criterion_denoise, criterion_rotate) + + if val_loss < best_val_loss: + best_val_loss = val_loss + self.save_model("AIIA-base-512") + print("Best model saved!") + + self.save_losses('losses.csv') + + def _validate(self, val_loader, criterion_denoise, criterion_rotate): + """Perform validation and return average validation loss.""" + val_loss = 0.0 + val_batch_count = 0 + + with torch.no_grad(): + for batch_data in val_loader: + if batch_data is None: + continue + + batch_loss = self._process_batch( + batch_data, criterion_denoise, criterion_rotate, training=False + ) + + if batch_loss > 0: + val_loss += batch_loss.item() + val_batch_count += 1 + + avg_val_loss = val_loss / max(val_batch_count, 1) + self.val_losses.append(avg_val_loss) + print(f"Validation Loss: {avg_val_loss:.4f}") + return avg_val_loss + + def save_model(self, path): + """Save the model and projection head.""" + self.model.save(path) + torch.save(self.projection_head.state_dict(), f"{path}_projection_head.pth") + + def save_losses(self, csv_file): + """Save training and validation losses to a CSV file.""" + data = list(zip( + range(1, len(self.train_losses) + 1), + self.train_losses, + self.val_losses + )) + + with open(csv_file, mode='w', newline='') as file: + writer = csv.writer(file) + writer.writerow(['Epoch', 'Train Loss', 'Validation Loss']) + writer.writerows(data) + print(f"Loss data has been written to {csv_file}") \ No newline at end of file diff --git a/src/pretrain.py b/src/pretrain.py deleted file mode 100644 index 02e4e7f..0000000 --- a/src/pretrain.py +++ /dev/null @@ -1,226 +0,0 @@ -import torch -from torch import nn -import csv -import pandas as pd -from aiia.model.config import AIIAConfig -from aiia.model import AIIABase -from aiia.data.DataLoader import AIIADataLoader -from tqdm import tqdm - -class ProjectionHead(nn.Module): - def __init__(self): - super().__init__() - self.conv_denoise = nn.Conv2d(512, 3, kernel_size=1) - self.conv_rotate = nn.Conv2d(512, 4, kernel_size=1) # 4 classes for 0, 90, 180, 270 degrees - - def forward(self, x, task='denoise'): - if task == 'denoise': - return self.conv_denoise(x) - else: - return self.conv_rotate(x).mean(dim=(2, 3)) # Global average pooling for rotation task - -def pretrain_model(data_path1, data_path2, num_epochs=3): - # Read and merge datasets - df1 = pd.read_parquet(data_path1).head(10000) - df2 = pd.read_parquet(data_path2).head(10000) - merged_df = pd.concat([df1, df2], ignore_index=True) - - # Model configuration - config = AIIAConfig( - model_name="AIIA-Base-512x20k", - ) - - # Initialize model and projection head - model = AIIABase(config) - projection_head = ProjectionHead() - - device = "cuda" if torch.cuda.is_available() else "cpu" - model.to(device) - projection_head.to(device) - - def safe_collate(batch): - denoise_batch = [] - rotate_batch = [] - - for sample in batch: - try: - noisy_img, target, task = sample - if task == 'denoise': - denoise_batch.append({ - 'image': noisy_img, - 'target': target, - 'task': task - }) - else: # rotate task - rotate_batch.append({ - 'image': noisy_img, - 'target': target, - 'task': task - }) - except Exception as e: - print(f"Skipping sample due to error: {e}") - continue - - if not denoise_batch and not rotate_batch: - return None - - batch_data = { - 'denoise': None, - 'rotate': None - } - - if denoise_batch: - images = torch.stack([x['image'] for x in denoise_batch]) - targets = torch.stack([x['target'] for x in denoise_batch]) - batch_data['denoise'] = (images, targets) - - if rotate_batch: - images = torch.stack([x['image'] for x in rotate_batch]) - targets = torch.stack([x['target'] for x in rotate_batch]) - batch_data['rotate'] = (images, targets) - - return batch_data - - aiia_loader = AIIADataLoader( - merged_df, - column="image_bytes", - batch_size=2, - pretraining=True, - collate_fn=safe_collate - ) - - train_loader = aiia_loader.train_loader - val_loader = aiia_loader.val_loader - - criterion_denoise = nn.MSELoss() - criterion_rotate = nn.CrossEntropyLoss() - - # Update optimizer to include projection head parameters - optimizer = torch.optim.AdamW( - list(model.parameters()) + list(projection_head.parameters()), - lr=config.learning_rate - ) - - best_val_loss = float('inf') - train_losses = [] - val_losses = [] - for epoch in range(num_epochs): - print(f"\nEpoch {epoch+1}/{num_epochs}") - print("-" * 20) - - # Training phase - model.train() - projection_head.train() - total_train_loss = 0.0 - batch_count = 0 - - for batch_data in tqdm(train_loader): - if batch_data is None: - continue - - optimizer.zero_grad() - batch_loss = 0 - - # Handle denoise task - if batch_data['denoise'] is not None: - noisy_imgs, targets = batch_data['denoise'] - noisy_imgs = noisy_imgs.to(device) - targets = targets.to(device) - - # Get features from base model - features = model(noisy_imgs) - # Project features back to image space - outputs = projection_head(features, task='denoise') - loss = criterion_denoise(outputs, targets) - batch_loss += loss - - # Handle rotate task - if batch_data['rotate'] is not None: - imgs, targets = batch_data['rotate'] - imgs = imgs.to(device) - targets = targets.long().to(device) - - # Get features from base model - features = model(imgs) - # Project features to rotation predictions - outputs = projection_head(features, task='rotate') - - loss = criterion_rotate(outputs, targets) - batch_loss += loss - - if batch_loss > 0: - batch_loss.backward() - optimizer.step() - total_train_loss += batch_loss.item() - batch_count += 1 - - avg_train_loss = total_train_loss / max(batch_count, 1) - train_losses.append(avg_train_loss) - print(f"Training Loss: {avg_train_loss:.4f}") - - # Validation phase - model.eval() - projection_head.eval() - val_loss = 0.0 - val_batch_count = 0 - - with torch.no_grad(): - for batch_data in val_loader: - if batch_data is None: - continue - - batch_loss = 0 - - if batch_data['denoise'] is not None: - noisy_imgs, targets = batch_data['denoise'] - noisy_imgs = noisy_imgs.to(device) - targets = targets.to(device) - - features = model(noisy_imgs) - outputs = projection_head(features, task='denoise') - loss = criterion_denoise(outputs, targets) - batch_loss += loss - - if batch_data['rotate'] is not None: - imgs, targets = batch_data['rotate'] - imgs = imgs.to(device) - targets = targets.long().to(device) - - features = model(imgs) - outputs = projection_head(features, task='rotate') - loss = criterion_rotate(outputs, targets) - batch_loss += loss - - if batch_loss > 0: - val_loss += batch_loss.item() - val_batch_count += 1 - - avg_val_loss = val_loss / max(val_batch_count, 1) - val_losses.append(avg_val_loss) - print(f"Validation Loss: {avg_val_loss:.4f}") - - if avg_val_loss < best_val_loss: - best_val_loss = avg_val_loss - # Save both model and projection head - model.save("AIIA-base-512") - print("Best model saved!") - - # Prepare the data to be written to the CSV file - data = list(zip(range(1, len(train_losses) + 1), train_losses, val_losses)) - - # Specify the CSV file name - csv_file = 'losses.csv' - - # Write the data to the CSV file - with open(csv_file, mode='w', newline='') as file: - writer = csv.writer(file) - # Write the header - writer.writerow(['Epoch', 'Train Loss', 'Validation Loss']) - # Write the data - writer.writerows(data) - print(f"Data has been written to {csv_file}") - -if __name__ == "__main__": - data_path1 = "/root/training_data/vision-dataset/images_checkpoint.parquet" - data_path2 = "/root/training_data/vision-dataset/vec_images_dataset.parquet" - pretrain_model(data_path1, data_path2, num_epochs=10) \ No newline at end of file From a369c49f15eb1213cdf2dd952d12144e0fc62d82 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Tue, 28 Jan 2025 11:27:42 +0100 Subject: [PATCH 53/55] improved pretraining --- README.md | 37 +++++++++++++++++++++------------ example.py | 27 ++++++++++++++++++++++++ src/aiia/pretrain/pretrainer.py | 29 ++++++++++++++++++-------- 3 files changed, 71 insertions(+), 22 deletions(-) create mode 100644 example.py diff --git a/README.md b/README.md index 830f111..6f149b1 100644 --- a/README.md +++ b/README.md @@ -3,17 +3,28 @@ ## Example Usage: ```Python -if __name__ == "__main__": - data_path1 = "/root/training_data/vision-dataset/images_checkpoint.parquet" - data_path2 = "/root/training_data/vision-dataset/vec_images_dataset.parquet" - - from aiia.model import AIIABase - from aiia.model.config import AIIAConfig - from aiia.pretrain import Pretrainer - - config = AIIAConfig(model_name="AIIA-Base-512x20k") - model = AIIABase(config) - - pretrainer = Pretrainer(model, learning_rate=1e-4) - pretrainer.train(data_path1, data_path2, num_epochs=10) +from aiia.model import AIIABase +from aiia.model.config import AIIAConfig +from aiia.pretrain import Pretrainer + +# Create your model +config = AIIAConfig(model_name="AIIA-Base-512x20k") +model = AIIABase(config) + +# Initialize pretrainer with the model +pretrainer = Pretrainer(model, learning_rate=1e-4) + +# List of dataset paths +dataset_paths = [ + "/path/to/dataset1.parquet", + "/path/to/dataset2.parquet" +] + +# Start training with multiple datasets +pretrainer.train( + dataset_paths=dataset_paths, + num_epochs=10, + batch_size=2, + sample_size=10000 +) ``` \ No newline at end of file diff --git a/example.py b/example.py new file mode 100644 index 0000000..6e1620b --- /dev/null +++ b/example.py @@ -0,0 +1,27 @@ +data_path1 = "/root/training_data/vision-dataset/images_checkpoint.parquet" +data_path2 = "/root/training_data/vision-dataset/vec_images_dataset.parquet" + +from aiia.model import AIIABase +from aiia.model.config import AIIAConfig +from aiia.pretrain import Pretrainer + +# Create your model +config = AIIAConfig(model_name="AIIA-Base-512x10k-small", num_hidden_layers=6, hidden_size=256) +model = AIIABase(config) + +# Initialize pretrainer with the model +pretrainer = Pretrainer(model, learning_rate=config.learning_rate) + +# List of dataset paths +dataset_paths = [ + data_path1, + data_path2 +] + +# Start training with multiple datasets +pretrainer.train( + dataset_paths=dataset_paths, + num_epochs=10, + batch_size=2, + sample_size=10000 +) \ No newline at end of file diff --git a/src/aiia/pretrain/pretrainer.py b/src/aiia/pretrain/pretrainer.py index b540db0..913b77a 100644 --- a/src/aiia/pretrain/pretrainer.py +++ b/src/aiia/pretrain/pretrainer.py @@ -108,26 +108,37 @@ class Pretrainer: return batch_loss - def train(self, data_path1, data_path2, num_epochs=3, batch_size=2, sample_size=10000): + def train(self, dataset_paths, column="image_bytes", num_epochs=3, batch_size=2, sample_size=10000): """ - Train the model using the specified datasets. + Train the model using multiple specified datasets. Args: - data_path1 (str): Path to first dataset - data_path2 (str): Path to second dataset + dataset_paths (list): List of paths to parquet datasets num_epochs (int): Number of training epochs batch_size (int): Batch size for training sample_size (int): Number of samples to use from each dataset """ - # Read and merge datasets - df1 = pd.read_parquet(data_path1).head(sample_size) - df2 = pd.read_parquet(data_path2).head(sample_size) - merged_df = pd.concat([df1, df2], ignore_index=True) + if not dataset_paths: + raise ValueError("No dataset paths provided") + + # Read and merge all datasets + dataframes = [] + for path in dataset_paths: + try: + df = pd.read_parquet(path).head(sample_size) + dataframes.append(df) + except Exception as e: + print(f"Error loading dataset {path}: {e}") + + if not dataframes: + raise ValueError("No valid datasets could be loaded") + + merged_df = pd.concat(dataframes, ignore_index=True) # Initialize data loader aiia_loader = AIIADataLoader( merged_df, - column="image_bytes", + column=column, batch_size=batch_size, pretraining=True, collate_fn=self.safe_collate From 3631df7f0af72be95a35cb4b21f54279e0846cf8 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Tue, 28 Jan 2025 11:42:03 +0100 Subject: [PATCH 54/55] updated pretrainer to handle multiple classes and configs. --- example.py | 2 +- src/aiia/pretrain/pretrainer.py | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/example.py b/example.py index 6e1620b..6d605ca 100644 --- a/example.py +++ b/example.py @@ -10,7 +10,7 @@ config = AIIAConfig(model_name="AIIA-Base-512x10k-small", num_hidden_layers=6, h model = AIIABase(config) # Initialize pretrainer with the model -pretrainer = Pretrainer(model, learning_rate=config.learning_rate) +pretrainer = Pretrainer(model, learning_rate=config.learning_rate, config=config) # List of dataset paths dataset_paths = [ diff --git a/src/aiia/pretrain/pretrainer.py b/src/aiia/pretrain/pretrainer.py index 913b77a..fa84fcb 100644 --- a/src/aiia/pretrain/pretrainer.py +++ b/src/aiia/pretrain/pretrainer.py @@ -4,13 +4,15 @@ import csv import pandas as pd from tqdm import tqdm from ..model.Model import AIIA +from ..model.config import AIIAConfig from ..data.DataLoader import AIIADataLoader + class ProjectionHead(nn.Module): - def __init__(self): + def __init__(self, hidden_size): super().__init__() - self.conv_denoise = nn.Conv2d(512, 3, kernel_size=1) - self.conv_rotate = nn.Conv2d(512, 4, kernel_size=1) # 4 classes for 0, 90, 180, 270 degrees + self.conv_denoise = nn.Conv2d(hidden_size, 3, kernel_size=1) + self.conv_rotate = nn.Conv2d(hidden_size, 4, kernel_size=1) # 4 classes for 0, 90, 180, 270 degrees def forward(self, x, task='denoise'): if task == 'denoise': @@ -19,17 +21,19 @@ class ProjectionHead(nn.Module): return self.conv_rotate(x).mean(dim=(2, 3)) # Global average pooling for rotation task class Pretrainer: - def __init__(self, model: AIIA, learning_rate=1e-4): + def __init__(self, model: AIIA, learning_rate=1e-4, config: AIIAConfig=None): """ Initialize the pretrainer with a model. Args: model (AIIA): The model instance to pretrain learning_rate (float): Learning rate for optimization + config (dict): Model configuration containing hidden_size """ self.device = "cuda" if torch.cuda.is_available() else "cpu" self.model = model.to(self.device) - self.projection_head = ProjectionHead().to(self.device) + hidden_size = config.hidden_size + self.projection_head = ProjectionHead(hidden_size).to(self.device) self.optimizer = torch.optim.AdamW( list(self.model.parameters()) + list(self.projection_head.parameters()), lr=learning_rate From 3c0e9e8ac1be6e88551ecc1304acfb1c75d4d311 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Tue, 28 Jan 2025 17:18:21 +0100 Subject: [PATCH 55/55] fixed savings --- run.py | 27 +++++++++++++++++++++++++++ src/aiia/pretrain/pretrainer.py | 6 +----- 2 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 run.py diff --git a/run.py b/run.py new file mode 100644 index 0000000..fb20e63 --- /dev/null +++ b/run.py @@ -0,0 +1,27 @@ +data_path1 = "/root/training_data/vision-dataset/images_pretrain.parquet" +data_path2 = "/root/training_data/vision-dataset/vector_img_pretrain.parquet" + +from aiia.model import AIIABase +from aiia.model.config import AIIAConfig +from aiia.pretrain import Pretrainer + +# Create your model +config = AIIAConfig(model_name="AIIA-Base-512x20k") +model = AIIABase(config) + +# Initialize pretrainer with the model +pretrainer = Pretrainer(model, learning_rate=config.learning_rate, config=config) + +# List of dataset paths +dataset_paths = [ + data_path1, + data_path2 +] + +# Start training with multiple datasets +pretrainer.train( + dataset_paths=dataset_paths, + num_epochs=10, + batch_size=2, + sample_size=10000 +) \ No newline at end of file diff --git a/src/aiia/pretrain/pretrainer.py b/src/aiia/pretrain/pretrainer.py index fa84fcb..30ebc92 100644 --- a/src/aiia/pretrain/pretrainer.py +++ b/src/aiia/pretrain/pretrainer.py @@ -186,7 +186,7 @@ class Pretrainer: if val_loss < best_val_loss: best_val_loss = val_loss - self.save_model("AIIA-base-512") + self.model.save("AIIA-base-512") print("Best model saved!") self.save_losses('losses.csv') @@ -214,10 +214,6 @@ class Pretrainer: print(f"Validation Loss: {avg_val_loss:.4f}") return avg_val_loss - def save_model(self, path): - """Save the model and projection head.""" - self.model.save(path) - torch.save(self.projection_head.state_dict(), f"{path}_projection_head.pth") def save_losses(self, csv_file): """Save training and validation losses to a CSV file."""