From 32daaadddd58f61ba78d5e609428ce6a6a8ee6f9 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 13:05:24 +0100 Subject: [PATCH 01/32] updated pretrain method --- src/pretrain.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index daea216..fea60a0 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -34,7 +34,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): batch_size=32, val_split=0.2, seed=42, - column="file_path", + column="image_bytes", label_column=None ) @@ -71,8 +71,6 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): # Training phase model.train() total_train_loss = 0.0 - denoise_train_loss = 0.0 - rotate_train_loss = 0.0 for batch in train_dataloader: images, targets, tasks = zip(*batch) @@ -144,6 +142,6 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): print("Best model saved!") if __name__ == "__main__": - data_path1 = "/root/training_data/vision-dataset/images_dataset.parquet" + data_path1 = "/root/training_data/vision-dataset/images_checkpoint.parquet" data_path2 = "/root/training_data/vision-dataset/vec_images_dataset.parquet" pretrain_model(data_path1, data_path2, num_epochs=8) \ No newline at end of file From e5a56181604a77034e3bd12a28ef1c9773f9bb7f Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 13:09:23 +0100 Subject: [PATCH 02/32] correct copying --- src/aiia/model/Model.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/aiia/model/Model.py b/src/aiia/model/Model.py index a454c8c..fafab52 100644 --- a/src/aiia/model/Model.py +++ b/src/aiia/model/Model.py @@ -1,8 +1,9 @@ -from config import AIIAConfig +from .config import AIIAConfig from torch import nn import torch import os -import copy # Add this for deep copying +import copy + class AIIA(nn.Module): def __init__(self, config: AIIAConfig, **kwargs): From 59b2784e924c9d6329c702ce6e52797c3ab88b33 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 13:10:24 +0100 Subject: [PATCH 03/32] fixed spelling error --- src/aiia/model/Model.py | 7 +------ src/aiia/model/__init__.py | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/aiia/model/Model.py b/src/aiia/model/Model.py index fafab52..a9584fa 100644 --- a/src/aiia/model/Model.py +++ b/src/aiia/model/Model.py @@ -222,9 +222,4 @@ class AIIArecursive(AIIA): processed_patches.append(pp) combined_output = torch.mean(torch.stack(processed_patches, dim=0), dim=0) - return combined_output - -config = AIIAConfig() -model2 = AIIABaseShared(config) - -model2.save("shared") \ No newline at end of file + return combined_output \ No newline at end of file diff --git a/src/aiia/model/__init__.py b/src/aiia/model/__init__.py index 5757152..66cec7e 100644 --- a/src/aiia/model/__init__.py +++ b/src/aiia/model/__init__.py @@ -1,2 +1,2 @@ from .config import AIIAConfig -from .Model import AIIA, AIIABase, AIIAchunked, AIIAExpert, AIIAmoe, AIIAresursive \ No newline at end of file +from .Model import AIIA, AIIABase, AIIAchunked, AIIAExpert, AIIAmoe, AIIArecursive \ No newline at end of file From 338ac5dee59a7603c775ca39ec0f0c71a6aa217c Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 13:26:01 +0100 Subject: [PATCH 04/32] corrected imports --- src/aiia/__init__.py | 3 +-- src/aiia/model/__init__.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/aiia/__init__.py b/src/aiia/__init__.py index 25b8128..71acf48 100644 --- a/src/aiia/__init__.py +++ b/src/aiia/__init__.py @@ -1,4 +1,3 @@ -# Import submodules -from .model import AIIA, AIIAEncoder +from .model import AIIA, AIIABase, AIIAchunked, AIIAExpert, AIIAmoe, AIIArecursive, AIIABaseShared from .data import AIIADataLoader from .model.config import AIIAConfig \ No newline at end of file diff --git a/src/aiia/model/__init__.py b/src/aiia/model/__init__.py index 66cec7e..0e6a459 100644 --- a/src/aiia/model/__init__.py +++ b/src/aiia/model/__init__.py @@ -1,2 +1,2 @@ from .config import AIIAConfig -from .Model import AIIA, AIIABase, AIIAchunked, AIIAExpert, AIIAmoe, AIIArecursive \ No newline at end of file +from .Model import AIIA, AIIABase, AIIAchunked, AIIAExpert, AIIAmoe, AIIArecursive, AIIABaseShared \ No newline at end of file From 00168af32de4706abef73a3f6cc7e4f165c3da60 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 13:48:52 +0100 Subject: [PATCH 05/32] corrected init --- src/aiia/data/DataLoader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index d3cb900..bcfe28a 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -92,7 +92,7 @@ class AIIADataLoader(DataLoader): seed=42, column="file_path", label_column=None): - super().__init__() + super().__init__(dataset) self.batch_size = batch_size self.val_split = val_split From b7dc835a86aff011bf6c72d51613df620d734568 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 16:20:33 +0100 Subject: [PATCH 06/32] fixed dataloading --- src/aiia/data/DataLoader.py | 32 ++++++++++++++++++-------------- src/pretrain.py | 30 +++++++++--------------------- 2 files changed, 27 insertions(+), 35 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index bcfe28a..45daa7d 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -85,21 +85,19 @@ class JPGImageLoader: print(f"Skipped {self.skipped_count} images due to errors.") -class AIIADataLoader(DataLoader): +class AIIADataLoader: def __init__(self, dataset, batch_size=32, val_split=0.2, seed=42, column="file_path", - label_column=None): - super().__init__(dataset) - + label_column=None, + **dataloader_kwargs): self.batch_size = batch_size self.val_split = val_split self.seed = seed + random.seed(seed) - # Determine which loader to use based on the dataset's content - # Check if any entry in bytes_column is a bytes or bytestring type is_bytes_or_bytestring = any( isinstance(value, (bytes, memoryview)) for value in dataset[column].dropna().head(1).astype(str) @@ -112,10 +110,8 @@ class AIIADataLoader(DataLoader): label_column=label_column ) else: - # Check if file_path column contains valid image file paths (at least one entry) sample_paths = dataset[column].dropna().head(1).astype(str) - # Regex pattern for matching image file paths (adjust as needed) filepath_pattern = r'.*(?:/|\\).*\.([jJ][pP][gG]|png|gif)$' if any( @@ -128,23 +124,33 @@ class AIIADataLoader(DataLoader): label_column=label_column ) else: - # If neither condition is met, default to JPGImageLoader (assuming bytes are stored as strings) self.loader = JPGImageLoader( dataset, bytes_column=column, label_column=label_column ) - # Get all items self.items = [self.loader.get_item(idx) for idx in range(len(dataset))] - # Split into train and validation sets train_indices, val_indices = self._split_data() - # Create datasets for training and validation self.train_dataset = self._create_subset(train_indices) self.val_dataset = self._create_subset(val_indices) + self.train_loader = DataLoader( + self.train_dataset, + batch_size=batch_size, + shuffle=True, + **dataloader_kwargs + ) + + self.val_loader = DataLoader( + self.val_dataset, + batch_size=batch_size, + shuffle=False, + **dataloader_kwargs + ) + def _split_data(self): if len(self.items) == 0: return [], [] @@ -184,7 +190,6 @@ class AIIADataset(torch.utils.data.Dataset): return (image, label) elif isinstance(item, tuple) and len(item) == 3: image, task, label = item - # Handle tasks accordingly (e.g., apply different augmentations) if task == 'denoise': noise_std = 0.1 noisy_img = image + torch.randn_like(image) * noise_std @@ -199,7 +204,6 @@ class AIIADataset(torch.utils.data.Dataset): else: raise ValueError(f"Unknown task: {task}") else: - # Handle single images without labels or tasks if isinstance(item, Image.Image): return item else: diff --git a/src/pretrain.py b/src/pretrain.py index fea60a0..6fc9922 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -29,29 +29,17 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): model = AIIABase(config) # Create dataset loader with merged data - train_dataset = AIIADataLoader( + aiia_loader = AIIADataLoader( merged_df, batch_size=32, val_split=0.2, seed=42, - column="image_bytes", - label_column=None + column="image_bytes" ) - # Create separate dataloaders for training and validation sets - train_dataloader = DataLoader( - train_dataset.train_dataset, - batch_size=train_dataset.batch_size, - shuffle=True, - num_workers=4 - ) - - val_dataloader = DataLoader( - train_dataset.val_ataset, - batch_size=train_dataset.batch_size, - shuffle=False, - num_workers=4 - ) + # Access the train and validation loaders + train_loader = aiia_loader.train_loader + val_loader = aiia_loader.val_loader # Initialize loss functions and optimizer criterion_denoise = nn.MSELoss() @@ -72,7 +60,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): model.train() total_train_loss = 0.0 - for batch in train_dataloader: + for batch in train_loader: images, targets, tasks = zip(*batch) if device == "cuda": @@ -102,14 +90,14 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): total_train_loss += avg_loss.item() # Separate losses for reporting (you'd need to track this based on tasks) - avg_total_train_loss = total_train_loss / len(train_dataloader) + avg_total_train_loss = total_train_loss / len(train_loader) print(f"Training Loss: {avg_total_train_loss:.4f}") # Validation phase model.eval() with torch.no_grad(): val_losses = [] - for batch in val_dataloader: + for batch in val_loader: images, targets, tasks = zip(*batch) if device == "cuda": @@ -132,7 +120,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): avg_val_loss = total_loss / len(images) val_losses.append(avg_val_loss.item()) - avg_val_loss = sum(val_losses) / len(val_dataloader) + avg_val_loss = sum(val_losses) / len(val_loader) print(f"Validation Loss: {avg_val_loss:.4f}") # Save the best model From 9124221346f2476221ee4b39b9ecf2de396e56a2 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 17:26:13 +0100 Subject: [PATCH 07/32] correct loading files --- src/aiia/data/DataLoader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index 45daa7d..51da94e 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -53,7 +53,7 @@ class JPGImageLoader: self.successful_count = 0 self.skipped_count = 0 - if self.bytes_column not in dataset.column_names: + if self.bytes_column not in dataset.columns: raise ValueError(f"Column '{self.bytes_column}' not found in dataset.") def _get_image(self, item): From b5da2e477d62bdef7e1e74b59d35e6fb3da31dbf Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 18:15:05 +0100 Subject: [PATCH 08/32] correct allogcation for indexes --- src/aiia/data/DataLoader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index 51da94e..d80b2c7 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -28,7 +28,7 @@ class FilePathLoader: return None def get_item(self, idx): - item = self.dataset[idx] + item = self.dataset.iloc[idx] image = self._get_image(item) if image is not None: self.successful_count += 1 @@ -67,7 +67,7 @@ class JPGImageLoader: return None def get_item(self, idx): - item = self.dataset[idx] + item = self.dataset.iloc[idx] image = self._get_image(item) if image is not None: self.successful_count += 1 From 51da6b5aa2764614e981fc6e99fe5606d30375c8 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 18:19:54 +0100 Subject: [PATCH 09/32] corrected loading --- src/aiia/data/DataLoader.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index d80b2c7..1813749 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -5,7 +5,7 @@ from torch.utils.data import DataLoader from torchvision import transforms import random import re - +import base64 class FilePathLoader: def __init__(self, dataset, file_path_column="file_path", label_column=None): @@ -55,16 +55,26 @@ class JPGImageLoader: if self.bytes_column not in dataset.columns: raise ValueError(f"Column '{self.bytes_column}' not found in dataset.") - + def _get_image(self, item): try: - bytes_data = item[self.bytes_column] + # Retrieve the string data + data = item[self.bytes_column] + + # Check if the data is a string, and decode it + if isinstance(data, str): + bytes_data = base64.b64decode(data) # Adjust decoding as per your data encoding format + else: + bytes_data = data + + # Load the bytes into a BytesIO object and open the image img_bytes = io.BytesIO(bytes_data) image = Image.open(img_bytes).convert("RGB") return image except Exception as e: print(f"Error loading image from bytes: {e}") return None + def get_item(self, idx): item = self.dataset.iloc[idx] From e0abdb9d39ef782ed5eecbbcf167b323c038beae Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 18:40:19 +0100 Subject: [PATCH 10/32] fixed datasplitting --- src/aiia/data/DataLoader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index 1813749..98b882b 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -165,7 +165,8 @@ class AIIADataLoader: if len(self.items) == 0: return [], [] - tasks = [item[1] if len(item) > 1 and hasattr(item, '__getitem__') else None for item in self.items] + tasks = [item[1] for item in self.items if len(item) > 1 and hasattr(item, '__getitem__') and item[1] is not None] + unique_tasks = list(set(tasks)) if tasks.count(None) < len(tasks) else [] train_indices = [] From a8cd9b00e5823c8593ec534d56c1022035c6597f Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 18:41:14 +0100 Subject: [PATCH 11/32] limit data loadng to 10k --- src/pretrain.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index 6fc9922..09e9856 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -12,8 +12,8 @@ from aiia.data.DataLoader import AIIADataLoader def pretrain_model(data_path1, data_path2, num_epochs=3): # Merge the two parquet files - df1 = pd.read_parquet(data_path1) - df2 = pd.read_parquet(data_path2) + df1 = pd.read_parquet(data_path1).head(10000) + df2 = pd.read_parquet(data_path2).head(10000) merged_df = pd.concat([df1, df2], ignore_index=True) # Create a new AIIAConfig instance From 7a1eb8bd3098360035b835d83a5a8a7dfe091bca Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 21:58:13 +0100 Subject: [PATCH 12/32] updated loader --- src/aiia/data/DataLoader.py | 102 ++++++++++++------------------------ src/pretrain.py | 14 +---- 2 files changed, 36 insertions(+), 80 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index 98b882b..223d146 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -58,23 +58,22 @@ class JPGImageLoader: def _get_image(self, item): try: - # Retrieve the string data data = item[self.bytes_column] - # Check if the data is a string, and decode it - if isinstance(data, str): - bytes_data = base64.b64decode(data) # Adjust decoding as per your data encoding format + if isinstance(data, str) and data.startswith("b'"): + cleaned_data = data[2:-1].encode('latin1').decode('unicode-escape').encode('latin1') + bytes_data = cleaned_data + elif isinstance(data, str): + bytes_data = base64.b64decode(data) else: bytes_data = data - # Load the bytes into a BytesIO object and open the image img_bytes = io.BytesIO(bytes_data) image = Image.open(img_bytes).convert("RGB") return image except Exception as e: print(f"Error loading image from bytes: {e}") return None - def get_item(self, idx): item = self.dataset.iloc[idx] @@ -93,94 +92,61 @@ class JPGImageLoader: def print_summary(self): print(f"Successfully converted {self.successful_count} images.") print(f"Skipped {self.skipped_count} images due to errors.") - class AIIADataLoader: - def __init__(self, dataset, - batch_size=32, - val_split=0.2, - seed=42, - column="file_path", - label_column=None, - **dataloader_kwargs): + def __init__(self, dataset, batch_size=32, val_split=0.2, seed=42, column="file_path", label_column=None, **dataloader_kwargs): self.batch_size = batch_size self.val_split = val_split self.seed = seed random.seed(seed) - is_bytes_or_bytestring = any( - isinstance(value, (bytes, memoryview)) - for value in dataset[column].dropna().head(1).astype(str) + sample_value = dataset[column].iloc[0] + is_bytes_or_bytestring = isinstance(sample_value, (bytes, str)) and ( + isinstance(sample_value, bytes) or + sample_value.startswith("b'") or + sample_value.startswith(('b"', 'data:image')) ) if is_bytes_or_bytestring: - self.loader = JPGImageLoader( - dataset, - bytes_column=column, - label_column=label_column - ) + self.loader = JPGImageLoader(dataset, bytes_column=column, label_column=label_column) else: sample_paths = dataset[column].dropna().head(1).astype(str) + filepath_pattern = r'.*(?:/|\\).*\.([jJ][pP][gG]|[pP][nN][gG]|[gG][iI][fF])$' - filepath_pattern = r'.*(?:/|\\).*\.([jJ][pP][gG]|png|gif)$' - - if any( - re.match(filepath_pattern, path, flags=re.IGNORECASE) - for path in sample_paths - ): - self.loader = FilePathLoader( - dataset, - file_path_column=column, - label_column=label_column - ) + if any(re.match(filepath_pattern, path, flags=re.IGNORECASE) for path in sample_paths): + self.loader = FilePathLoader(dataset, file_path_column=column, label_column=label_column) else: - self.loader = JPGImageLoader( - dataset, - bytes_column=column, - label_column=label_column - ) + self.loader = JPGImageLoader(dataset, bytes_column=column, label_column=label_column) - self.items = [self.loader.get_item(idx) for idx in range(len(dataset))] + self.items = [] + for idx in range(len(dataset)): + item = self.loader.get_item(idx) + if item is not None: + self.items.append(item) + if not self.items: + raise ValueError("No valid items were loaded from the dataset") + train_indices, val_indices = self._split_data() self.train_dataset = self._create_subset(train_indices) self.val_dataset = self._create_subset(val_indices) - self.train_loader = DataLoader( - self.train_dataset, - batch_size=batch_size, - shuffle=True, - **dataloader_kwargs - ) - - self.val_loader = DataLoader( - self.val_dataset, - batch_size=batch_size, - shuffle=False, - **dataloader_kwargs - ) + self.train_loader = DataLoader(self.train_dataset, batch_size=batch_size, shuffle=True, **dataloader_kwargs) + self.val_loader = DataLoader(self.val_dataset, batch_size=batch_size, shuffle=False, **dataloader_kwargs) def _split_data(self): if len(self.items) == 0: - return [], [] + raise ValueError("No items to split") - tasks = [item[1] for item in self.items if len(item) > 1 and hasattr(item, '__getitem__') and item[1] is not None] - - unique_tasks = list(set(tasks)) if tasks.count(None) < len(tasks) else [] + num_samples = len(self.items) + indices = list(range(num_samples)) + random.shuffle(indices) - train_indices = [] - val_indices = [] + split_idx = int((1 - self.val_split) * num_samples) + train_indices = indices[:split_idx] + val_indices = indices[split_idx:] - for task in unique_tasks: - task_indices = [i for i, t in enumerate(tasks) if t == task] - n_val = int(len(task_indices) * self.val_split) - - random.shuffle(task_indices) - - val_indices.extend(task_indices[:n_val]) - train_indices.extend(task_indices[n_val:]) - return train_indices, val_indices def _create_subset(self, indices): @@ -218,4 +184,4 @@ class AIIADataset(torch.utils.data.Dataset): if isinstance(item, Image.Image): return item else: - raise ValueError("Invalid item format.") + raise ValueError("Invalid item format.") \ No newline at end of file diff --git a/src/pretrain.py b/src/pretrain.py index 09e9856..78ce63a 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -18,24 +18,14 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): # Create a new AIIAConfig instance config = AIIAConfig( - model_name="AIIA-512x", - hidden_size=512, - num_hidden_layers=12, - kernel_size=5, - learning_rate=5e-5 + model_name="AIIA-Base-512x20k", ) # Initialize the base model model = AIIABase(config) # Create dataset loader with merged data - aiia_loader = AIIADataLoader( - merged_df, - batch_size=32, - val_split=0.2, - seed=42, - column="image_bytes" - ) + aiia_loader = AIIADataLoader(merged_df, column="image_bytes", batch_size=32) # Access the train and validation loaders train_loader = aiia_loader.train_loader From cae3fa7fb30ca96208a3b6de2eaea461fd313cab Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 22:08:59 +0100 Subject: [PATCH 13/32] proper image transformation --- src/aiia/data/DataLoader.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index 223d146..1f5d75a 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -152,10 +152,13 @@ class AIIADataLoader: def _create_subset(self, indices): subset_items = [self.items[i] for i in indices] return AIIADataset(subset_items) - + class AIIADataset(torch.utils.data.Dataset): def __init__(self, items): self.items = items + self.transform = transforms.Compose([ + transforms.ToTensor() + ]) def __len__(self): return len(self.items) @@ -164,9 +167,14 @@ class AIIADataset(torch.utils.data.Dataset): item = self.items[idx] if isinstance(item, tuple) and len(item) == 2: image, label = item + # Convert PIL image to tensor + image = self.transform(image) return (image, label) elif isinstance(item, tuple) and len(item) == 3: image, task, label = item + # Convert PIL image to tensor first + image = self.transform(image) + if task == 'denoise': noise_std = 0.1 noisy_img = image + torch.randn_like(image) * noise_std @@ -182,6 +190,7 @@ class AIIADataset(torch.utils.data.Dataset): raise ValueError(f"Unknown task: {task}") else: if isinstance(item, Image.Image): - return item + # Convert single PIL image to tensor + return self.transform(item) else: - raise ValueError("Invalid item format.") \ No newline at end of file + raise ValueError("Invalid item format.") From 3f6e6514a9770523f248b5874c0e8a264e695a62 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 22:17:37 +0100 Subject: [PATCH 14/32] rgba conversion to rgb --- src/aiia/data/DataLoader.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index 1f5d75a..78f4eb5 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -21,7 +21,13 @@ class FilePathLoader: def _get_image(self, item): try: path = item[self.file_path_column] - image = Image.open(path).convert("RGB") + image = Image.open(path) + if image.mode == 'RGBA': + background = Image.new('RGB', image.size, (0, 0, 0)) + background.paste(image, mask=image.split()[3]) + image = background + elif image.mode != 'RGB': + image = image.convert('RGB') return image except Exception as e: print(f"Error loading image from {path}: {e}") @@ -69,7 +75,13 @@ class JPGImageLoader: bytes_data = data img_bytes = io.BytesIO(bytes_data) - image = Image.open(img_bytes).convert("RGB") + image = Image.open(img_bytes) + if image.mode == 'RGBA': + background = Image.new('RGB', image.size, (0, 0, 0)) + background.paste(image, mask=image.split()[3]) + image = background + elif image.mode != 'RGB': + image = image.convert('RGB') return image except Exception as e: print(f"Error loading image from bytes: {e}") @@ -167,12 +179,10 @@ class AIIADataset(torch.utils.data.Dataset): item = self.items[idx] if isinstance(item, tuple) and len(item) == 2: image, label = item - # Convert PIL image to tensor image = self.transform(image) return (image, label) elif isinstance(item, tuple) and len(item) == 3: image, task, label = item - # Convert PIL image to tensor first image = self.transform(image) if task == 'denoise': @@ -190,7 +200,6 @@ class AIIADataset(torch.utils.data.Dataset): raise ValueError(f"Unknown task: {task}") else: if isinstance(item, Image.Image): - # Convert single PIL image to tensor return self.transform(item) else: raise ValueError("Invalid item format.") From 7c4aef09789a14f42f42b616f0bebca2aaddba76 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 22:48:29 +0100 Subject: [PATCH 15/32] updated dataloader to work with tupels --- src/aiia/data/DataLoader.py | 44 ++++++++++------- src/pretrain.py | 94 ++++++++++++++----------------------- 2 files changed, 61 insertions(+), 77 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index 78f4eb5..567954f 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -106,10 +106,11 @@ class JPGImageLoader: print(f"Skipped {self.skipped_count} images due to errors.") class AIIADataLoader: - def __init__(self, dataset, batch_size=32, val_split=0.2, seed=42, column="file_path", label_column=None, **dataloader_kwargs): + def __init__(self, dataset, batch_size=32, val_split=0.2, seed=42, column="file_path", label_column=None, pretraining=False, **dataloader_kwargs): self.batch_size = batch_size self.val_split = val_split self.seed = seed + self.pretraining = pretraining random.seed(seed) sample_value = dataset[column].iloc[0] @@ -134,7 +135,12 @@ class AIIADataLoader: for idx in range(len(dataset)): item = self.loader.get_item(idx) if item is not None: - self.items.append(item) + if self.pretraining: + img = item[0] if isinstance(item, tuple) else item + self.items.append((img, 'denoise', img)) + self.items.append((img, 'rotate', 0)) + else: + self.items.append(item) if not self.items: raise ValueError("No valid items were loaded from the dataset") @@ -163,12 +169,14 @@ class AIIADataLoader: def _create_subset(self, indices): subset_items = [self.items[i] for i in indices] - return AIIADataset(subset_items) + return AIIADataset(subset_items, pretraining=self.pretraining) class AIIADataset(torch.utils.data.Dataset): - def __init__(self, items): + def __init__(self, items, pretraining=False): self.items = items + self.pretraining = pretraining self.transform = transforms.Compose([ + transforms.Resize((224, 224)), transforms.ToTensor() ]) @@ -177,29 +185,29 @@ class AIIADataset(torch.utils.data.Dataset): def __getitem__(self, idx): item = self.items[idx] - if isinstance(item, tuple) and len(item) == 2: - image, label = item - image = self.transform(image) - return (image, label) - elif isinstance(item, tuple) and len(item) == 3: + + if self.pretraining: image, task, label = item image = self.transform(image) if task == 'denoise': noise_std = 0.1 noisy_img = image + torch.randn_like(image) * noise_std - target = image - return (noisy_img, target, task) + target = image.clone() + return noisy_img, target, task elif task == 'rotate': angles = [0, 90, 180, 270] angle = random.choice(angles) rotated_img = transforms.functional.rotate(image, angle) - target = torch.tensor(angle).long() - return (rotated_img, target, task) - else: - raise ValueError(f"Unknown task: {task}") + target = torch.tensor(angle / 90).long() + return rotated_img, target, task else: - if isinstance(item, Image.Image): - return self.transform(item) + if isinstance(item, tuple) and len(item) == 2: + image, label = item + image = self.transform(image) + return image, label else: - raise ValueError("Invalid item format.") + if isinstance(item, Image.Image): + return self.transform(item) + else: + return self.transform(item[0]) diff --git a/src/pretrain.py b/src/pretrain.py index 78ce63a..8436d51 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -1,37 +1,26 @@ import torch from torch import nn -from torch.utils.data import Dataset, DataLoader -from torchvision import transforms -from PIL import Image -import os -import random import pandas as pd from aiia.model.config import AIIAConfig from aiia.model import AIIABase from aiia.data.DataLoader import AIIADataLoader def pretrain_model(data_path1, data_path2, num_epochs=3): - # Merge the two parquet files df1 = pd.read_parquet(data_path1).head(10000) df2 = pd.read_parquet(data_path2).head(10000) merged_df = pd.concat([df1, df2], ignore_index=True) - # Create a new AIIAConfig instance config = AIIAConfig( model_name="AIIA-Base-512x20k", ) - # Initialize the base model model = AIIABase(config) - # Create dataset loader with merged data - aiia_loader = AIIADataLoader(merged_df, column="image_bytes", batch_size=32) + aiia_loader = AIIADataLoader(merged_df, column="image_bytes", batch_size=32, pretraining=True) - # Access the train and validation loaders train_loader = aiia_loader.train_loader val_loader = aiia_loader.val_loader - # Initialize loss functions and optimizer criterion_denoise = nn.MSELoss() criterion_rotate = nn.CrossEntropyLoss() @@ -46,74 +35,61 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): print(f"\nEpoch {epoch+1}/{num_epochs}") print("-" * 20) - # Training phase model.train() total_train_loss = 0.0 + denoise_losses = [] + rotate_losses = [] for batch in train_loader: - images, targets, tasks = zip(*batch) - - if device == "cuda": - images = [img.cuda() for img in images] - targets = [t.cuda() for t in targets] + noisy_imgs, targets, tasks = batch + noisy_imgs = noisy_imgs.to(device) + targets = targets.to(device) optimizer.zero_grad() - # Process each sample individually since tasks can vary - outputs = [] - total_loss = 0.0 - for i, (image, target, task) in enumerate(zip(images, targets, tasks)): - output = model(image.unsqueeze(0)) - + outputs = model(noisy_imgs) + task_losses = [] + for i, task in enumerate(tasks): if task == 'denoise': - loss = criterion_denoise(output.squeeze(), target) - elif task == 'rotate': - loss = criterion_rotate(output.view(-1, len(set(outputs))), target) - - total_loss += loss - outputs.append(output) - - avg_loss = total_loss / len(images) - avg_loss.backward() + loss = criterion_denoise(outputs[i], targets[i]) + denoise_losses.append(loss.item()) + else: + loss = criterion_rotate(outputs[i].unsqueeze(0), targets[i].unsqueeze(0)) + rotate_losses.append(loss.item()) + task_losses.append(loss) + + batch_loss = sum(task_losses) / len(task_losses) + batch_loss.backward() optimizer.step() - total_train_loss += avg_loss.item() - # Separate losses for reporting (you'd need to track this based on tasks) - + total_train_loss += batch_loss.item() avg_total_train_loss = total_train_loss / len(train_loader) print(f"Training Loss: {avg_total_train_loss:.4f}") - # Validation phase model.eval() with torch.no_grad(): val_losses = [] for batch in val_loader: - images, targets, tasks = zip(*batch) + noisy_imgs, targets, tasks = batch - if device == "cuda": - images = [img.cuda() for img in images] - targets = [t.cuda() for t in targets] - - outputs = [] - total_loss = 0.0 - for i, (image, target, task) in enumerate(zip(images, targets, tasks)): - output = model(image.unsqueeze(0)) - + noisy_imgs = noisy_imgs.to(device) + targets = targets.to(device) + + outputs = model(noisy_imgs) + + task_losses = [] + for i, task in enumerate(tasks): if task == 'denoise': - loss = criterion_denoise(output.squeeze(), target) - elif task == 'rotate': - loss = criterion_rotate(output.view(-1, len(set(outputs))), target) - - total_loss += loss - outputs.append(output) - - avg_val_loss = total_loss / len(images) - val_losses.append(avg_val_loss.item()) - + loss = criterion_denoise(outputs[i], targets[i]) + else: + loss = criterion_rotate(outputs[i].unsqueeze(0), targets[i].unsqueeze(0)) + task_losses.append(loss) + + batch_loss = sum(task_losses) / len(task_losses) + val_losses.append(batch_loss.item()) avg_val_loss = sum(val_losses) / len(val_loader) print(f"Validation Loss: {avg_val_loss:.4f}") - # Save the best model if avg_val_loss < best_val_loss: best_val_loss = avg_val_loss model.save("BASEv0.1") @@ -122,4 +98,4 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): if __name__ == "__main__": data_path1 = "/root/training_data/vision-dataset/images_checkpoint.parquet" data_path2 = "/root/training_data/vision-dataset/vec_images_dataset.parquet" - pretrain_model(data_path1, data_path2, num_epochs=8) \ No newline at end of file + pretrain_model(data_path1, data_path2, num_epochs=3) \ No newline at end of file From 8a809269e5125d45a8daa93fd058a22f81890699 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 23:04:49 +0100 Subject: [PATCH 16/32] fix inline error --- src/aiia/data/DataLoader.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index 567954f..cbb22a6 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -134,17 +134,18 @@ class AIIADataLoader: self.items = [] for idx in range(len(dataset)): item = self.loader.get_item(idx) - if item is not None: + if item is not None: # Only add valid items if self.pretraining: img = item[0] if isinstance(item, tuple) else item self.items.append((img, 'denoise', img)) self.items.append((img, 'rotate', 0)) - else: + else: self.items.append(item) if not self.items: raise ValueError("No valid items were loaded from the dataset") + train_indices, val_indices = self._split_data() self.train_dataset = self._create_subset(train_indices) From 2b55f02b5054a29ff898a66d9e54fc2590040cc9 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 23:08:56 +0100 Subject: [PATCH 17/32] eorr handling because we have a tensor misshaping --- src/aiia/data/DataLoader.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/aiia/data/DataLoader.py b/src/aiia/data/DataLoader.py index cbb22a6..4ba5032 100644 --- a/src/aiia/data/DataLoader.py +++ b/src/aiia/data/DataLoader.py @@ -189,7 +189,12 @@ class AIIADataset(torch.utils.data.Dataset): if self.pretraining: image, task, label = item + if not isinstance(image, Image.Image): + raise ValueError(f"Invalid image at index {idx}") + image = self.transform(image) + if image.shape != (3, 224, 224): + raise ValueError(f"Invalid image shape at index {idx}: {image.shape}") if task == 'denoise': noise_std = 0.1 @@ -202,13 +207,22 @@ class AIIADataset(torch.utils.data.Dataset): rotated_img = transforms.functional.rotate(image, angle) target = torch.tensor(angle / 90).long() return rotated_img, target, task + else: + raise ValueError(f"Invalid task at index {idx}: {task}") else: if isinstance(item, tuple) and len(item) == 2: image, label = item + if not isinstance(image, Image.Image): + raise ValueError(f"Invalid image at index {idx}") image = self.transform(image) + if image.shape != (3, 224, 224): + raise ValueError(f"Invalid image shape at index {idx}: {image.shape}") return image, label else: if isinstance(item, Image.Image): - return self.transform(item) + image = self.transform(item) else: - return self.transform(item[0]) + image = self.transform(item[0]) + if image.shape != (3, 224, 224): + raise ValueError(f"Invalid image shape at index {idx}: {image.shape}") + return image From b501ae8317b9fbd18e48fb280eb1421cc0fbc5db Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 23:20:15 +0100 Subject: [PATCH 18/32] new pretraining script --- src/pretrain.py | 79 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 61 insertions(+), 18 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index 8436d51..4d98538 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -1,29 +1,65 @@ import torch -from torch import nn +from torch import nn, utils import pandas as pd from aiia.model.config import AIIAConfig from aiia.model import AIIABase from aiia.data.DataLoader import AIIADataLoader +import os +import copy def pretrain_model(data_path1, data_path2, num_epochs=3): + # Read and merge datasets df1 = pd.read_parquet(data_path1).head(10000) df2 = pd.read_parquet(data_path2).head(10000) merged_df = pd.concat([df1, df2], ignore_index=True) + # Model configuration config = AIIAConfig( model_name="AIIA-Base-512x20k", ) + # Initialize model and data loader model = AIIABase(config) + + # Define a custom collate function to handle preprocessing and skip bad samples + def safe_collate(batch): + processed_batch = [] + for sample in batch: + try: + # Process each sample here (e.g., decode image, preprocess, etc.) + # Replace with actual preprocessing steps + processed_sample = { + 'image': torch.randn(3, 224, 224), # Example tensor + 'target': torch.randint(0, 10, (1,)), # Example target + 'task': 'denoise' # Example task + } + processed_batch.append(processed_sample) + except Exception as e: + print(f"Skipping sample due to error: {e}") + if not processed_batch: + return None # Skip batch if all samples are invalid - aiia_loader = AIIADataLoader(merged_df, column="image_bytes", batch_size=32, pretraining=True) + # Stack tensors for the batch + images = torch.stack([x['image'] for x in processed_batch]) + targets = torch.stack([x['target'] for x in processed_batch]) + tasks = [x['task'] for x in processed_batch] + + return (images, targets, tasks) + + aiia_loader = AIIADataLoader( + merged_df, + column="image_bytes", + batch_size=32, + pretraining=True, + collate_fn=safe_collate + ) train_loader = aiia_loader.train_loader val_loader = aiia_loader.val_loader + # Define loss functions and optimizer criterion_denoise = nn.MSELoss() criterion_rotate = nn.CrossEntropyLoss() - optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate) device = "cuda" if torch.cuda.is_available() else "cpu" @@ -35,49 +71,55 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): print(f"\nEpoch {epoch+1}/{num_epochs}") print("-" * 20) + # Training phase model.train() total_train_loss = 0.0 - denoise_losses = [] - rotate_losses = [] - + for batch in train_loader: + if batch is None: + continue # Skip empty batches + noisy_imgs, targets, tasks = batch - noisy_imgs = noisy_imgs.to(device) targets = targets.to(device) + optimizer.zero_grad() - + outputs = model(noisy_imgs) task_losses = [] + for i, task in enumerate(tasks): if task == 'denoise': loss = criterion_denoise(outputs[i], targets[i]) - denoise_losses.append(loss.item()) else: loss = criterion_rotate(outputs[i].unsqueeze(0), targets[i].unsqueeze(0)) - rotate_losses.append(loss.item()) task_losses.append(loss) batch_loss = sum(task_losses) / len(task_losses) batch_loss.backward() optimizer.step() - + total_train_loss += batch_loss.item() + avg_total_train_loss = total_train_loss / len(train_loader) print(f"Training Loss: {avg_total_train_loss:.4f}") + # Validation phase model.eval() + val_loss = 0.0 + with torch.no_grad(): - val_losses = [] for batch in val_loader: + if batch is None: + continue + noisy_imgs, targets, tasks = batch - noisy_imgs = noisy_imgs.to(device) targets = targets.to(device) outputs = model(noisy_imgs) - task_losses = [] + for i, task in enumerate(tasks): if task == 'denoise': loss = criterion_denoise(outputs[i], targets[i]) @@ -86,10 +128,11 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): task_losses.append(loss) batch_loss = sum(task_losses) / len(task_losses) - val_losses.append(batch_loss.item()) - avg_val_loss = sum(val_losses) / len(val_loader) - print(f"Validation Loss: {avg_val_loss:.4f}") - + val_loss += batch_loss.item() + + avg_val_loss = val_loss / len(val_loader) + print(f"Validation Loss: {avg_val_loss:.4f}") + if avg_val_loss < best_val_loss: best_val_loss = avg_val_loss model.save("BASEv0.1") From 29f0d86ff7c9360437ae36500142115174e034af Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sun, 26 Jan 2025 23:26:42 +0100 Subject: [PATCH 19/32] kernel size as large as channel size --- src/aiia/model/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiia/model/config.py b/src/aiia/model/config.py index e2ae83e..02bc709 100644 --- a/src/aiia/model/config.py +++ b/src/aiia/model/config.py @@ -8,7 +8,7 @@ class AIIAConfig: def __init__( self, model_name: str = "AIIA", - kernel_size: int = 5, + kernel_size: int = 3, activation_function: str = "GELU", hidden_size: int = 512, num_hidden_layers: int = 12, From 3749ba9c5fa74889044f78d6e7ded226c558a954 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 08:39:42 +0100 Subject: [PATCH 20/32] updated base models MaxPool2D --- src/aiia/model/Model.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/aiia/model/Model.py b/src/aiia/model/Model.py index a9584fa..771caf8 100644 --- a/src/aiia/model/Model.py +++ b/src/aiia/model/Model.py @@ -80,8 +80,9 @@ class AIIABaseShared(AIIA): # Initialize max pooling layer self.max_pool = nn.MaxPool2d( - kernel_size=self.config.kernel_size, - padding=1 # Using same padding as in Conv2d layers + kernel_size=1, + stride=1, + padding=1 ) def forward(self, x): @@ -117,7 +118,7 @@ class AIIABase(AIIA): nn.Conv2d(in_channels, self.config.hidden_size, kernel_size=self.config.kernel_size, padding=1), getattr(nn, self.config.activation_function)(), - nn.MaxPool2d(kernel_size=2) + nn.MaxPool2d(kernel_size=1, stride=1) ]) in_channels = self.config.hidden_size From d205346741a7d0a71000eee9fe4bc848dccf82cb Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 08:44:46 +0100 Subject: [PATCH 21/32] downsized trainingdata from 20k to 1k --- src/pretrain.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index 4d98538..505b032 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -9,13 +9,13 @@ import copy def pretrain_model(data_path1, data_path2, num_epochs=3): # Read and merge datasets - df1 = pd.read_parquet(data_path1).head(10000) - df2 = pd.read_parquet(data_path2).head(10000) + df1 = pd.read_parquet(data_path1).head(5000) + df2 = pd.read_parquet(data_path2).head(5000) merged_df = pd.concat([df1, df2], ignore_index=True) # Model configuration config = AIIAConfig( - model_name="AIIA-Base-512x20k", + model_name="AIIA-Base-512x10k", ) # Initialize model and data loader From 32526c3c30eb9a32532c35f8441763fa0caec1a0 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 08:53:01 +0100 Subject: [PATCH 22/32] 5k image --- src/pretrain.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index 505b032..c6e7705 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -9,13 +9,13 @@ import copy def pretrain_model(data_path1, data_path2, num_epochs=3): # Read and merge datasets - df1 = pd.read_parquet(data_path1).head(5000) - df2 = pd.read_parquet(data_path2).head(5000) + df1 = pd.read_parquet(data_path1).head(2500) + df2 = pd.read_parquet(data_path2).head(2500) merged_df = pd.concat([df1, df2], ignore_index=True) # Model configuration config = AIIAConfig( - model_name="AIIA-Base-512x10k", + model_name="AIIA-Base-512x5k", ) # Initialize model and data loader From 8dad1d7150f673af41bc510de97b27ef93ab9b85 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 09:02:22 +0100 Subject: [PATCH 23/32] downszied batchsize --- src/pretrain.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index c6e7705..38663e3 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -9,13 +9,13 @@ import copy def pretrain_model(data_path1, data_path2, num_epochs=3): # Read and merge datasets - df1 = pd.read_parquet(data_path1).head(2500) - df2 = pd.read_parquet(data_path2).head(2500) + df1 = pd.read_parquet(data_path1).head(10000) + df2 = pd.read_parquet(data_path2).head(10000) merged_df = pd.concat([df1, df2], ignore_index=True) # Model configuration config = AIIAConfig( - model_name="AIIA-Base-512x5k", + model_name="AIIA-Base-512x20k", ) # Initialize model and data loader @@ -49,7 +49,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): aiia_loader = AIIADataLoader( merged_df, column="image_bytes", - batch_size=32, + batch_size=8, pretraining=True, collate_fn=safe_collate ) From b546f4ee27db9c4bfc6e8cf58940b2f44839f0f2 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 09:08:39 +0100 Subject: [PATCH 24/32] furhter batchsizing downsizing --- src/pretrain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pretrain.py b/src/pretrain.py index 38663e3..31dce37 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -49,7 +49,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): aiia_loader = AIIADataLoader( merged_df, column="image_bytes", - batch_size=8, + batch_size=4, pretraining=True, collate_fn=safe_collate ) From 6c146f2767774ed0571475e1b47e6fdbf5ad81dc Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 09:13:22 +0100 Subject: [PATCH 25/32] added progressbar for batches --- src/pretrain.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index 31dce37..45aac3f 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -4,8 +4,8 @@ import pandas as pd from aiia.model.config import AIIAConfig from aiia.model import AIIABase from aiia.data.DataLoader import AIIADataLoader -import os -import copy +from tqdm import tqdm + def pretrain_model(data_path1, data_path2, num_epochs=3): # Read and merge datasets @@ -49,7 +49,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): aiia_loader = AIIADataLoader( merged_df, column="image_bytes", - batch_size=4, + batch_size=2, pretraining=True, collate_fn=safe_collate ) @@ -75,7 +75,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): model.train() total_train_loss = 0.0 - for batch in train_loader: + for batch in tqdm(train_loader): if batch is None: continue # Skip empty batches From 91a568731faa68bdcaa4eef474aaaac05ed51c68 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 09:26:08 +0100 Subject: [PATCH 26/32] removed placeholder collate function --- src/pretrain.py | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index 45aac3f..c4bc1a7 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -26,22 +26,22 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): processed_batch = [] for sample in batch: try: - # Process each sample here (e.g., decode image, preprocess, etc.) - # Replace with actual preprocessing steps - processed_sample = { - 'image': torch.randn(3, 224, 224), # Example tensor - 'target': torch.randint(0, 10, (1,)), # Example target - 'task': 'denoise' # Example task - } - processed_batch.append(processed_sample) + noisy_img, target, task = sample + processed_batch.append({ + 'image': noisy_img, + 'target': target, + 'task': task + }) except Exception as e: print(f"Skipping sample due to error: {e}") + continue + if not processed_batch: return None # Skip batch if all samples are invalid # Stack tensors for the batch images = torch.stack([x['image'] for x in processed_batch]) - targets = torch.stack([x['target'] for x in processed_batch]) + targets = [x['target'] for x in processed_batch] # Don't stack targets yet tasks = [x['task'] for x in processed_batch] return (images, targets, tasks) @@ -81,18 +81,21 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): noisy_imgs, targets, tasks = batch noisy_imgs = noisy_imgs.to(device) - targets = targets.to(device) optimizer.zero_grad() outputs = model(noisy_imgs) task_losses = [] - for i, task in enumerate(tasks): + for i, (output, target, task) in enumerate(zip(outputs, targets, tasks)): if task == 'denoise': - loss = criterion_denoise(outputs[i], targets[i]) - else: - loss = criterion_rotate(outputs[i].unsqueeze(0), targets[i].unsqueeze(0)) + target = target.to(device) # Move target to device + # Ensure output and target have same shape + loss = criterion_denoise(output, target) + else: # rotate task + target = target.to(device) # Move target to device + # For rotation task, output should be [batch_size, num_classes] + loss = criterion_rotate(output.view(1, -1), target.view(-1)) task_losses.append(loss) batch_loss = sum(task_losses) / len(task_losses) @@ -101,8 +104,8 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): total_train_loss += batch_loss.item() - avg_total_train_loss = total_train_loss / len(train_loader) - print(f"Training Loss: {avg_total_train_loss:.4f}") + avg_train_loss = total_train_loss / len(train_loader) + print(f"Training Loss: {avg_train_loss:.4f}") # Validation phase model.eval() @@ -115,16 +118,17 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): noisy_imgs, targets, tasks = batch noisy_imgs = noisy_imgs.to(device) - targets = targets.to(device) outputs = model(noisy_imgs) task_losses = [] - for i, task in enumerate(tasks): + for i, (output, target, task) in enumerate(zip(outputs, targets, tasks)): if task == 'denoise': - loss = criterion_denoise(outputs[i], targets[i]) + target = target.to(device) + loss = criterion_denoise(output, target) else: - loss = criterion_rotate(outputs[i].unsqueeze(0), targets[i].unsqueeze(0)) + target = target.to(device) + loss = criterion_rotate(output.view(1, -1), target.view(-1)) task_losses.append(loss) batch_loss = sum(task_losses) / len(task_losses) From 13cf1897aece547a101562db59d7c2e92d459594 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 09:32:20 +0100 Subject: [PATCH 27/32] handle both pretraining methods --- src/pretrain.py | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index c4bc1a7..690bd9e 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -6,7 +6,6 @@ from aiia.model import AIIABase from aiia.data.DataLoader import AIIADataLoader from tqdm import tqdm - def pretrain_model(data_path1, data_path2, num_epochs=3): # Read and merge datasets df1 = pd.read_parquet(data_path1).head(10000) @@ -21,7 +20,6 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): # Initialize model and data loader model = AIIABase(config) - # Define a custom collate function to handle preprocessing and skip bad samples def safe_collate(batch): processed_batch = [] for sample in batch: @@ -37,11 +35,11 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): continue if not processed_batch: - return None # Skip batch if all samples are invalid + return None # Stack tensors for the batch images = torch.stack([x['image'] for x in processed_batch]) - targets = [x['target'] for x in processed_batch] # Don't stack targets yet + targets = torch.stack([x['target'] for x in processed_batch]) # Stack targets tasks = [x['task'] for x in processed_batch] return (images, targets, tasks) @@ -57,7 +55,6 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): train_loader = aiia_loader.train_loader val_loader = aiia_loader.val_loader - # Define loss functions and optimizer criterion_denoise = nn.MSELoss() criterion_rotate = nn.CrossEntropyLoss() optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate) @@ -77,25 +74,29 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): for batch in tqdm(train_loader): if batch is None: - continue # Skip empty batches + continue noisy_imgs, targets, tasks = batch + batch_size = noisy_imgs.size(0) noisy_imgs = noisy_imgs.to(device) + targets = targets.to(device) optimizer.zero_grad() + # Get model outputs and reshape if necessary outputs = model(noisy_imgs) - task_losses = [] - for i, (output, target, task) in enumerate(zip(outputs, targets, tasks)): + task_losses = [] + for i, task in enumerate(tasks): if task == 'denoise': - target = target.to(device) # Move target to device - # Ensure output and target have same shape + # Ensure output matches target shape for denoising + output = outputs[i].view(3, 224, 224) # Reshape to match image dimensions + target = targets[i] loss = criterion_denoise(output, target) else: # rotate task - target = target.to(device) # Move target to device - # For rotation task, output should be [batch_size, num_classes] - loss = criterion_rotate(output.view(1, -1), target.view(-1)) + output = outputs[i].view(-1) # Flatten output for rotation prediction + target = targets[i].long() # Convert target to long for classification + loss = criterion_rotate(output.unsqueeze(0), target.unsqueeze(0)) task_losses.append(loss) batch_loss = sum(task_losses) / len(task_losses) @@ -118,17 +119,20 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): noisy_imgs, targets, tasks = batch noisy_imgs = noisy_imgs.to(device) + targets = targets.to(device) outputs = model(noisy_imgs) - task_losses = [] - for i, (output, target, task) in enumerate(zip(outputs, targets, tasks)): + task_losses = [] + for i, task in enumerate(tasks): if task == 'denoise': - target = target.to(device) + output = outputs[i].view(3, 224, 224) + target = targets[i] loss = criterion_denoise(output, target) else: - target = target.to(device) - loss = criterion_rotate(output.view(1, -1), target.view(-1)) + output = outputs[i].view(-1) + target = targets[i].long() + loss = criterion_rotate(output.unsqueeze(0), target.unsqueeze(0)) task_losses.append(loss) batch_loss = sum(task_losses) / len(task_losses) From b6b63851caa9ef582cbbeccb8c9a6b652353c8e0 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 10:15:00 +0100 Subject: [PATCH 28/32] addeed tasks for both denosing and rotation --- src/pretrain.py | 147 +++++++++++++++++++++++++++++------------------- 1 file changed, 88 insertions(+), 59 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index 690bd9e..201c03f 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -6,6 +6,7 @@ from aiia.model import AIIABase from aiia.data.DataLoader import AIIADataLoader from tqdm import tqdm + def pretrain_model(data_path1, data_path2, num_epochs=3): # Read and merge datasets df1 = pd.read_parquet(data_path1).head(10000) @@ -21,28 +22,49 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): model = AIIABase(config) def safe_collate(batch): - processed_batch = [] + denoise_batch = [] + rotate_batch = [] + for sample in batch: try: noisy_img, target, task = sample - processed_batch.append({ - 'image': noisy_img, - 'target': target, - 'task': task - }) + if task == 'denoise': + denoise_batch.append({ + 'image': noisy_img, + 'target': target, + 'task': task + }) + else: # rotate task + rotate_batch.append({ + 'image': noisy_img, + 'target': target, + 'task': task + }) except Exception as e: print(f"Skipping sample due to error: {e}") continue - if not processed_batch: + if not denoise_batch and not rotate_batch: return None - # Stack tensors for the batch - images = torch.stack([x['image'] for x in processed_batch]) - targets = torch.stack([x['target'] for x in processed_batch]) # Stack targets - tasks = [x['task'] for x in processed_batch] + batch_data = { + 'denoise': None, + 'rotate': None + } - return (images, targets, tasks) + # Process denoise batch + if denoise_batch: + images = torch.stack([x['image'] for x in denoise_batch]) + targets = torch.stack([x['target'] for x in denoise_batch]) + batch_data['denoise'] = (images, targets) + + # Process rotate batch + if rotate_batch: + images = torch.stack([x['image'] for x in rotate_batch]) + targets = torch.stack([x['target'] for x in rotate_batch]) + batch_data['rotate'] = (images, targets) + + return batch_data aiia_loader = AIIADataLoader( merged_df, @@ -71,74 +93,81 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): # Training phase model.train() total_train_loss = 0.0 + batch_count = 0 - for batch in tqdm(train_loader): - if batch is None: + for batch_data in tqdm(train_loader): + if batch_data is None: continue - noisy_imgs, targets, tasks = batch - batch_size = noisy_imgs.size(0) - noisy_imgs = noisy_imgs.to(device) - targets = targets.to(device) - optimizer.zero_grad() + batch_loss = 0 - # Get model outputs and reshape if necessary - outputs = model(noisy_imgs) + # Handle denoise task + if batch_data['denoise'] is not None: + noisy_imgs, targets = batch_data['denoise'] + noisy_imgs = noisy_imgs.to(device) + targets = targets.to(device) + + outputs = model(noisy_imgs) + loss = criterion_denoise(outputs, targets) + batch_loss += loss - task_losses = [] - for i, task in enumerate(tasks): - if task == 'denoise': - # Ensure output matches target shape for denoising - output = outputs[i].view(3, 224, 224) # Reshape to match image dimensions - target = targets[i] - loss = criterion_denoise(output, target) - else: # rotate task - output = outputs[i].view(-1) # Flatten output for rotation prediction - target = targets[i].long() # Convert target to long for classification - loss = criterion_rotate(output.unsqueeze(0), target.unsqueeze(0)) - task_losses.append(loss) + # Handle rotate task + if batch_data['rotate'] is not None: + imgs, targets = batch_data['rotate'] + imgs = imgs.to(device) + targets = targets.long().to(device) + + outputs = model(imgs) + loss = criterion_rotate(outputs, targets) + batch_loss += loss - batch_loss = sum(task_losses) / len(task_losses) - batch_loss.backward() - optimizer.step() - - total_train_loss += batch_loss.item() + if batch_loss > 0: + batch_loss.backward() + optimizer.step() + total_train_loss += batch_loss.item() + batch_count += 1 - avg_train_loss = total_train_loss / len(train_loader) + avg_train_loss = total_train_loss / max(batch_count, 1) print(f"Training Loss: {avg_train_loss:.4f}") # Validation phase model.eval() val_loss = 0.0 + val_batch_count = 0 with torch.no_grad(): - for batch in val_loader: - if batch is None: + for batch_data in val_loader: + if batch_data is None: continue - noisy_imgs, targets, tasks = batch - noisy_imgs = noisy_imgs.to(device) - targets = targets.to(device) + batch_loss = 0 - outputs = model(noisy_imgs) + # Handle denoise task + if batch_data['denoise'] is not None: + noisy_imgs, targets = batch_data['denoise'] + noisy_imgs = noisy_imgs.to(device) + targets = targets.to(device) + + outputs = model(noisy_imgs) + loss = criterion_denoise(outputs, targets) + batch_loss += loss - task_losses = [] - for i, task in enumerate(tasks): - if task == 'denoise': - output = outputs[i].view(3, 224, 224) - target = targets[i] - loss = criterion_denoise(output, target) - else: - output = outputs[i].view(-1) - target = targets[i].long() - loss = criterion_rotate(output.unsqueeze(0), target.unsqueeze(0)) - task_losses.append(loss) + # Handle rotate task + if batch_data['rotate'] is not None: + imgs, targets = batch_data['rotate'] + imgs = imgs.to(device) + targets = targets.long().to(device) + + outputs = model(imgs) + loss = criterion_rotate(outputs, targets) + batch_loss += loss - batch_loss = sum(task_losses) / len(task_losses) - val_loss += batch_loss.item() + if batch_loss > 0: + val_loss += batch_loss.item() + val_batch_count += 1 - avg_val_loss = val_loss / len(val_loader) + avg_val_loss = val_loss / max(val_batch_count, 1) print(f"Validation Loss: {avg_val_loss:.4f}") if avg_val_loss < best_val_loss: From fe4d6b5b22fed2c52cbaf3dcab7444300d81b6fc Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 10:26:28 +0100 Subject: [PATCH 29/32] corrected viewing and some prints --- src/pretrain.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/pretrain.py b/src/pretrain.py index 201c03f..0792f09 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -6,7 +6,6 @@ from aiia.model import AIIABase from aiia.data.DataLoader import AIIADataLoader from tqdm import tqdm - def pretrain_model(data_path1, data_path2, num_epochs=3): # Read and merge datasets df1 = pd.read_parquet(data_path1).head(10000) @@ -108,7 +107,19 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): noisy_imgs = noisy_imgs.to(device) targets = targets.to(device) + # Print shapes for debugging + print(f"\nDenoising task shapes:") + print(f"Input shape: {noisy_imgs.shape}") + print(f"Target shape: {targets.shape}") + outputs = model(noisy_imgs) + print(f"Raw output shape: {outputs.shape}") + + # Reshape output to match target dimensions + batch_size = targets.size(0) + outputs = outputs.view(batch_size, 3, 224, 224) + print(f"Reshaped output shape: {outputs.shape}") + loss = criterion_denoise(outputs, targets) batch_loss += loss @@ -118,7 +129,18 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): imgs = imgs.to(device) targets = targets.long().to(device) + # Print shapes for debugging + print(f"\nRotation task shapes:") + print(f"Input shape: {imgs.shape}") + print(f"Target shape: {targets.shape}") + outputs = model(imgs) + print(f"Raw output shape: {outputs.shape}") + + # Reshape output for rotation classification + outputs = outputs.view(targets.size(0), -1) # Flatten to [batch_size, features] + print(f"Reshaped output shape: {outputs.shape}") + loss = criterion_rotate(outputs, targets) batch_loss += loss @@ -150,6 +172,8 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): targets = targets.to(device) outputs = model(noisy_imgs) + batch_size = targets.size(0) + outputs = outputs.view(batch_size, 3, 224, 224) loss = criterion_denoise(outputs, targets) batch_loss += loss @@ -160,6 +184,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): targets = targets.long().to(device) outputs = model(imgs) + outputs = outputs.view(targets.size(0), -1) loss = criterion_rotate(outputs, targets) batch_loss += loss From 58baf0ad3c9b212000fbbb4372120161d9ac9950 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 10:43:59 +0100 Subject: [PATCH 30/32] overall improvement --- src/pretrain.py | 78 +++++++++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index 0792f09..55b4650 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -6,6 +6,18 @@ from aiia.model import AIIABase from aiia.data.DataLoader import AIIADataLoader from tqdm import tqdm +class ProjectionHead(nn.Module): + def __init__(self): + super().__init__() + self.conv_denoise = nn.Conv2d(512, 3, kernel_size=1) + self.conv_rotate = nn.Conv2d(512, 4, kernel_size=1) # 4 classes for 0, 90, 180, 270 degrees + + def forward(self, x, task='denoise'): + if task == 'denoise': + return self.conv_denoise(x) + else: + return self.conv_rotate(x).mean(dim=(2, 3)) # Global average pooling for rotation task + def pretrain_model(data_path1, data_path2, num_epochs=3): # Read and merge datasets df1 = pd.read_parquet(data_path1).head(10000) @@ -17,9 +29,14 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): model_name="AIIA-Base-512x20k", ) - # Initialize model and data loader + # Initialize model and projection head model = AIIABase(config) + projection_head = ProjectionHead() + device = "cuda" if torch.cuda.is_available() else "cpu" + model.to(device) + projection_head.to(device) + def safe_collate(batch): denoise_batch = [] rotate_batch = [] @@ -51,13 +68,11 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): 'rotate': None } - # Process denoise batch if denoise_batch: images = torch.stack([x['image'] for x in denoise_batch]) targets = torch.stack([x['target'] for x in denoise_batch]) batch_data['denoise'] = (images, targets) - # Process rotate batch if rotate_batch: images = torch.stack([x['image'] for x in rotate_batch]) targets = torch.stack([x['target'] for x in rotate_batch]) @@ -78,10 +93,12 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): criterion_denoise = nn.MSELoss() criterion_rotate = nn.CrossEntropyLoss() - optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate) - - device = "cuda" if torch.cuda.is_available() else "cpu" - model.to(device) + + # Update optimizer to include projection head parameters + optimizer = torch.optim.AdamW( + list(model.parameters()) + list(projection_head.parameters()), + lr=config.learning_rate + ) best_val_loss = float('inf') @@ -91,6 +108,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): # Training phase model.train() + projection_head.train() total_train_loss = 0.0 batch_count = 0 @@ -107,18 +125,16 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): noisy_imgs = noisy_imgs.to(device) targets = targets.to(device) - # Print shapes for debugging + # Get features from base model + features = model(noisy_imgs) + # Project features back to image space + outputs = projection_head(features, task='denoise') + print(f"\nDenoising task shapes:") print(f"Input shape: {noisy_imgs.shape}") print(f"Target shape: {targets.shape}") - - outputs = model(noisy_imgs) - print(f"Raw output shape: {outputs.shape}") - - # Reshape output to match target dimensions - batch_size = targets.size(0) - outputs = outputs.view(batch_size, 3, 224, 224) - print(f"Reshaped output shape: {outputs.shape}") + print(f"Features shape: {features.shape}") + print(f"Output shape: {outputs.shape}") loss = criterion_denoise(outputs, targets) batch_loss += loss @@ -129,17 +145,16 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): imgs = imgs.to(device) targets = targets.long().to(device) - # Print shapes for debugging + # Get features from base model + features = model(imgs) + # Project features to rotation predictions + outputs = projection_head(features, task='rotate') + print(f"\nRotation task shapes:") print(f"Input shape: {imgs.shape}") print(f"Target shape: {targets.shape}") - - outputs = model(imgs) - print(f"Raw output shape: {outputs.shape}") - - # Reshape output for rotation classification - outputs = outputs.view(targets.size(0), -1) # Flatten to [batch_size, features] - print(f"Reshaped output shape: {outputs.shape}") + print(f"Features shape: {features.shape}") + print(f"Output shape: {outputs.shape}") loss = criterion_rotate(outputs, targets) batch_loss += loss @@ -155,6 +170,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): # Validation phase model.eval() + projection_head.eval() val_loss = 0.0 val_batch_count = 0 @@ -165,26 +181,23 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): batch_loss = 0 - # Handle denoise task if batch_data['denoise'] is not None: noisy_imgs, targets = batch_data['denoise'] noisy_imgs = noisy_imgs.to(device) targets = targets.to(device) - outputs = model(noisy_imgs) - batch_size = targets.size(0) - outputs = outputs.view(batch_size, 3, 224, 224) + features = model(noisy_imgs) + outputs = projection_head(features, task='denoise') loss = criterion_denoise(outputs, targets) batch_loss += loss - # Handle rotate task if batch_data['rotate'] is not None: imgs, targets = batch_data['rotate'] imgs = imgs.to(device) targets = targets.long().to(device) - outputs = model(imgs) - outputs = outputs.view(targets.size(0), -1) + features = model(imgs) + outputs = projection_head(features, task='rotate') loss = criterion_rotate(outputs, targets) batch_loss += loss @@ -197,7 +210,8 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): if avg_val_loss < best_val_loss: best_val_loss = avg_val_loss - model.save("BASEv0.1") + # Save both model and projection head + model.save("AIIA-base-512") print("Best model saved!") if __name__ == "__main__": From 8d08bfc14c4572020933ddf6cf710356c8eaf3db Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 10:56:16 +0100 Subject: [PATCH 31/32] removed rint statemetns and added csv saving --- src/pretrain.py | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index 55b4650..6e4c05c 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -1,5 +1,6 @@ import torch -from torch import nn, utils +from torch import nn +import csv import pandas as pd from aiia.model.config import AIIAConfig from aiia.model import AIIABase @@ -101,7 +102,8 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): ) best_val_loss = float('inf') - + train_losses = [] + val_losses = [] for epoch in range(num_epochs): print(f"\nEpoch {epoch+1}/{num_epochs}") print("-" * 20) @@ -128,14 +130,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): # Get features from base model features = model(noisy_imgs) # Project features back to image space - outputs = projection_head(features, task='denoise') - - print(f"\nDenoising task shapes:") - print(f"Input shape: {noisy_imgs.shape}") - print(f"Target shape: {targets.shape}") - print(f"Features shape: {features.shape}") - print(f"Output shape: {outputs.shape}") - + outputs = projection_head(features, task='denoise') loss = criterion_denoise(outputs, targets) batch_loss += loss @@ -150,12 +145,6 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): # Project features to rotation predictions outputs = projection_head(features, task='rotate') - print(f"\nRotation task shapes:") - print(f"Input shape: {imgs.shape}") - print(f"Target shape: {targets.shape}") - print(f"Features shape: {features.shape}") - print(f"Output shape: {outputs.shape}") - loss = criterion_rotate(outputs, targets) batch_loss += loss @@ -166,6 +155,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): batch_count += 1 avg_train_loss = total_train_loss / max(batch_count, 1) + train_losses.append(avg_train_loss) print(f"Training Loss: {avg_train_loss:.4f}") # Validation phase @@ -206,6 +196,7 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): val_batch_count += 1 avg_val_loss = val_loss / max(val_batch_count, 1) + val_losses.append(avg_val_loss) print(f"Validation Loss: {avg_val_loss:.4f}") if avg_val_loss < best_val_loss: @@ -214,6 +205,21 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): model.save("AIIA-base-512") print("Best model saved!") + # Prepare the data to be written to the CSV file + data = list(zip(range(1, len(train_losses) + 1), train_losses, val_losses)) + + # Specify the CSV file name + csv_file = 'losses.csv' + + # Write the data to the CSV file + with open(csv_file, mode='w', newline='') as file: + writer = csv.writer(file) + # Write the header + writer.writerow(['Epoch', 'Train Loss', 'Validation Loss']) + # Write the data + writer.writerows(data) + print(f"Data has been written to {csv_file}") + if __name__ == "__main__": data_path1 = "/root/training_data/vision-dataset/images_checkpoint.parquet" data_path2 = "/root/training_data/vision-dataset/vec_images_dataset.parquet" From 102d9520437ff031bc994548ac9eefbe4c79b71e Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 11:06:42 +0100 Subject: [PATCH 32/32] increased epochs --- src/pretrain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pretrain.py b/src/pretrain.py index 6e4c05c..02e4e7f 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -223,4 +223,4 @@ def pretrain_model(data_path1, data_path2, num_epochs=3): if __name__ == "__main__": data_path1 = "/root/training_data/vision-dataset/images_checkpoint.parquet" data_path2 = "/root/training_data/vision-dataset/vec_images_dataset.parquet" - pretrain_model(data_path1, data_path2, num_epochs=3) \ No newline at end of file + pretrain_model(data_path1, data_path2, num_epochs=10) \ No newline at end of file