From 1e665c4604b8a1f11e152b286e03c39b29cc76cf Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Tue, 28 Jan 2025 10:58:33 +0100 Subject: [PATCH 1/5] added first pip install version 0.1 --- MANIFEST.in | 4 ++++ pyproject.toml | 8 ++++++++ requirements.txt | 5 +++++ setup.cfg | 26 ++++++++++++++++++++++++++ setup.py | 25 +++++++++++++++++++++++++ src/aiia/__init__.py | 8 +++++--- src/aiia/data/__init__.py | 4 +++- src/aiia/model/Model.py | 6 +++++- src/aiia/model/__init__.py | 21 ++++++++++++++++++++- 9 files changed, 101 insertions(+), 6 deletions(-) create mode 100644 MANIFEST.in create mode 100644 pyproject.toml create mode 100644 requirements.txt create mode 100644 setup.cfg create mode 100644 setup.py diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..6925c8f --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,4 @@ +include LICENSE +include README.md +include requirements.txt +recursive-include src/aiia * \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a8bdbe9 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,8 @@ +[build-system] +requires = ["setuptools>=42", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.black] +line-length = 88 +target-version = ['py37'] +include = '\.pyi?$' \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..06e8438 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +torch>=4.5.0 +numpy +tqdm +pytest +pillow \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..fb45363 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,26 @@ +[metadata] +name = aiia +version = 0.1.0 +author = Your Name +author_email = falko.habel@gmx.de +description = AIIA deep learning model implementation +long_description = file: README.md +long_description_content_type = text/markdown +url = https://gitea.fabelous.app/Maschine-Learning/AIIA.git +classifiers = + Programming Language :: Python :: 3 + License :: OSI Approved :: MIT License + Operating System :: OS Independent + +[options] +package_dir = + = src +packages = find: +python_requires = >=3.7 +install_requires = + torch>=1.8.0 + numpy>=1.19.0 + tqdm>=4.62.0 + +[options.packages.find] +where = src \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..0eb6be6 --- /dev/null +++ b/setup.py @@ -0,0 +1,25 @@ +from setuptools import setup, find_packages + +setup( + name="aiia", + version="0.1.0", + packages=find_packages(where="src"), + package_dir={"": "src"}, + install_requires=[ + "torch>=1.8.0", + "numpy>=1.19.0", + "tqdm>=4.62.0", + ], + author="Falko Habel", + author_email="falko.habel@gmx.de", + description="AIIA deep learning model implementation", + long_description=open("README.md").read(), + long_description_content_type="text/markdown", + url="https://gitea.fabelous.app/Maschine-Learning/AIIA.git", + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: Creative Commons Attribution-NonCommercial 4.0 International", + "Operating System :: OS Independent", + ], + python_requires=">=3.10", +) diff --git a/src/aiia/__init__.py b/src/aiia/__init__.py index 71acf48..6dbc27a 100644 --- a/src/aiia/__init__.py +++ b/src/aiia/__init__.py @@ -1,3 +1,5 @@ -from .model import AIIA, AIIABase, AIIAchunked, AIIAExpert, AIIAmoe, AIIArecursive, AIIABaseShared -from .data import AIIADataLoader -from .model.config import AIIAConfig \ No newline at end of file +from .model.Model import AIIABase, AIIABaseShared, AIIAchunked, AIIAExpert, AIIAmoe, AIIA, AIIArecursive +from .model.config import AIIAConfig +from .data.DataLoader import DataLoader + +__version__ = "0.1.0" diff --git a/src/aiia/data/__init__.py b/src/aiia/data/__init__.py index d1ae9b0..5e8a93c 100644 --- a/src/aiia/data/__init__.py +++ b/src/aiia/data/__init__.py @@ -1 +1,3 @@ -from .DataLoader import AIIADataLoader \ No newline at end of file +from .DataLoader import AIIADataLoader + +__all__ = ["AIIADataLoader"] diff --git a/src/aiia/model/Model.py b/src/aiia/model/Model.py index 771caf8..f0e65ff 100644 --- a/src/aiia/model/Model.py +++ b/src/aiia/model/Model.py @@ -223,4 +223,8 @@ class AIIArecursive(AIIA): processed_patches.append(pp) combined_output = torch.mean(torch.stack(processed_patches, dim=0), dim=0) - return combined_output \ No newline at end of file + return combined_output + +config = AIIAConfig() +model = AIIAmoe(config, num_experts=5) +model.save("test") \ No newline at end of file diff --git a/src/aiia/model/__init__.py b/src/aiia/model/__init__.py index 0e6a459..f68a42a 100644 --- a/src/aiia/model/__init__.py +++ b/src/aiia/model/__init__.py @@ -1,2 +1,21 @@ +from .Model import ( + AIIA, + AIIABase, + AIIABaseShared, + AIIAchunked, + AIIAExpert, + AIIAmoe, + AIIArecursive +) from .config import AIIAConfig -from .Model import AIIA, AIIABase, AIIAchunked, AIIAExpert, AIIAmoe, AIIArecursive, AIIABaseShared \ No newline at end of file + +__all__ = [ + "AIIA", + "AIIABase", + "AIIABaseShared", + "AIIAchunked", + "AIIAExpert", + "AIIAmoe", + "AIIArecursive", + "AIIAConfig" +] \ No newline at end of file From 7de7eef0810bd134819335d3938da04fcbcba53d Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Tue, 28 Jan 2025 11:16:09 +0100 Subject: [PATCH 2/5] updated pretraing to create a extra class for Pretraining --- README.md | 17 +++ src/aiia/__init__.py | 2 + src/aiia/pretrain/__init__.py | 3 + src/aiia/pretrain/pretrainer.py | 219 +++++++++++++++++++++++++++++++ src/pretrain.py | 226 -------------------------------- 5 files changed, 241 insertions(+), 226 deletions(-) create mode 100644 src/aiia/pretrain/__init__.py create mode 100644 src/aiia/pretrain/pretrainer.py delete mode 100644 src/pretrain.py diff --git a/README.md b/README.md index 0d888a0..830f111 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,19 @@ # AIIA + +## Example Usage: +```Python +if __name__ == "__main__": + data_path1 = "/root/training_data/vision-dataset/images_checkpoint.parquet" + data_path2 = "/root/training_data/vision-dataset/vec_images_dataset.parquet" + + from aiia.model import AIIABase + from aiia.model.config import AIIAConfig + from aiia.pretrain import Pretrainer + + config = AIIAConfig(model_name="AIIA-Base-512x20k") + model = AIIABase(config) + + pretrainer = Pretrainer(model, learning_rate=1e-4) + pretrainer.train(data_path1, data_path2, num_epochs=10) +``` \ No newline at end of file diff --git a/src/aiia/__init__.py b/src/aiia/__init__.py index 6dbc27a..6a27146 100644 --- a/src/aiia/__init__.py +++ b/src/aiia/__init__.py @@ -1,5 +1,7 @@ from .model.Model import AIIABase, AIIABaseShared, AIIAchunked, AIIAExpert, AIIAmoe, AIIA, AIIArecursive from .model.config import AIIAConfig from .data.DataLoader import DataLoader +from .pretrain.pretrainer import Pretrainer, ProjectionHead + __version__ = "0.1.0" diff --git a/src/aiia/pretrain/__init__.py b/src/aiia/pretrain/__init__.py new file mode 100644 index 0000000..c45cbc4 --- /dev/null +++ b/src/aiia/pretrain/__init__.py @@ -0,0 +1,3 @@ +from .pretrainer import Pretrainer, ProjectionHead + +__all__ = ["Pretrainer", "ProjectionHead"] \ No newline at end of file diff --git a/src/aiia/pretrain/pretrainer.py b/src/aiia/pretrain/pretrainer.py new file mode 100644 index 0000000..b540db0 --- /dev/null +++ b/src/aiia/pretrain/pretrainer.py @@ -0,0 +1,219 @@ +import torch +from torch import nn +import csv +import pandas as pd +from tqdm import tqdm +from ..model.Model import AIIA +from ..data.DataLoader import AIIADataLoader + +class ProjectionHead(nn.Module): + def __init__(self): + super().__init__() + self.conv_denoise = nn.Conv2d(512, 3, kernel_size=1) + self.conv_rotate = nn.Conv2d(512, 4, kernel_size=1) # 4 classes for 0, 90, 180, 270 degrees + + def forward(self, x, task='denoise'): + if task == 'denoise': + return self.conv_denoise(x) + else: + return self.conv_rotate(x).mean(dim=(2, 3)) # Global average pooling for rotation task + +class Pretrainer: + def __init__(self, model: AIIA, learning_rate=1e-4): + """ + Initialize the pretrainer with a model. + + Args: + model (AIIA): The model instance to pretrain + learning_rate (float): Learning rate for optimization + """ + self.device = "cuda" if torch.cuda.is_available() else "cpu" + self.model = model.to(self.device) + self.projection_head = ProjectionHead().to(self.device) + self.optimizer = torch.optim.AdamW( + list(self.model.parameters()) + list(self.projection_head.parameters()), + lr=learning_rate + ) + self.train_losses = [] + self.val_losses = [] + + @staticmethod + def safe_collate(batch): + """Safely collate batch data handling both denoise and rotate tasks.""" + denoise_batch = [] + rotate_batch = [] + + for sample in batch: + try: + noisy_img, target, task = sample + if task == 'denoise': + denoise_batch.append({ + 'image': noisy_img, + 'target': target, + 'task': task + }) + else: # rotate task + rotate_batch.append({ + 'image': noisy_img, + 'target': target, + 'task': task + }) + except Exception as e: + print(f"Skipping sample due to error: {e}") + continue + + if not denoise_batch and not rotate_batch: + return None + + batch_data = { + 'denoise': None, + 'rotate': None + } + + if denoise_batch: + images = torch.stack([x['image'] for x in denoise_batch]) + targets = torch.stack([x['target'] for x in denoise_batch]) + batch_data['denoise'] = (images, targets) + + if rotate_batch: + images = torch.stack([x['image'] for x in rotate_batch]) + targets = torch.stack([x['target'] for x in rotate_batch]) + batch_data['rotate'] = (images, targets) + + return batch_data + + def _process_batch(self, batch_data, criterion_denoise, criterion_rotate, training=True): + """Process a single batch of data.""" + batch_loss = 0 + + if batch_data['denoise'] is not None: + noisy_imgs, targets = batch_data['denoise'] + noisy_imgs = noisy_imgs.to(self.device) + targets = targets.to(self.device) + + features = self.model(noisy_imgs) + outputs = self.projection_head(features, task='denoise') + loss = criterion_denoise(outputs, targets) + batch_loss += loss + + if batch_data['rotate'] is not None: + imgs, targets = batch_data['rotate'] + imgs = imgs.to(self.device) + targets = targets.long().to(self.device) + + features = self.model(imgs) + outputs = self.projection_head(features, task='rotate') + loss = criterion_rotate(outputs, targets) + batch_loss += loss + + return batch_loss + + def train(self, data_path1, data_path2, num_epochs=3, batch_size=2, sample_size=10000): + """ + Train the model using the specified datasets. + + Args: + data_path1 (str): Path to first dataset + data_path2 (str): Path to second dataset + num_epochs (int): Number of training epochs + batch_size (int): Batch size for training + sample_size (int): Number of samples to use from each dataset + """ + # Read and merge datasets + df1 = pd.read_parquet(data_path1).head(sample_size) + df2 = pd.read_parquet(data_path2).head(sample_size) + merged_df = pd.concat([df1, df2], ignore_index=True) + + # Initialize data loader + aiia_loader = AIIADataLoader( + merged_df, + column="image_bytes", + batch_size=batch_size, + pretraining=True, + collate_fn=self.safe_collate + ) + + criterion_denoise = nn.MSELoss() + criterion_rotate = nn.CrossEntropyLoss() + best_val_loss = float('inf') + + for epoch in range(num_epochs): + print(f"\nEpoch {epoch+1}/{num_epochs}") + print("-" * 20) + + # Training phase + self.model.train() + self.projection_head.train() + total_train_loss = 0.0 + batch_count = 0 + + for batch_data in tqdm(aiia_loader.train_loader): + if batch_data is None: + continue + + self.optimizer.zero_grad() + batch_loss = self._process_batch(batch_data, criterion_denoise, criterion_rotate) + + if batch_loss > 0: + batch_loss.backward() + self.optimizer.step() + total_train_loss += batch_loss.item() + batch_count += 1 + + avg_train_loss = total_train_loss / max(batch_count, 1) + self.train_losses.append(avg_train_loss) + print(f"Training Loss: {avg_train_loss:.4f}") + + # Validation phase + self.model.eval() + self.projection_head.eval() + val_loss = self._validate(aiia_loader.val_loader, criterion_denoise, criterion_rotate) + + if val_loss < best_val_loss: + best_val_loss = val_loss + self.save_model("AIIA-base-512") + print("Best model saved!") + + self.save_losses('losses.csv') + + def _validate(self, val_loader, criterion_denoise, criterion_rotate): + """Perform validation and return average validation loss.""" + val_loss = 0.0 + val_batch_count = 0 + + with torch.no_grad(): + for batch_data in val_loader: + if batch_data is None: + continue + + batch_loss = self._process_batch( + batch_data, criterion_denoise, criterion_rotate, training=False + ) + + if batch_loss > 0: + val_loss += batch_loss.item() + val_batch_count += 1 + + avg_val_loss = val_loss / max(val_batch_count, 1) + self.val_losses.append(avg_val_loss) + print(f"Validation Loss: {avg_val_loss:.4f}") + return avg_val_loss + + def save_model(self, path): + """Save the model and projection head.""" + self.model.save(path) + torch.save(self.projection_head.state_dict(), f"{path}_projection_head.pth") + + def save_losses(self, csv_file): + """Save training and validation losses to a CSV file.""" + data = list(zip( + range(1, len(self.train_losses) + 1), + self.train_losses, + self.val_losses + )) + + with open(csv_file, mode='w', newline='') as file: + writer = csv.writer(file) + writer.writerow(['Epoch', 'Train Loss', 'Validation Loss']) + writer.writerows(data) + print(f"Loss data has been written to {csv_file}") \ No newline at end of file diff --git a/src/pretrain.py b/src/pretrain.py deleted file mode 100644 index 02e4e7f..0000000 --- a/src/pretrain.py +++ /dev/null @@ -1,226 +0,0 @@ -import torch -from torch import nn -import csv -import pandas as pd -from aiia.model.config import AIIAConfig -from aiia.model import AIIABase -from aiia.data.DataLoader import AIIADataLoader -from tqdm import tqdm - -class ProjectionHead(nn.Module): - def __init__(self): - super().__init__() - self.conv_denoise = nn.Conv2d(512, 3, kernel_size=1) - self.conv_rotate = nn.Conv2d(512, 4, kernel_size=1) # 4 classes for 0, 90, 180, 270 degrees - - def forward(self, x, task='denoise'): - if task == 'denoise': - return self.conv_denoise(x) - else: - return self.conv_rotate(x).mean(dim=(2, 3)) # Global average pooling for rotation task - -def pretrain_model(data_path1, data_path2, num_epochs=3): - # Read and merge datasets - df1 = pd.read_parquet(data_path1).head(10000) - df2 = pd.read_parquet(data_path2).head(10000) - merged_df = pd.concat([df1, df2], ignore_index=True) - - # Model configuration - config = AIIAConfig( - model_name="AIIA-Base-512x20k", - ) - - # Initialize model and projection head - model = AIIABase(config) - projection_head = ProjectionHead() - - device = "cuda" if torch.cuda.is_available() else "cpu" - model.to(device) - projection_head.to(device) - - def safe_collate(batch): - denoise_batch = [] - rotate_batch = [] - - for sample in batch: - try: - noisy_img, target, task = sample - if task == 'denoise': - denoise_batch.append({ - 'image': noisy_img, - 'target': target, - 'task': task - }) - else: # rotate task - rotate_batch.append({ - 'image': noisy_img, - 'target': target, - 'task': task - }) - except Exception as e: - print(f"Skipping sample due to error: {e}") - continue - - if not denoise_batch and not rotate_batch: - return None - - batch_data = { - 'denoise': None, - 'rotate': None - } - - if denoise_batch: - images = torch.stack([x['image'] for x in denoise_batch]) - targets = torch.stack([x['target'] for x in denoise_batch]) - batch_data['denoise'] = (images, targets) - - if rotate_batch: - images = torch.stack([x['image'] for x in rotate_batch]) - targets = torch.stack([x['target'] for x in rotate_batch]) - batch_data['rotate'] = (images, targets) - - return batch_data - - aiia_loader = AIIADataLoader( - merged_df, - column="image_bytes", - batch_size=2, - pretraining=True, - collate_fn=safe_collate - ) - - train_loader = aiia_loader.train_loader - val_loader = aiia_loader.val_loader - - criterion_denoise = nn.MSELoss() - criterion_rotate = nn.CrossEntropyLoss() - - # Update optimizer to include projection head parameters - optimizer = torch.optim.AdamW( - list(model.parameters()) + list(projection_head.parameters()), - lr=config.learning_rate - ) - - best_val_loss = float('inf') - train_losses = [] - val_losses = [] - for epoch in range(num_epochs): - print(f"\nEpoch {epoch+1}/{num_epochs}") - print("-" * 20) - - # Training phase - model.train() - projection_head.train() - total_train_loss = 0.0 - batch_count = 0 - - for batch_data in tqdm(train_loader): - if batch_data is None: - continue - - optimizer.zero_grad() - batch_loss = 0 - - # Handle denoise task - if batch_data['denoise'] is not None: - noisy_imgs, targets = batch_data['denoise'] - noisy_imgs = noisy_imgs.to(device) - targets = targets.to(device) - - # Get features from base model - features = model(noisy_imgs) - # Project features back to image space - outputs = projection_head(features, task='denoise') - loss = criterion_denoise(outputs, targets) - batch_loss += loss - - # Handle rotate task - if batch_data['rotate'] is not None: - imgs, targets = batch_data['rotate'] - imgs = imgs.to(device) - targets = targets.long().to(device) - - # Get features from base model - features = model(imgs) - # Project features to rotation predictions - outputs = projection_head(features, task='rotate') - - loss = criterion_rotate(outputs, targets) - batch_loss += loss - - if batch_loss > 0: - batch_loss.backward() - optimizer.step() - total_train_loss += batch_loss.item() - batch_count += 1 - - avg_train_loss = total_train_loss / max(batch_count, 1) - train_losses.append(avg_train_loss) - print(f"Training Loss: {avg_train_loss:.4f}") - - # Validation phase - model.eval() - projection_head.eval() - val_loss = 0.0 - val_batch_count = 0 - - with torch.no_grad(): - for batch_data in val_loader: - if batch_data is None: - continue - - batch_loss = 0 - - if batch_data['denoise'] is not None: - noisy_imgs, targets = batch_data['denoise'] - noisy_imgs = noisy_imgs.to(device) - targets = targets.to(device) - - features = model(noisy_imgs) - outputs = projection_head(features, task='denoise') - loss = criterion_denoise(outputs, targets) - batch_loss += loss - - if batch_data['rotate'] is not None: - imgs, targets = batch_data['rotate'] - imgs = imgs.to(device) - targets = targets.long().to(device) - - features = model(imgs) - outputs = projection_head(features, task='rotate') - loss = criterion_rotate(outputs, targets) - batch_loss += loss - - if batch_loss > 0: - val_loss += batch_loss.item() - val_batch_count += 1 - - avg_val_loss = val_loss / max(val_batch_count, 1) - val_losses.append(avg_val_loss) - print(f"Validation Loss: {avg_val_loss:.4f}") - - if avg_val_loss < best_val_loss: - best_val_loss = avg_val_loss - # Save both model and projection head - model.save("AIIA-base-512") - print("Best model saved!") - - # Prepare the data to be written to the CSV file - data = list(zip(range(1, len(train_losses) + 1), train_losses, val_losses)) - - # Specify the CSV file name - csv_file = 'losses.csv' - - # Write the data to the CSV file - with open(csv_file, mode='w', newline='') as file: - writer = csv.writer(file) - # Write the header - writer.writerow(['Epoch', 'Train Loss', 'Validation Loss']) - # Write the data - writer.writerows(data) - print(f"Data has been written to {csv_file}") - -if __name__ == "__main__": - data_path1 = "/root/training_data/vision-dataset/images_checkpoint.parquet" - data_path2 = "/root/training_data/vision-dataset/vec_images_dataset.parquet" - pretrain_model(data_path1, data_path2, num_epochs=10) \ No newline at end of file From a369c49f15eb1213cdf2dd952d12144e0fc62d82 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Tue, 28 Jan 2025 11:27:42 +0100 Subject: [PATCH 3/5] improved pretraining --- README.md | 37 +++++++++++++++++++++------------ example.py | 27 ++++++++++++++++++++++++ src/aiia/pretrain/pretrainer.py | 29 ++++++++++++++++++-------- 3 files changed, 71 insertions(+), 22 deletions(-) create mode 100644 example.py diff --git a/README.md b/README.md index 830f111..6f149b1 100644 --- a/README.md +++ b/README.md @@ -3,17 +3,28 @@ ## Example Usage: ```Python -if __name__ == "__main__": - data_path1 = "/root/training_data/vision-dataset/images_checkpoint.parquet" - data_path2 = "/root/training_data/vision-dataset/vec_images_dataset.parquet" - - from aiia.model import AIIABase - from aiia.model.config import AIIAConfig - from aiia.pretrain import Pretrainer - - config = AIIAConfig(model_name="AIIA-Base-512x20k") - model = AIIABase(config) - - pretrainer = Pretrainer(model, learning_rate=1e-4) - pretrainer.train(data_path1, data_path2, num_epochs=10) +from aiia.model import AIIABase +from aiia.model.config import AIIAConfig +from aiia.pretrain import Pretrainer + +# Create your model +config = AIIAConfig(model_name="AIIA-Base-512x20k") +model = AIIABase(config) + +# Initialize pretrainer with the model +pretrainer = Pretrainer(model, learning_rate=1e-4) + +# List of dataset paths +dataset_paths = [ + "/path/to/dataset1.parquet", + "/path/to/dataset2.parquet" +] + +# Start training with multiple datasets +pretrainer.train( + dataset_paths=dataset_paths, + num_epochs=10, + batch_size=2, + sample_size=10000 +) ``` \ No newline at end of file diff --git a/example.py b/example.py new file mode 100644 index 0000000..6e1620b --- /dev/null +++ b/example.py @@ -0,0 +1,27 @@ +data_path1 = "/root/training_data/vision-dataset/images_checkpoint.parquet" +data_path2 = "/root/training_data/vision-dataset/vec_images_dataset.parquet" + +from aiia.model import AIIABase +from aiia.model.config import AIIAConfig +from aiia.pretrain import Pretrainer + +# Create your model +config = AIIAConfig(model_name="AIIA-Base-512x10k-small", num_hidden_layers=6, hidden_size=256) +model = AIIABase(config) + +# Initialize pretrainer with the model +pretrainer = Pretrainer(model, learning_rate=config.learning_rate) + +# List of dataset paths +dataset_paths = [ + data_path1, + data_path2 +] + +# Start training with multiple datasets +pretrainer.train( + dataset_paths=dataset_paths, + num_epochs=10, + batch_size=2, + sample_size=10000 +) \ No newline at end of file diff --git a/src/aiia/pretrain/pretrainer.py b/src/aiia/pretrain/pretrainer.py index b540db0..913b77a 100644 --- a/src/aiia/pretrain/pretrainer.py +++ b/src/aiia/pretrain/pretrainer.py @@ -108,26 +108,37 @@ class Pretrainer: return batch_loss - def train(self, data_path1, data_path2, num_epochs=3, batch_size=2, sample_size=10000): + def train(self, dataset_paths, column="image_bytes", num_epochs=3, batch_size=2, sample_size=10000): """ - Train the model using the specified datasets. + Train the model using multiple specified datasets. Args: - data_path1 (str): Path to first dataset - data_path2 (str): Path to second dataset + dataset_paths (list): List of paths to parquet datasets num_epochs (int): Number of training epochs batch_size (int): Batch size for training sample_size (int): Number of samples to use from each dataset """ - # Read and merge datasets - df1 = pd.read_parquet(data_path1).head(sample_size) - df2 = pd.read_parquet(data_path2).head(sample_size) - merged_df = pd.concat([df1, df2], ignore_index=True) + if not dataset_paths: + raise ValueError("No dataset paths provided") + + # Read and merge all datasets + dataframes = [] + for path in dataset_paths: + try: + df = pd.read_parquet(path).head(sample_size) + dataframes.append(df) + except Exception as e: + print(f"Error loading dataset {path}: {e}") + + if not dataframes: + raise ValueError("No valid datasets could be loaded") + + merged_df = pd.concat(dataframes, ignore_index=True) # Initialize data loader aiia_loader = AIIADataLoader( merged_df, - column="image_bytes", + column=column, batch_size=batch_size, pretraining=True, collate_fn=self.safe_collate From 3631df7f0af72be95a35cb4b21f54279e0846cf8 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Tue, 28 Jan 2025 11:42:03 +0100 Subject: [PATCH 4/5] updated pretrainer to handle multiple classes and configs. --- example.py | 2 +- src/aiia/pretrain/pretrainer.py | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/example.py b/example.py index 6e1620b..6d605ca 100644 --- a/example.py +++ b/example.py @@ -10,7 +10,7 @@ config = AIIAConfig(model_name="AIIA-Base-512x10k-small", num_hidden_layers=6, h model = AIIABase(config) # Initialize pretrainer with the model -pretrainer = Pretrainer(model, learning_rate=config.learning_rate) +pretrainer = Pretrainer(model, learning_rate=config.learning_rate, config=config) # List of dataset paths dataset_paths = [ diff --git a/src/aiia/pretrain/pretrainer.py b/src/aiia/pretrain/pretrainer.py index 913b77a..fa84fcb 100644 --- a/src/aiia/pretrain/pretrainer.py +++ b/src/aiia/pretrain/pretrainer.py @@ -4,13 +4,15 @@ import csv import pandas as pd from tqdm import tqdm from ..model.Model import AIIA +from ..model.config import AIIAConfig from ..data.DataLoader import AIIADataLoader + class ProjectionHead(nn.Module): - def __init__(self): + def __init__(self, hidden_size): super().__init__() - self.conv_denoise = nn.Conv2d(512, 3, kernel_size=1) - self.conv_rotate = nn.Conv2d(512, 4, kernel_size=1) # 4 classes for 0, 90, 180, 270 degrees + self.conv_denoise = nn.Conv2d(hidden_size, 3, kernel_size=1) + self.conv_rotate = nn.Conv2d(hidden_size, 4, kernel_size=1) # 4 classes for 0, 90, 180, 270 degrees def forward(self, x, task='denoise'): if task == 'denoise': @@ -19,17 +21,19 @@ class ProjectionHead(nn.Module): return self.conv_rotate(x).mean(dim=(2, 3)) # Global average pooling for rotation task class Pretrainer: - def __init__(self, model: AIIA, learning_rate=1e-4): + def __init__(self, model: AIIA, learning_rate=1e-4, config: AIIAConfig=None): """ Initialize the pretrainer with a model. Args: model (AIIA): The model instance to pretrain learning_rate (float): Learning rate for optimization + config (dict): Model configuration containing hidden_size """ self.device = "cuda" if torch.cuda.is_available() else "cpu" self.model = model.to(self.device) - self.projection_head = ProjectionHead().to(self.device) + hidden_size = config.hidden_size + self.projection_head = ProjectionHead(hidden_size).to(self.device) self.optimizer = torch.optim.AdamW( list(self.model.parameters()) + list(self.projection_head.parameters()), lr=learning_rate From 3c0e9e8ac1be6e88551ecc1304acfb1c75d4d311 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Tue, 28 Jan 2025 17:18:21 +0100 Subject: [PATCH 5/5] fixed savings --- run.py | 27 +++++++++++++++++++++++++++ src/aiia/pretrain/pretrainer.py | 6 +----- 2 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 run.py diff --git a/run.py b/run.py new file mode 100644 index 0000000..fb20e63 --- /dev/null +++ b/run.py @@ -0,0 +1,27 @@ +data_path1 = "/root/training_data/vision-dataset/images_pretrain.parquet" +data_path2 = "/root/training_data/vision-dataset/vector_img_pretrain.parquet" + +from aiia.model import AIIABase +from aiia.model.config import AIIAConfig +from aiia.pretrain import Pretrainer + +# Create your model +config = AIIAConfig(model_name="AIIA-Base-512x20k") +model = AIIABase(config) + +# Initialize pretrainer with the model +pretrainer = Pretrainer(model, learning_rate=config.learning_rate, config=config) + +# List of dataset paths +dataset_paths = [ + data_path1, + data_path2 +] + +# Start training with multiple datasets +pretrainer.train( + dataset_paths=dataset_paths, + num_epochs=10, + batch_size=2, + sample_size=10000 +) \ No newline at end of file diff --git a/src/aiia/pretrain/pretrainer.py b/src/aiia/pretrain/pretrainer.py index fa84fcb..30ebc92 100644 --- a/src/aiia/pretrain/pretrainer.py +++ b/src/aiia/pretrain/pretrainer.py @@ -186,7 +186,7 @@ class Pretrainer: if val_loss < best_val_loss: best_val_loss = val_loss - self.save_model("AIIA-base-512") + self.model.save("AIIA-base-512") print("Best model saved!") self.save_losses('losses.csv') @@ -214,10 +214,6 @@ class Pretrainer: print(f"Validation Loss: {avg_val_loss:.4f}") return avg_val_loss - def save_model(self, path): - """Save the model and projection head.""" - self.model.save(path) - torch.save(self.projection_head.state_dict(), f"{path}_projection_head.pth") def save_losses(self, csv_file): """Save training and validation losses to a CSV file."""