From b1c486afeee8181d4cadfd9cf6789e8f5ace1e68 Mon Sep 17 00:00:00 2001
From: Falko Habel <falko.habel@gmx.de>
Date: Mon, 24 Feb 2025 13:41:11 +0100
Subject: [PATCH 1/2] added fp16 and bf16 support when loading model

---
 pyproject.toml          | 10 ++++-
 setup.py                |  2 +-
 src/aiia/model/Model.py | 94 +++++++++++++++++++++++++++--------------
 3 files changed, 72 insertions(+), 34 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index a8bdbe9..8587060 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,4 +5,12 @@ build-backend = "setuptools.build_meta"
 [tool.black]
 line-length = 88
 target-version = ['py37']
-include = '\.pyi?$'
\ No newline at end of file
+include = '\.pyi?$'
+
+[project]
+name = "AIIA"
+version = "0.1.1"  # Replace with your desired version number
+description = "AIIA Deep Learning Model"
+authors = [
+    { name="Falko Habel", email="falko.habel@gmx.de" }
+]
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 0eb6be6..ee79501 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 
 setup(
     name="aiia",
-    version="0.1.0",
+    version="0.1.1",
     packages=find_packages(where="src"),
     package_dir={"": "src"},
     install_requires=[
diff --git a/src/aiia/model/Model.py b/src/aiia/model/Model.py
index c188464..2180545 100644
--- a/src/aiia/model/Model.py
+++ b/src/aiia/model/Model.py
@@ -3,6 +3,7 @@ from torch import nn
 import torch
 import os
 import copy
+import warnings
 
 
 class AIIA(nn.Module):
@@ -22,15 +23,66 @@ class AIIA(nn.Module):
         self.config.save(path)
 
     @classmethod
-    def load(cls, path):
+    def load(cls, path, precision: str = None):
+        """
+        Load the model from the given path.
+        
+        Parameters:
+        - path (str): The directory containing the saved model.
+        - precision (str, optional): The desired precision for model weights.
+          Options are:
+              'fp16'   -> load weights with torch.float16,
+              'bf16'   -> load weights with torch.bfloat16.
+          If precision is None, default torch.float32 is used.
+        """
         config = AIIAConfig.load(path)
         model = cls(config)
-        # Check if CUDA is available and set the device accordingly
         device = 'cuda' if torch.cuda.is_available() else 'cpu'
-        # Load the state dictionary with the correct device mapping
-        model.load_state_dict(torch.load(f"{path}/model.pth", map_location=device))
+        dtype = None
+
+        if precision is not None:
+            if precision.lower() == 'fp16':
+                dtype = torch.float16
+            elif precision.lower() == 'bf16':
+                # For CUDA devices, check whether BF16 is supported. If not, fallback to FP16.
+                if device == 'cuda' and not torch.cuda.is_bf16_supported():
+                    warnings.warn("BF16 is not supported on this GPU. Falling back to FP16.")
+                    dtype = torch.float16
+                else:
+                    dtype = torch.bfloat16
+            else:
+                raise ValueError("Unsupported precision. Use 'fp16', 'bf16', or leave as None.")
+
+        if dtype is not None:
+            model_dict = torch.load(f"{path}/model.pth", map_location=device, dtype=dtype)
+        else:
+            model_dict = torch.load(f"{path}/model.pth", map_location=device)
+        
+        model.load_state_dict(model_dict)
         return model
-    
+
+class AIIABase(AIIA):
+    def __init__(self, config: AIIAConfig, **kwargs):
+        super().__init__(config=config, **kwargs)  
+        self.config = self.config
+        
+        # Initialize layers based on configuration
+        layers = []
+        in_channels = self.config.num_channels
+        
+        for _ in range(self.config.num_hidden_layers):
+            layers.extend([
+                nn.Conv2d(in_channels, self.config.hidden_size,
+                          kernel_size=self.config.kernel_size, padding=1),
+                getattr(nn, self.config.activation_function)(),
+                nn.MaxPool2d(kernel_size=1, stride=1)
+             ])
+            in_channels = self.config.hidden_size
+        
+        self.cnn = nn.Sequential(*layers)
+
+    def forward(self, x):
+        return self.cnn(x)
     
 class AIIABaseShared(AIIA):
     def __init__(self, config: AIIAConfig, **kwargs):
@@ -107,29 +159,6 @@ class AIIABaseShared(AIIA):
             
         return out
 
-class AIIABase(AIIA):
-    def __init__(self, config: AIIAConfig, **kwargs):
-        super().__init__(config=config, **kwargs)  
-        self.config = self.config
-        
-        # Initialize layers based on configuration
-        layers = []
-        in_channels = self.config.num_channels
-        
-        for _ in range(self.config.num_hidden_layers):
-            layers.extend([
-                nn.Conv2d(in_channels, self.config.hidden_size,
-                          kernel_size=self.config.kernel_size, padding=1),
-                getattr(nn, self.config.activation_function)(),
-                nn.MaxPool2d(kernel_size=1, stride=1)
-             ])
-            in_channels = self.config.hidden_size
-        
-        self.cnn = nn.Sequential(*layers)
-
-    def forward(self, x):
-        return self.cnn(x)
-
 class AIIAExpert(AIIA):
     def __init__(self, config: AIIAConfig, base_class=AIIABase, **kwargs):
         super().__init__(config=config, **kwargs)
@@ -227,7 +256,8 @@ class AIIArecursive(AIIA):
                 
             combined_output = torch.mean(torch.stack(processed_patches, dim=0), dim=0)
             return combined_output
-        
-config = AIIAConfig()
-model = AIIAmoe(config, num_experts=5)
-model.save("test")
\ No newline at end of file
+
+if __name__ =="__main__":            
+    config = AIIAConfig()
+    model = AIIAmoe(config, num_experts=5)
+    model.save("test")
\ No newline at end of file

From 50e91b10e8e066d6488bf15c2f53f446e0b766b7 Mon Sep 17 00:00:00 2001
From: Falko Habel <falko.habel@gmx.de>
Date: Mon, 24 Feb 2025 14:13:10 +0100
Subject: [PATCH 2/2] Fix model loading: torch.load() does not accept a dtype argument

---
 pyproject.toml          | 20 +++++++++++++++++---
 run.py                  | 25 ++-----------------------
 setup.py                | 21 +--------------------
 src/aiia/model/Model.py | 25 +++++++++----------------
 4 files changed, 29 insertions(+), 62 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8587060..1a40317 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,9 +8,24 @@ target-version = ['py37']
 include = '\.pyi?$'
 
 [project]
-name = "AIIA"
-version = "0.1.1"  # Replace with your desired version number
-description = "AIIA Deep Learning Model"
+name = "aiia"
+version = "0.1.1"
+description = "AIIA Deep Learning Model Implementation"
+readme = "README.md"
 authors = [
     { name="Falko Habel", email="falko.habel@gmx.de" }
+]
+dependencies = [
+    "torch>=2.5.0",
+    "numpy",
+    "tqdm",
+    "pytest",
+    "pillow"
+]
+# Same floor setup.py declared before the move; torch>=2.5.0 does not support Python 3.7.
+requires-python = ">=3.10"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent"
 ]
\ No newline at end of file
diff --git a/run.py b/run.py
index fb20e63..f9fa8a9 100644
--- a/run.py
+++ b/run.py
@@ -1,27 +1,6 @@
-data_path1 = "/root/training_data/vision-dataset/images_pretrain.parquet"
-data_path2 = "/root/training_data/vision-dataset/vector_img_pretrain.parquet"
-    
-from aiia.model import AIIABase
-from aiia.model.config import AIIAConfig
-from aiia.pretrain import Pretrainer
 
-# Create your model
-config = AIIAConfig(model_name="AIIA-Base-512x20k")
-model = AIIABase(config)
 
-# Initialize pretrainer with the model
-pretrainer = Pretrainer(model, learning_rate=config.learning_rate, config=config)
 
-# List of dataset paths
-dataset_paths = [
-    data_path1,
-    data_path2
-]
+from aiia import AIIABase
 
-# Start training with multiple datasets
-pretrainer.train(
-    dataset_paths=dataset_paths,
-    num_epochs=10,
-    batch_size=2,
-    sample_size=10000
-)
\ No newline at end of file
+model = AIIABase.load(path="AIIA-base-512", precision="bf16")
\ No newline at end of file
diff --git a/setup.py b/setup.py
index ee79501..19c7ee8 100644
--- a/setup.py
+++ b/setup.py
@@ -1,25 +1,6 @@
 from setuptools import setup, find_packages
 
 setup(
-    name="aiia",
-    version="0.1.1",
     packages=find_packages(where="src"),
     package_dir={"": "src"},
-    install_requires=[
-        "torch>=1.8.0",
-        "numpy>=1.19.0",
-        "tqdm>=4.62.0",
-    ],
-    author="Falko Habel",
-    author_email="falko.habel@gmx.de",
-    description="AIIA deep learning model implementation",
-    long_description=open("README.md").read(),
-    long_description_content_type="text/markdown",
-    url="https://gitea.fabelous.app/Maschine-Learning/AIIA.git",
-    classifiers=[
-        "Programming Language :: Python :: 3",
-        "License :: OSI Approved :: Creative Commons Attribution-NonCommercial 4.0 International",
-        "Operating System :: OS Independent",
-    ],
-    python_requires=">=3.10",
-)
+)
\ No newline at end of file
diff --git a/src/aiia/model/Model.py b/src/aiia/model/Model.py
index 2180545..c067a67 100644
--- a/src/aiia/model/Model.py
+++ b/src/aiia/model/Model.py
@@ -24,17 +24,6 @@ class AIIA(nn.Module):
 
     @classmethod
     def load(cls, path, precision: str = None):
-        """
-        Load the model from the given path.
-        
-        Parameters:
-        - path (str): The directory containing the saved model.
-        - precision (str, optional): The desired precision for model weights.
-          Options are:
-              'fp16'   -> load weights with torch.float16,
-              'bf16'   -> load weights with torch.bfloat16.
-          If precision is None, default torch.float32 is used.
-        """
         config = AIIAConfig.load(path)
         model = cls(config)
         device = 'cuda' if torch.cuda.is_available() else 'cpu'
@@ -44,7 +33,6 @@ class AIIA(nn.Module):
             if precision.lower() == 'fp16':
                 dtype = torch.float16
             elif precision.lower() == 'bf16':
-                # For CUDA devices, check whether BF16 is supported. If not, fallback to FP16.
                 if device == 'cuda' and not torch.cuda.is_bf16_supported():
                     warnings.warn("BF16 is not supported on this GPU. Falling back to FP16.")
                     dtype = torch.float16
@@ -53,14 +41,18 @@ class AIIA(nn.Module):
             else:
                 raise ValueError("Unsupported precision. Use 'fp16', 'bf16', or leave as None.")
 
+        # torch.load() has no dtype argument, so always load the checkpoint as saved.
+        model_dict = torch.load(f"{path}/model.pth", map_location=device)
+        model.load_state_dict(model_dict)
+
+        # load_state_dict() copies values into the existing parameters and keeps
+        # their dtype, so cast the module itself to get the requested precision.
         if dtype is not None:
-            model_dict = torch.load(f"{path}/model.pth", map_location=device, dtype=dtype)
-        else:
-            model_dict = torch.load(f"{path}/model.pth", map_location=device)
-        
-        model.load_state_dict(model_dict)
+            model = model.to(dtype=dtype)
+
         return model
 
+
 class AIIABase(AIIA):
     def __init__(self, config: AIIAConfig, **kwargs):
         super().__init__(config=config, **kwargs)