Merge pull request 'half_precision' (#7) from half_precision into develop
Reviewed-on: #7
commit 91f352340d

pyproject.toml

@@ -5,4 +5,26 @@ build-backend = "setuptools.build_meta"
 [tool.black]
 line-length = 88
 target-version = ['py37']
 include = '\.pyi?$'
+
+[project]
+name = "aiia"
+version = "0.1.1"
+description = "AIIA Deep Learning Model Implementation"
+readme = "README.md"
+authors = [
+  { name="Falko Habel", email="falko.habel@gmx.de" }
+]
+dependencies = [
+    "torch>=2.5.0",
+    "numpy",
+    "tqdm",
+    "pytest",
+    "pillow"
+]
+requires-python = ">=3.7"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent"
+]
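
With the [project] table in place, the metadata that used to live in setup.py is queryable at runtime. A minimal sanity check, assuming the package has been installed (for example with an editable install) under the distribution name "aiia" declared above:

# Standard-library lookup of the installed metadata; the expected values
# simply mirror the pyproject.toml section shown in this diff.
from importlib.metadata import metadata, version

print(version("aiia"))              # expected: 0.1.1
print(metadata("aiia")["Summary"])  # expected: AIIA Deep Learning Model Implementation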

run.py (25 changed lines)

@@ -1,27 +1,6 @@
-data_path1 = "/root/training_data/vision-dataset/images_pretrain.parquet"
-data_path2 = "/root/training_data/vision-dataset/vector_img_pretrain.parquet"
-
-from aiia.model import AIIABase
-from aiia.model.config import AIIAConfig
-from aiia.pretrain import Pretrainer
-
-# Create your model
-config = AIIAConfig(model_name="AIIA-Base-512x20k")
-model = AIIABase(config)
-
-# Initialize pretrainer with the model
-pretrainer = Pretrainer(model, learning_rate=config.learning_rate, config=config)
-
-# List of dataset paths
-dataset_paths = [
-    data_path1,
-    data_path2
-]
-
-# Start training with multiple datasets
-pretrainer.train(
-    dataset_paths=dataset_paths,
-    num_epochs=10,
-    batch_size=2,
-    sample_size=10000
-)
+from aiia import AIIABase
+
+model = AIIABase.load(path="AIIA-base-512", precision="bf16")
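
run.py now only exercises the new loading path. A short sketch of the precision argument this PR introduces; the checkpoint directory name is the one used in run.py and is assumed to exist locally:

# Illustrative calls only; see the load() changes further down in this diff.
from aiia import AIIABase

model_fp32 = AIIABase.load(path="AIIA-base-512")                     # default: no cast
model_bf16 = AIIABase.load(path="AIIA-base-512", precision="bf16")   # falls back to FP16 on GPUs without BF16 support
model_fp16 = AIIABase.load(path="AIIA-base-512", precision="fp16")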

setup.py (21 changed lines)

@@ -1,25 +1,6 @@
 from setuptools import setup, find_packages

 setup(
-    name="aiia",
-    version="0.1.0",
     packages=find_packages(where="src"),
     package_dir={"": "src"},
-    install_requires=[
-        "torch>=1.8.0",
-        "numpy>=1.19.0",
-        "tqdm>=4.62.0",
-    ],
-    author="Falko Habel",
-    author_email="falko.habel@gmx.de",
-    description="AIIA deep learning model implementation",
-    long_description=open("README.md").read(),
-    long_description_content_type="text/markdown",
-    url="https://gitea.fabelous.app/Maschine-Learning/AIIA.git",
-    classifiers=[
-        "Programming Language :: Python :: 3",
-        "License :: OSI Approved :: Creative Commons Attribution-NonCommercial 4.0 International",
-        "Operating System :: OS Independent",
-    ],
-    python_requires=">=3.10",
-)
+)

@@ -3,6 +3,7 @@ from torch import nn
 import torch
 import os
 import copy
+import warnings


 class AIIA(nn.Module):

@@ -22,15 +23,59 @@ class AIIA(nn.Module):
         self.config.save(path)

     @classmethod
-    def load(cls, path):
+    def load(cls, path, precision: str = None):
         config = AIIAConfig.load(path)
         model = cls(config)
-        # Check if CUDA is available and set the device accordingly
         device = 'cuda' if torch.cuda.is_available() else 'cpu'
-        # Load the state dictionary with the correct device mapping
-        model.load_state_dict(torch.load(f"{path}/model.pth", map_location=device))
+        dtype = None
+
+        if precision is not None:
+            if precision.lower() == 'fp16':
+                dtype = torch.float16
+            elif precision.lower() == 'bf16':
+                if device == 'cuda' and not torch.cuda.is_bf16_supported():
+                    warnings.warn("BF16 is not supported on this GPU. Falling back to FP16.")
+                    dtype = torch.float16
+                else:
+                    dtype = torch.bfloat16
+            else:
+                raise ValueError("Unsupported precision. Use 'fp16', 'bf16', or leave as None.")
+
+        # Load the state dictionary normally (without dtype argument)
+        model_dict = torch.load(f"{path}/model.pth", map_location=device)
+
+        # If a precision conversion is requested, cast each tensor in the state dict to the target dtype.
+        if dtype is not None:
+            for key, param in model_dict.items():
+                if torch.is_tensor(param):
+                    model_dict[key] = param.to(dtype)
+
+        model.load_state_dict(model_dict)
         return model


+class AIIABase(AIIA):
+    def __init__(self, config: AIIAConfig, **kwargs):
+        super().__init__(config=config, **kwargs)
+        self.config = self.config
+
+        # Initialize layers based on configuration
+        layers = []
+        in_channels = self.config.num_channels
+
+        for _ in range(self.config.num_hidden_layers):
+            layers.extend([
+                nn.Conv2d(in_channels, self.config.hidden_size,
+                          kernel_size=self.config.kernel_size, padding=1),
+                getattr(nn, self.config.activation_function)(),
+                nn.MaxPool2d(kernel_size=1, stride=1)
+            ])
+            in_channels = self.config.hidden_size
+
+        self.cnn = nn.Sequential(*layers)
+
+    def forward(self, x):
+        return self.cnn(x)
+
 class AIIABaseShared(AIIA):
     def __init__(self, config: AIIAConfig, **kwargs):
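
The core of the change is the cast applied to the checkpoint before load_state_dict. A standalone sketch of that step, using made-up tensor names rather than a real AIIA checkpoint:

# Minimal reproduction of the casting loop in load(): every tensor in the
# state dict is converted to the requested dtype; non-tensor entries are left untouched.
import torch

state_dict = {"conv.weight": torch.randn(8, 3, 3, 3), "conv.bias": torch.randn(8)}
target_dtype = torch.bfloat16

for key, param in state_dict.items():
    if torch.is_tensor(param):
        state_dict[key] = param.to(target_dtype)

print({k: v.dtype for k, v in state_dict.items()})  # both entries: torch.bfloat16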

@@ -107,29 +152,6 @@ class AIIABaseShared(AIIA):
         return out


-class AIIABase(AIIA):
-    def __init__(self, config: AIIAConfig, **kwargs):
-        super().__init__(config=config, **kwargs)
-        self.config = self.config
-
-        # Initialize layers based on configuration
-        layers = []
-        in_channels = self.config.num_channels
-
-        for _ in range(self.config.num_hidden_layers):
-            layers.extend([
-                nn.Conv2d(in_channels, self.config.hidden_size,
-                          kernel_size=self.config.kernel_size, padding=1),
-                getattr(nn, self.config.activation_function)(),
-                nn.MaxPool2d(kernel_size=1, stride=1)
-            ])
-            in_channels = self.config.hidden_size
-
-        self.cnn = nn.Sequential(*layers)
-
-    def forward(self, x):
-        return self.cnn(x)
-
 class AIIAExpert(AIIA):
     def __init__(self, config: AIIAConfig, base_class=AIIABase, **kwargs):
         super().__init__(config=config, **kwargs)

@@ -227,7 +249,8 @@ class AIIArecursive(AIIA):

         combined_output = torch.mean(torch.stack(processed_patches, dim=0), dim=0)
         return combined_output

-config = AIIAConfig()
-model = AIIAmoe(config, num_experts=5)
-model.save("test")
+if __name__ =="__main__":
+    config = AIIAConfig()
+    model = AIIAmoe(config, num_experts=5)
+    model.save("test")
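
The new __main__ guard keeps the smoke test from running on import. A hypothetical round trip that combines it with the new loader; this assumes save() writes model.pth next to the saved config, which is what load() expects:

# Illustrative only: the directory name and the fp16 choice are examples, not part of the commit.
if __name__ == "__main__":
    config = AIIAConfig()
    base = AIIABase(config)
    base.save("AIIA-base-test")

    reloaded = AIIABase.load("AIIA-base-test", precision="fp16")
    print(type(reloaded).__name__)  # AIIABase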