improved shared model to have ~10% of params

Falko Victor Habel 2025-01-24 18:23:54 +01:00
parent 599b8c4835
commit 8ac31c5bf1
1 changed file with 56 additions and 27 deletions


@@ -28,41 +28,65 @@ class AIIA(nn.Module):
         return model
 
 class AIIABaseShared(AIIA):
     """
     Base class with parameter sharing.
     All hidden layers share the same weights
     """
-    def __init__(self, config: AIIAConfig, **kwargs):
+    def __init__(self, config: AIIAConfig, num_shared_layers=1, **kwargs):
         super().__init__(config=config, **kwargs)
         self.config = copy.deepcopy(config)
+        self.config.num_shared_layers = num_shared_layers
 
         # Update config with new parameters if provided
         for key, value in kwargs.items():
             setattr(self.config, key, value)
 
-        # Initialize shared layers
-        self.conv_layer = nn.Conv2d(
-            self.config.num_channels,
-            self.config.hidden_size,
-            kernel_size=self.config.kernel_size,
-            padding=1
-        )
+        # Shared layers (early stages) reuse one weight tensor
+        self.shared_layers = nn.ModuleList()
+        for _ in range(self.config.num_shared_layers):
+            layer = nn.Conv2d(
+                self.config.num_channels,
+                self.config.hidden_size,
+                kernel_size=self.config.kernel_size,
+                padding=1
+            )
+            # The first layer creates the shared weight tensor and the
+            # per-layer biases; every later layer is tied to that weight.
+            if len(self.shared_layers) == 0:
+                self.shared_weights = layer.weight
+                self.shared_biases = nn.ParameterList([
+                    nn.Parameter(torch.zeros(self.config.hidden_size))
+                    for _ in range(self.config.num_shared_layers)
+                ])
+            else:
+                layer.weight = self.shared_weights
+            # Each layer keeps its own separate bias
+            layer.bias = self.shared_biases[len(self.shared_layers)]
+            self.shared_layers.append(layer)
+
+        # Unique layers (later stages) have their own weights and biases
+        self.unique_layers = nn.ModuleList()
+        in_channels = self.config.hidden_size
+        for _ in range(self.config.num_shared_layers):
+            self.unique_layers.append(
+                nn.Conv2d(
+                    in_channels,
+                    self.config.hidden_size,
+                    kernel_size=self.config.kernel_size,
+                    padding=1
+                )
+            )
 
         # Activation and pooling layers
         self.activation_function = getattr(nn, self.config.activation_function)()
-        self.max_pool = nn.MaxPool2d(kernel_size=2)
-
-        # Create a Sequential container with shared layers repeated
-        layers = []
-        for _ in range(self.config.num_hidden_layers):
-            layers.extend([
-                self.conv_layer,
-                self.activation_function,
-                self.max_pool
-            ])
-        self.cnn = nn.Sequential(*layers)
+        self.max_pool = nn.MaxPool2d(self.config.kernel_size)
 
     def forward(self, x):
-        return self.cnn(x)
+        for layer in self.shared_layers:
+            x = layer(x)
+            x = self.activation_function(x)
+            x = self.max_pool(x)
+
+        for layer in self.unique_layers:
+            x = layer(x)
+            x = self.activation_function(x)
+            x = self.max_pool(x)
+
+        return x
 
 class AIIABase(AIIA):
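
The savings come from the weight tying in the shared stage: assigning one nn.Parameter to several Conv2d modules stores (and trains) that tensor once, and nn.Module.parameters() deduplicates tied tensors when counting. Note the shapes only line up for the default num_shared_layers=1, since every shared conv maps num_channels to hidden_size. A minimal standalone sketch of the tying idea, using illustrative channel sizes rather than the real AIIAConfig values:

import torch
import torch.nn as nn

conv_a = nn.Conv2d(64, 64, kernel_size=3, padding=1)
conv_b = nn.Conv2d(64, 64, kernel_size=3, padding=1)
conv_b.weight = conv_a.weight  # tie: both modules now share one weight tensor

tied = nn.ModuleList([conv_a, conv_b])
untied = nn.ModuleList([nn.Conv2d(64, 64, 3, padding=1) for _ in range(2)])

def n_params(m):
    # parameters() yields each Parameter object once, even when tied
    return sum(p.numel() for p in m.parameters())

print(n_params(tied), n_params(untied))  # tied is roughly half the size
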
@@ -184,4 +208,9 @@ class AIIArecursive(AIIA):
             processed_patches.append(pp)
 
         combined_output = torch.mean(torch.stack(processed_patches, dim=0), dim=0)
-        return combined_output
+        return combined_output
+
+
+config = AIIAConfig()
+model2 = AIIABaseShared(config)
+model2.save("shared")