improved shared model to have ~10% of params

Falko Victor Habel 2025-01-24 18:23:54 +01:00
parent 599b8c4835
commit 8ac31c5bf1
1 changed file with 56 additions and 27 deletions


@@ -28,41 +28,65 @@ class AIIA(nn.Module):
         return model
 
 class AIIABaseShared(AIIA):
     """
     Base class with parameter sharing.
     All hidden layers share the same weights
     """
-    def __init__(self, config: AIIAConfig, **kwargs):
+    def __init__(self, config: AIIAConfig, num_shared_layers=1, **kwargs):
         super().__init__(config=config, **kwargs)
         self.config = copy.deepcopy(config)
+        self.config.num_shared_layers = num_shared_layers
 
         # Update config with new parameters if provided
         for key, value in kwargs.items():
             setattr(self.config, key, value)
 
-        # Initialize shared layers
-        self.conv_layer = nn.Conv2d(
-            self.config.num_channels,
-            self.config.hidden_size,
-            kernel_size=self.config.kernel_size,
-            padding=1
-        )
+        # Shared layers (early stages) reuse one weight tensor
+        self.shared_layers = nn.ModuleList()
+        for _ in range(self.config.num_shared_layers):
+            layer = nn.Conv2d(
+                self.config.num_channels,
+                self.config.hidden_size,
+                kernel_size=self.config.kernel_size,
+                padding=1
+            )
+            # The first layer creates the shared weight tensor and the
+            # per-layer biases; every later layer is tied to that weight.
+            if len(self.shared_layers) == 0:
+                self.shared_weights = layer.weight
+                self.shared_biases = nn.ParameterList([
+                    nn.Parameter(torch.zeros(self.config.hidden_size))
+                    for _ in range(self.config.num_shared_layers)
+                ])
+            else:
+                layer.weight = self.shared_weights
+            # Each layer keeps its own separate bias
+            layer.bias = self.shared_biases[len(self.shared_layers)]
+            self.shared_layers.append(layer)
+
+        # Unique layers (later stages) have their own weights and biases
+        self.unique_layers = nn.ModuleList()
+        in_channels = self.config.hidden_size
+        for _ in range(self.config.num_shared_layers):
+            self.unique_layers.append(
+                nn.Conv2d(
+                    in_channels,
+                    self.config.hidden_size,
+                    kernel_size=self.config.kernel_size,
+                    padding=1
+                )
+            )
 
         # Activation and pooling layers
         self.activation_function = getattr(nn, self.config.activation_function)()
-        self.max_pool = nn.MaxPool2d(kernel_size=2)
-
-        # Create a Sequential container with shared layers repeated
-        layers = []
-        for _ in range(self.config.num_hidden_layers):
-            layers.extend([
-                self.conv_layer,
-                self.activation_function,
-                self.max_pool
-            ])
-        self.cnn = nn.Sequential(*layers)
+        self.max_pool = nn.MaxPool2d(self.config.kernel_size)
 
     def forward(self, x):
-        return self.cnn(x)
+        for layer in self.shared_layers:
+            x = layer(x)
+            x = self.activation_function(x)
+            x = self.max_pool(x)
+
+        for layer in self.unique_layers:
+            x = layer(x)
+            x = self.activation_function(x)
+            x = self.max_pool(x)
+
+        return x
 
 class AIIABase(AIIA):
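
The savings come from the weight tying in the shared stage: assigning one nn.Parameter to several Conv2d modules stores (and trains) that tensor once, and nn.Module.parameters() deduplicates tied tensors when counting. Note the shapes only line up for the default num_shared_layers=1, since every shared conv maps num_channels to hidden_size. A minimal standalone sketch of the tying idea, using illustrative channel sizes rather than the real AIIAConfig values:

import torch
import torch.nn as nn

conv_a = nn.Conv2d(64, 64, kernel_size=3, padding=1)
conv_b = nn.Conv2d(64, 64, kernel_size=3, padding=1)
conv_b.weight = conv_a.weight  # tie: both modules now share one weight tensor

tied = nn.ModuleList([conv_a, conv_b])
untied = nn.ModuleList([nn.Conv2d(64, 64, 3, padding=1) for _ in range(2)])

def n_params(m):
    # parameters() yields each Parameter object once, even when tied
    return sum(p.numel() for p in m.parameters())

print(n_params(tied), n_params(untied))  # tied is roughly half the size
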
@@ -184,4 +208,9 @@ class AIIArecursive(AIIA):
             processed_patches.append(pp)
 
         combined_output = torch.mean(torch.stack(processed_patches, dim=0), dim=0)
-        return combined_output
+        return combined_output
+
+
+config = AIIAConfig()
+model2 = AIIABaseShared(config)
+model2.save("shared")