improved shared model to have ~10% of params
parent 599b8c4835
commit 8ac31c5bf1
@@ -28,41 +28,65 @@ class AIIA(nn.Module):
         return model


 class AIIABaseShared(AIIA):
     """
     Base class with parameter sharing.
     All hidden layers share the same weights.
     """
-    def __init__(self, config: AIIAConfig, **kwargs):
+    def __init__(self, config: AIIAConfig, num_shared_layers=1, **kwargs):
         super().__init__(config=config, **kwargs)
         self.config = copy.deepcopy(config)
+        self.config.num_shared_layers = num_shared_layers
         # Update config with new parameters if provided
         for key, value in kwargs.items():
             setattr(self.config, key, value)

-        # Initialize shared layers
-        self.conv_layer = nn.Conv2d(
-            self.config.num_channels,
-            self.config.hidden_size,
-            kernel_size=self.config.kernel_size,
-            padding=1
-        )
+        # Shared layers (early stages) use the same kernel
+        self.shared_layers = nn.ModuleList()
+        for _ in range(self.config.num_shared_layers):
+            layer = nn.Conv2d(
+                self.config.num_channels,
+                self.config.hidden_size,
+                kernel_size=self.config.kernel_size,
+                padding=1
+            )
+            # Initialize with shared weights if it's the first layer
+            if len(self.shared_layers) == 0:
+                self.shared_weights = layer.weight
+                self.shared_biases = nn.ParameterList([
+                    nn.Parameter(torch.zeros(self.config.hidden_size))
+                    for _ in range(self.config.num_shared_layers)
+                ])
+            else:
+                layer.weight = self.shared_weights
+            # Assign separate biases
+            layer.bias = self.shared_biases[len(self.shared_layers)]
+            self.shared_layers.append(layer)
+
+        # Unique layers (later stages) have their own weights and biases
+        self.unique_layers = nn.ModuleList()
+        in_channels = self.config.hidden_size
+        for _ in range(self.config.num_shared_layers):
+            self.unique_layers.append(
+                nn.Conv2d(
+                    in_channels,
+                    self.config.hidden_size,
+                    kernel_size=self.config.kernel_size,
+                    padding=1
+                )
+            )

         # Activation and pooling layers
         self.activation_function = getattr(nn, self.config.activation_function)()
         self.max_pool = nn.MaxPool2d(kernel_size=2)

-        # Create a Sequential container with shared layers repeated
-        layers = []
-        for _ in range(self.config.num_hidden_layers):
-            layers.extend([
-                self.conv_layer,
-                self.activation_function,
-                self.max_pool
-            ])
-
-        self.cnn = nn.Sequential(*layers)
-        self.max_pool = nn.MaxPool2d(self.config.kernel_size)
-
     def forward(self, x):
-        return self.cnn(x)
+        for layer in self.shared_layers:
+            x = layer(x)
+            x = self.activation_function(x)
+            x = self.max_pool(x)
+
+        for layer in self.unique_layers:
+            x = layer(x)
+            x = self.activation_function(x)
+            x = self.max_pool(x)
+
+        return x


 class AIIABase(AIIA):
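The sharing above relies on a standard PyTorch weight-tying trick: assigning one module's weight Parameter to another attribute registers the same tensor in both modules, so it is stored, counted, and trained once. A minimal self-contained sketch of that mechanism (illustrative names, independent of the AIIA classes):

    import torch.nn as nn

    a = nn.Conv2d(3, 8, kernel_size=3, padding=1)
    b = nn.Conv2d(3, 8, kernel_size=3, padding=1)
    b.weight = a.weight                 # tie: both layers share one weight tensor
    assert b.weight is a.weight

    pair = nn.ModuleList([a, b])
    # parameters() de-duplicates tied tensors, so the shared weight counts once:
    print(sum(p.numel() for p in pair.parameters()))  # 8*3*3*3 + 8 + 8 = 232

Tying only works between tensors of identical shape, which is why every shared layer in the diff is built with the same in/out channels and kernel size.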
@@ -184,4 +208,9 @@ class AIIArecursive(AIIA):
             processed_patches.append(pp)

         combined_output = torch.mean(torch.stack(processed_patches, dim=0), dim=0)
         return combined_output
+
+config = AIIAConfig()
+model2 = AIIABaseShared(config)
+
+model2.save("shared")
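The commit message claims the shared model needs roughly 10% of the parameters. A rough way to verify this, sketched under the assumption that AIIABase is the unshared baseline and that both models accept a default AIIAConfig:

    config = AIIAConfig()
    shared = AIIABaseShared(config)
    baseline = AIIABase(config)

    def count_params(model):
        # parameters() yields each underlying tensor once, even when tied
        return sum(p.numel() for p in model.parameters())

    ratio = count_params(shared) / count_params(baseline)
    print(f"shared model uses {ratio:.1%} of the baseline's parameters")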