diff --git a/src/aiia/model/Model.py b/src/aiia/model/Model.py index 7c8eee6..6c497bb 100644 --- a/src/aiia/model/Model.py +++ b/src/aiia/model/Model.py @@ -193,8 +193,8 @@ class AIIAmoe(AIIA): # To generate gating weights, we first need to determine the feature dimension. # Each expert is assumed to return an output of shape (B, C, H, W); after averaging over H and W, - # we obtain a tensor of shape (B, C) where C is the number of channels (here assumed to be 410). - gate_in_features = 410 # Adjust this if your expert output changes. + # we obtain a tensor of shape (B, C) where C is the number of channels (here assumed to be 512). + gate_in_features = 512 # Adjust this if your expert output changes. # Create a gating network that maps the aggregated features to num_experts weights. self.gate = nn.Sequential(