From 6d1fc4c88d1b8b3af727b5f8e6ee4fbfcb13686c Mon Sep 17 00:00:00 2001
From: Falko Habel
Date: Thu, 3 Jul 2025 09:53:10 +0200
Subject: [PATCH] bilinear upsampling followed by a convolution instead

---
 src/aiunn/upsampler/aiunn.py | 47 +++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 22 deletions(-)

diff --git a/src/aiunn/upsampler/aiunn.py b/src/aiunn/upsampler/aiunn.py
index f60952f..9db6ba9 100644
--- a/src/aiunn/upsampler/aiunn.py
+++ b/src/aiunn/upsampler/aiunn.py
@@ -1,39 +1,44 @@
-import os
-import torch
 import torch.nn as nn
-import warnings
 from aiia.model.Model import AIIAConfig, AIIABase
 from transformers import PreTrainedModel
 from .config import aiuNNConfig
-import warnings
+
 
 class aiuNN(PreTrainedModel):
     config_class = aiuNNConfig
+
     def __init__(self, config: aiuNNConfig, base_model: PreTrainedModel = None):
         super().__init__(config)
         self.config = config
+
+        # Copy base layers into aiuNN for self-containment and portability
         if base_model is not None:
-            # Only copy submodules if base_model is provided
-            self.base_layers = nn.Sequential(*[layer for layer in base_model.cnn])
+            if hasattr(base_model, 'cnn'):
+                self.base_layers = nn.Sequential(*[layer for layer in base_model.cnn])
+            elif hasattr(base_model, 'shared_layer') and hasattr(base_model, 'unique_layers'):
+                layers = [base_model.shared_layer, base_model.activation, base_model.max_pool]
+                for ul in base_model.unique_layers:
+                    layers.extend([ul, base_model.activation, base_model.max_pool])
+                self.base_layers = nn.Sequential(*layers)
+            else:
+                self.base_layers = self._build_base_layers_from_config(config)
         else:
-            # At inference, modules will be loaded from state_dict
             self.base_layers = self._build_base_layers_from_config(config)
 
-        scale_factor = self.config.upsample_scale
-        out_channels = self.config.num_channels * (scale_factor ** 2)
-        self.pixel_shuffle_conv = nn.Conv2d(
+        # Bilinear upsampling head
+        self.upsample = nn.Upsample(
+            scale_factor=self.config.upsample_scale,
+            mode='bilinear',
+            align_corners=False
+        )
+        self.final_conv = nn.Conv2d(
             in_channels=self.config.hidden_size,
-            out_channels=out_channels,
-            kernel_size=self.config.kernel_size,
+            out_channels=self.config.num_channels,
+            kernel_size=3,
             padding=1
         )
-        self.pixel_shuffle = nn.PixelShuffle(scale_factor)
 
     def _build_base_layers_from_config(self, config):
-        """
-        Reconstruct the base layers (e.g., CNN) using only the config.
-        This must match exactly how your base model builds its layers!
-        """
         layers = []
         in_channels = config.num_channels
         for _ in range(config.num_hidden_layers):
@@ -47,14 +52,12 @@ class aiuNN(PreTrainedModel):
         return nn.Sequential(*layers)
 
     def forward(self, x):
-        if self.base_layers is not None:
-            x = self.base_layers(x)
-        x = self.pixel_shuffle_conv(x)
-        x = self.pixel_shuffle(x)
+        x = self.base_layers(x)
+        x = self.upsample(x)
+        x = self.final_conv(x)
         return x
 
 
-
 if __name__ == "__main__":
     from aiia import AIIABase, AIIAConfig
     # Create a configuration and build a base model.