Compare commits

...

2 Commits

Author SHA1 Message Date
Falko Victor Habel e7b9da37d6 Merge pull request 'updated memory fix' (#26) from feat/save_fix into main
Reviewed-on: #26
2025-07-03 11:50:07 +00:00
Falko Victor Habel 159ada872b updated memory fix
2025-07-03 13:49:45 +02:00
1 changed file with 18 additions and 5 deletions


@@ -241,6 +241,7 @@ class MemoryOptimizedTrainer(aiuNNTrainer):
                                    total=len(train_batches),
                                    desc=f"Epoch {epoch + 1}/{epochs}")
            for batch_idx, (low_res, high_res) in progress_bar:
                # Move data to device
                low_res = low_res.to(self.device, non_blocking=True).to(memory_format=torch.channels_last)
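For context, the transfer pattern in this hunk (a non_blocking copy followed by a torch.channels_last conversion) is the standard way to feed a convolutional model on GPU. A minimal sketch of the same idea outside the trainer, with a toy model standing in for the project's upscaler (all names below are illustrative, not taken from the repository):

    import torch
    import torch.nn as nn

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Toy stand-in for the upscaler; convert it to channels_last so conv kernels
    # can use the NHWC fast path on recent GPUs.
    model = nn.Conv2d(3, 3, kernel_size=3, padding=1).to(device).to(memory_format=torch.channels_last)

    low_res = torch.randn(4, 3, 64, 64)                      # stand-in for one training batch
    low_res = low_res.to(device, non_blocking=True)           # overlaps the H2D copy with compute when the loader pins memory
    low_res = low_res.to(memory_format=torch.channels_last)   # match the model's memory format

    out = model(low_res)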
@@ -251,8 +252,10 @@ class MemoryOptimizedTrainer(aiuNNTrainer):
                if hasattr(self, 'use_checkpointing') and self.use_checkpointing:
                    low_res.requires_grad_()
                    outputs = checkpoint(self.model, low_res)
                    outputs = outputs.clone()  # <-- Clone added here
                else:
                    outputs = self.model(low_res)
                    outputs = outputs.clone()  # <-- Clone added here
                loss = self.criterion(outputs, high_res)
                # Scale loss for gradient accumulation
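The two inserted clone() calls are the substance of the fix. The commit message only says "memory fix", but a common reason to clone the output of torch.utils.checkpoint is to give downstream code its own buffer, so later in-place operations cannot invalidate tensors the checkpoint needs when it re-runs the forward pass during backward. A self-contained sketch of that pattern with a toy model (not the repository's upscaler):

    import torch
    import torch.nn as nn
    from torch.utils.checkpoint import checkpoint

    model = nn.Sequential(nn.Conv2d(3, 8, kernel_size=3, padding=1), nn.ReLU())
    criterion = nn.MSELoss()

    low_res = torch.randn(2, 3, 32, 32)
    high_res = torch.randn(2, 8, 32, 32)

    # Mirror the trainer's checkpointing branch: the input must require grad,
    # otherwise nothing upstream of the checkpoint receives gradients.
    low_res.requires_grad_()
    outputs = checkpoint(model, low_res)
    outputs = outputs.clone()  # decouple from the checkpointed buffer before further use

    loss = criterion(outputs, high_res)
    loss.backward()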
@@ -348,6 +351,16 @@ class MemoryOptimizedTrainer(aiuNNTrainer):
            return self.best_loss

        # Stop memory monitoring
        if self.use_memory_profiling:
            self.stop_monitoring = True
            if self.memory_monitor_thread:
                self.memory_monitor_thread.join(timeout=1)
            print(f"Training completed. Peak GPU memory usage: {self.peak_memory:.2f}GB")

        return self.best_loss

    def get_memory_summary(self):
        """Get a summary of memory usage during training"""
        if not self.memory_stats:
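The added shutdown logic above follows the usual flag-then-join pattern for a background sampling thread. The monitor itself sits outside this diff, so the following is only a guess at the structure it plugs into, shown as a standalone class rather than as methods on the trainer:

    import threading
    import time

    import torch

    class GpuMemoryMonitor:
        """Illustrative sampler; the real trainer keeps equivalent state on self."""

        def __init__(self, interval=0.5):
            self.stop_monitoring = False
            self.peak_memory = 0.0
            self.memory_stats = []
            self._interval = interval
            self.memory_monitor_thread = threading.Thread(target=self._run, daemon=True)

        def _run(self):
            # Periodically record allocated GPU memory until the flag is raised.
            while not self.stop_monitoring:
                gb = torch.cuda.memory_allocated() / 1024 ** 3 if torch.cuda.is_available() else 0.0
                self.memory_stats.append(gb)
                self.peak_memory = max(self.peak_memory, gb)
                time.sleep(self._interval)

        def start(self):
            self.memory_monitor_thread.start()

        def stop(self):
            # Same shape as the diff: raise the flag, then give the thread a bounded time to exit.
            self.stop_monitoring = True
            if self.memory_monitor_thread.is_alive():
                self.memory_monitor_thread.join(timeout=1)
            print(f"Peak GPU memory usage: {self.peak_memory:.2f}GB")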