added _enhanced_memory_cleanup
Gitea Actions For AIIA / Explore-Gitea-Actions (push) Successful in 9m44s
Details
Gitea Actions For AIIA / Explore-Gitea-Actions (push) Successful in 9m44s
Details
This commit is contained in:
parent
622cc2cc35
commit
2bb73cc8f4
|
@ -193,6 +193,47 @@ class MemoryOptimizedTrainer(aiuNNTrainer):
|
|||
self.model.train()
|
||||
return val_loss / max(num_batches, 1)
|
||||
|
||||
def _enhanced_memory_cleanup(self):
|
||||
"""Enhanced memory cleanup for CUDAGraph compatibility and memory optimization"""
|
||||
|
||||
# Clear gradients properly - set to None for better memory efficiency
|
||||
if hasattr(self, 'optimizer') and self.optimizer is not None:
|
||||
self.optimizer.zero_grad(set_to_none=True)
|
||||
|
||||
# Clear model gradients explicitly
|
||||
if hasattr(self, 'model') and self.model is not None:
|
||||
for param in self.model.parameters():
|
||||
if param.grad is not None:
|
||||
param.grad = None
|
||||
|
||||
# Force Python garbage collection
|
||||
import gc
|
||||
gc.collect()
|
||||
|
||||
# Clear PyTorch CUDA cache and synchronize
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
torch.cuda.synchronize()
|
||||
|
||||
# Reset memory stats periodically
|
||||
if hasattr(torch.cuda, 'reset_peak_memory_stats'):
|
||||
torch.cuda.reset_peak_memory_stats()
|
||||
|
||||
# Clear any lingering autograd computation graphs
|
||||
with torch.no_grad():
|
||||
pass
|
||||
|
||||
# Mark new step for CUDAGraphs to prevent tensor conflicts
|
||||
if self.use_model_compilation and hasattr(torch.compiler, 'cudagraph_mark_step_begin'):
|
||||
torch.compiler.cudagraph_mark_step_begin()
|
||||
|
||||
# Clear any cached compilation artifacts
|
||||
if hasattr(torch, '_dynamo') and hasattr(torch._dynamo, 'reset'):
|
||||
try:
|
||||
torch._dynamo.reset()
|
||||
except Exception:
|
||||
pass # Ignore if reset fails
|
||||
|
||||
|
||||
def finetune(self, output_path, epochs=10, lr=1e-4, patience=3, min_delta=0.001):
|
||||
"""Enhanced training with memory optimizations"""
|
||||
|
|
Loading…
Reference in New Issue