From d205346741a7d0a71000eee9fe4bc848dccf82cb Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Mon, 27 Jan 2025 08:44:46 +0100 Subject: [PATCH] downsized training data from 20k to 10k --- src/pretrain.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pretrain.py b/src/pretrain.py index 4d98538..505b032 100644 --- a/src/pretrain.py +++ b/src/pretrain.py @@ -9,13 +9,13 @@ import copy def pretrain_model(data_path1, data_path2, num_epochs=3): # Read and merge datasets - df1 = pd.read_parquet(data_path1).head(10000) - df2 = pd.read_parquet(data_path2).head(10000) + df1 = pd.read_parquet(data_path1).head(5000) + df2 = pd.read_parquet(data_path2).head(5000) merged_df = pd.concat([df1, df2], ignore_index=True) # Model configuration config = AIIAConfig( - model_name="AIIA-Base-512x20k", + model_name="AIIA-Base-512x10k", ) # Initialize model and data loader