Downsized training data from 20k to 10k rows

This commit is contained in:
Falko Victor Habel 2025-01-27 08:44:46 +01:00
parent 3749ba9c5f
commit d205346741
1 changed file with 3 additions and 3 deletions

View File

@@ -9,13 +9,13 @@ import copy
def pretrain_model(data_path1, data_path2, num_epochs=3):
# Read and merge datasets
df1 = pd.read_parquet(data_path1).head(10000)
df2 = pd.read_parquet(data_path2).head(10000)
df1 = pd.read_parquet(data_path1).head(5000)
df2 = pd.read_parquet(data_path2).head(5000)
merged_df = pd.concat([df1, df2], ignore_index=True)
# Model configuration
config = AIIAConfig(
model_name="AIIA-Base-512x20k",
model_name="AIIA-Base-512x10k",
)
# Initialize model and data loader