limit data loading to 10k rows per file

This commit is contained in:
Falko Victor Habel 2025-01-26 18:41:14 +01:00
parent e0abdb9d39
commit a8cd9b00e5
1 changed file with 2 additions and 2 deletions

View File

@@ -12,8 +12,8 @@ from aiia.data.DataLoader import AIIADataLoader
def pretrain_model(data_path1, data_path2, num_epochs=3): def pretrain_model(data_path1, data_path2, num_epochs=3):
# Merge the two parquet files # Merge the two parquet files
df1 = pd.read_parquet(data_path1) df1 = pd.read_parquet(data_path1).head(10000)
df2 = pd.read_parquet(data_path2) df2 = pd.read_parquet(data_path2).head(10000)
merged_df = pd.concat([df1, df2], ignore_index=True) merged_df = pd.concat([df1, df2], ignore_index=True)
# Create a new AIIAConfig instance # Create a new AIIAConfig instance