limit data loading to 10k rows per file
This commit is contained in:
parent
e0abdb9d39
commit
a8cd9b00e5
|
@ -12,8 +12,8 @@ from aiia.data.DataLoader import AIIADataLoader
|
||||||
|
|
||||||
def pretrain_model(data_path1, data_path2, num_epochs=3):
|
def pretrain_model(data_path1, data_path2, num_epochs=3):
|
||||||
# Merge the two parquet files
|
# Merge the two parquet files
|
||||||
df1 = pd.read_parquet(data_path1)
|
df1 = pd.read_parquet(data_path1).head(10000)
|
||||||
df2 = pd.read_parquet(data_path2)
|
df2 = pd.read_parquet(data_path2).head(10000)
|
||||||
merged_df = pd.concat([df1, df2], ignore_index=True)
|
merged_df = pd.concat([df1, df2], ignore_index=True)
|
||||||
|
|
||||||
# Create a new AIIAConfig instance
|
# Create a new AIIAConfig instance
|
||||||
|
|
Loading…
Reference in New Issue