# Listing header left over from the file viewer: 27 lines, 760 B, Python
"""Pretrain an AIIA base model on two parquet datasets.

Running this module builds an ``AIIABase`` model from an ``AIIAConfig``,
wraps it in a ``Pretrainer`` and immediately calls ``Pretrainer.train`` on
both dataset files listed below.
"""

from aiia.model import AIIABase
from aiia.model.config import AIIAConfig
from aiia.pretrain import Pretrainer

# Parquet files handed to the pretrainer (absolute paths on the training host).
data_path1 = "/root/training_data/vision-dataset/images_checkpoint.parquet"
data_path2 = "/root/training_data/vision-dataset/vec_images_dataset.parquet"

# Create your model: 6 hidden layers with a hidden size of 256.
config = AIIAConfig(
    model_name="AIIA-Base-512x10k-small",
    num_hidden_layers=6,
    hidden_size=256,
)
model = AIIABase(config)

# Initialize pretrainer with the model.
# The learning rate is read back from the config; it is not set explicitly
# above, so it is presumably the AIIAConfig default -- TODO confirm.
pretrainer = Pretrainer(
    model,
    learning_rate=config.learning_rate,
    config=config,
)

# List of dataset paths
dataset_paths = [
    data_path1,
    data_path2,
]

# Start training with multiple datasets.
# NOTE(review): sample_size looks like a cap on the rows drawn from the
# datasets -- verify against Pretrainer.train before relying on it.
pretrainer.train(
    dataset_paths=dataset_paths,
    num_epochs=10,
    batch_size=2,
    sample_size=10000,
)