From aa6c343c400bc7b49197ad4e46ec8928387ec3f4 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Sat, 31 Aug 2024 08:09:49 +0200 Subject: [PATCH] corrected dataset path, increased epoch and batch_size --- src/model/train.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/model/train.py b/src/model/train.py index 6fcafe2..14f00dc 100644 --- a/src/model/train.py +++ b/src/model/train.py @@ -27,7 +27,7 @@ class FakeNewsModelTrainer: return TensorDataset(input_ids, attention_mask, labels) - def train(self, train_data, val_data, epochs=3, batch_size=16): + def train(self, train_data, val_data, epochs=13, batch_size=64): train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True) val_dataloader = DataLoader(val_data, batch_size=batch_size) @@ -79,7 +79,7 @@ class FakeNewsModelTrainer: # Usage example if __name__ == '__main__': # Load and preprocess the data - df = pq.read_table('your_dataset.parquet').to_pandas() + df = pq.read_table('dataset.parquet').to_pandas() df['text'] = df['title'] + ' ' + df['text'] # Combine title and text # Split the data