Corrected the dataset path ('your_dataset.parquet' -> 'dataset.parquet'); increased the default epochs (3 -> 13) and batch_size (16 -> 64)

This commit is contained in:
Falko Victor Habel 2024-08-31 08:09:49 +02:00
parent de0699d6ba
commit aa6c343c40
1 changed file with 2 additions and 2 deletions

View File

@@ -27,7 +27,7 @@ class FakeNewsModelTrainer:
return TensorDataset(input_ids, attention_mask, labels) return TensorDataset(input_ids, attention_mask, labels)
def train(self, train_data, val_data, epochs=3, batch_size=16): def train(self, train_data, val_data, epochs=13, batch_size=64):
train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True) train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_data, batch_size=batch_size) val_dataloader = DataLoader(val_data, batch_size=batch_size)
@@ -79,7 +79,7 @@ class FakeNewsModelTrainer:
# Usage example # Usage example
if __name__ == '__main__': if __name__ == '__main__':
# Load and preprocess the data # Load and preprocess the data
df = pq.read_table('your_dataset.parquet').to_pandas() df = pq.read_table('dataset.parquet').to_pandas()
df['text'] = df['title'] + ' ' + df['text'] # Combine title and text df['text'] = df['title'] + ' ' + df['text'] # Combine title and text
# Split the data # Split the data