Compare commits
No commits in common. "cd8e1857eaf6b6afc685208ade92022f594c2d9c" and "beb9eedc1f92bfa5ee488cea40e3b5860ca9d6cc" have entirely different histories.
cd8e1857ea
...
beb9eedc1f
|
@ -0,0 +1,25 @@
|
||||||
|
{
|
||||||
|
"architectures": [
|
||||||
|
"BertForSequenceClassification"
|
||||||
|
],
|
||||||
|
"attention_probs_dropout_prob": 0.1,
|
||||||
|
"classifier_dropout": null,
|
||||||
|
"hidden_act": "gelu",
|
||||||
|
"hidden_dropout_prob": 0.1,
|
||||||
|
"hidden_size": 384,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 1536,
|
||||||
|
"layer_norm_eps": 1e-12,
|
||||||
|
"max_position_embeddings": 512,
|
||||||
|
"model_type": "bert",
|
||||||
|
"num_attention_heads": 6,
|
||||||
|
"num_hidden_layers": 6,
|
||||||
|
"pad_token_id": 0,
|
||||||
|
"position_embedding_type": "absolute",
|
||||||
|
"problem_type": "single_label_classification",
|
||||||
|
"torch_dtype": "float32",
|
||||||
|
"transformers_version": "4.44.0",
|
||||||
|
"type_vocab_size": 2,
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 119547
|
||||||
|
}
|
Binary file not shown.
|
@ -0,0 +1,7 @@
|
||||||
|
{
|
||||||
|
"cls_token": "[CLS]",
|
||||||
|
"mask_token": "[MASK]",
|
||||||
|
"pad_token": "[PAD]",
|
||||||
|
"sep_token": "[SEP]",
|
||||||
|
"unk_token": "[UNK]"
|
||||||
|
}
|
|
@ -0,0 +1,57 @@
|
||||||
|
{
|
||||||
|
"added_tokens_decoder": {
|
||||||
|
"0": {
|
||||||
|
"content": "[PAD]",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"100": {
|
||||||
|
"content": "[UNK]",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"101": {
|
||||||
|
"content": "[CLS]",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"102": {
|
||||||
|
"content": "[SEP]",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"103": {
|
||||||
|
"content": "[MASK]",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"clean_up_tokenization_spaces": true,
|
||||||
|
"cls_token": "[CLS]",
|
||||||
|
"do_basic_tokenize": true,
|
||||||
|
"do_lower_case": false,
|
||||||
|
"mask_token": "[MASK]",
|
||||||
|
"model_max_length": 512,
|
||||||
|
"never_split": null,
|
||||||
|
"pad_token": "[PAD]",
|
||||||
|
"sep_token": "[SEP]",
|
||||||
|
"strip_accents": null,
|
||||||
|
"tokenize_chinese_chars": true,
|
||||||
|
"tokenizer_class": "BertTokenizer",
|
||||||
|
"unk_token": "[UNK]"
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
|
@ -114,7 +114,7 @@ if __name__ == '__main__':
|
||||||
train_df, val_df = train_test_split(df, test_size=0.35, random_state=42)
|
train_df, val_df = train_test_split(df, test_size=0.35, random_state=42)
|
||||||
|
|
||||||
# Initialize and train the model
|
# Initialize and train the model
|
||||||
trainer = FakeNewsModelTrainer(size_factor=0.5)
|
trainer = FakeNewsModelTrainer(size_factor=0.25)
|
||||||
train_data = trainer.prepare_data(train_df)
|
train_data = trainer.prepare_data(train_df)
|
||||||
val_data = trainer.prepare_data(val_df)
|
val_data = trainer.prepare_data(val_df)
|
||||||
trainer.train(train_data, val_data)
|
trainer.train(train_data, val_data)
|
||||||
|
|
Reference in New Issue