Compare commits
2 Commits
beb9eedc1f
...
cd8e1857ea
Author | SHA1 | Date |
---|---|---|
Falko Victor Habel | cd8e1857ea | |
Falko Victor Habel | b55fc02ede |
|
@ -1,25 +0,0 @@
|
||||||
{
|
|
||||||
"architectures": [
|
|
||||||
"BertForSequenceClassification"
|
|
||||||
],
|
|
||||||
"attention_probs_dropout_prob": 0.1,
|
|
||||||
"classifier_dropout": null,
|
|
||||||
"hidden_act": "gelu",
|
|
||||||
"hidden_dropout_prob": 0.1,
|
|
||||||
"hidden_size": 384,
|
|
||||||
"initializer_range": 0.02,
|
|
||||||
"intermediate_size": 1536,
|
|
||||||
"layer_norm_eps": 1e-12,
|
|
||||||
"max_position_embeddings": 512,
|
|
||||||
"model_type": "bert",
|
|
||||||
"num_attention_heads": 6,
|
|
||||||
"num_hidden_layers": 6,
|
|
||||||
"pad_token_id": 0,
|
|
||||||
"position_embedding_type": "absolute",
|
|
||||||
"problem_type": "single_label_classification",
|
|
||||||
"torch_dtype": "float32",
|
|
||||||
"transformers_version": "4.44.0",
|
|
||||||
"type_vocab_size": 2,
|
|
||||||
"use_cache": true,
|
|
||||||
"vocab_size": 119547
|
|
||||||
}
|
|
Binary file not shown.
|
@ -1,7 +0,0 @@
|
||||||
{
|
|
||||||
"cls_token": "[CLS]",
|
|
||||||
"mask_token": "[MASK]",
|
|
||||||
"pad_token": "[PAD]",
|
|
||||||
"sep_token": "[SEP]",
|
|
||||||
"unk_token": "[UNK]"
|
|
||||||
}
|
|
|
@ -1,57 +0,0 @@
|
||||||
{
|
|
||||||
"added_tokens_decoder": {
|
|
||||||
"0": {
|
|
||||||
"content": "[PAD]",
|
|
||||||
"lstrip": false,
|
|
||||||
"normalized": false,
|
|
||||||
"rstrip": false,
|
|
||||||
"single_word": false,
|
|
||||||
"special": true
|
|
||||||
},
|
|
||||||
"100": {
|
|
||||||
"content": "[UNK]",
|
|
||||||
"lstrip": false,
|
|
||||||
"normalized": false,
|
|
||||||
"rstrip": false,
|
|
||||||
"single_word": false,
|
|
||||||
"special": true
|
|
||||||
},
|
|
||||||
"101": {
|
|
||||||
"content": "[CLS]",
|
|
||||||
"lstrip": false,
|
|
||||||
"normalized": false,
|
|
||||||
"rstrip": false,
|
|
||||||
"single_word": false,
|
|
||||||
"special": true
|
|
||||||
},
|
|
||||||
"102": {
|
|
||||||
"content": "[SEP]",
|
|
||||||
"lstrip": false,
|
|
||||||
"normalized": false,
|
|
||||||
"rstrip": false,
|
|
||||||
"single_word": false,
|
|
||||||
"special": true
|
|
||||||
},
|
|
||||||
"103": {
|
|
||||||
"content": "[MASK]",
|
|
||||||
"lstrip": false,
|
|
||||||
"normalized": false,
|
|
||||||
"rstrip": false,
|
|
||||||
"single_word": false,
|
|
||||||
"special": true
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"clean_up_tokenization_spaces": true,
|
|
||||||
"cls_token": "[CLS]",
|
|
||||||
"do_basic_tokenize": true,
|
|
||||||
"do_lower_case": false,
|
|
||||||
"mask_token": "[MASK]",
|
|
||||||
"model_max_length": 512,
|
|
||||||
"never_split": null,
|
|
||||||
"pad_token": "[PAD]",
|
|
||||||
"sep_token": "[SEP]",
|
|
||||||
"strip_accents": null,
|
|
||||||
"tokenize_chinese_chars": true,
|
|
||||||
"tokenizer_class": "BertTokenizer",
|
|
||||||
"unk_token": "[UNK]"
|
|
||||||
}
|
|
File diff suppressed because it is too large
Load Diff
|
@ -114,7 +114,7 @@ if __name__ == '__main__':
|
||||||
train_df, val_df = train_test_split(df, test_size=0.35, random_state=42)
|
train_df, val_df = train_test_split(df, test_size=0.35, random_state=42)
|
||||||
|
|
||||||
# Initialize and train the model
|
# Initialize and train the model
|
||||||
trainer = FakeNewsModelTrainer(size_factor=0.25)
|
trainer = FakeNewsModelTrainer(size_factor=0.5)
|
||||||
train_data = trainer.prepare_data(train_df)
|
train_data = trainer.prepare_data(train_df)
|
||||||
val_data = trainer.prepare_data(val_df)
|
val_data = trainer.prepare_data(val_df)
|
||||||
trainer.train(train_data, val_data)
|
trainer.train(train_data, val_data)
|
||||||
|
|
Reference in New Issue