diff --git a/README.md b/README.md index 703fcfc..54fae1c 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,72 @@ # VeraMind -Open Weights Fake News Detection Model and Inference \ No newline at end of file + +VeraMind is an open-source Python application built using the Hugging Face Transformers library and PyTorch. It leverages a pre-trained model (`VeraMind-Mini`) to predict whether a given news article is real or fake, and reports a confidence score for that prediction. + +This project is licensed under the [Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International (CC BY-NC-ND 4.0)](https://creativecommons.org/licenses/by-nc-nd/4.0/) license. You are free to use and share this model privately, but you must give appropriate credit, not use it for commercial purposes, and not distribute derivative works. + +**Note:** This is a machine learning model and may make mistakes. It should not replace your own critical thinking when evaluating news authenticity. Always verify information from multiple reliable sources. + +## Features + +- Predicts if a given news article is real or fake. +- Provides a confidence score for the prediction. +- Utilizes the Hugging Face Transformers library for easy integration with other NLP models. + +## Installation + +1. Clone this repository: + +```bash +git clone https://gitea.fabelous.app/Fabel/VeraMind.git +cd VeraMind +``` + +2. 
Install the required dependencies: + +```bash +pip install -r requirements.txt +``` + +## Usage + +### Predicting News Authenticity + +Here's how you can use the model to predict if a news article is real or fake: + +```python +from src.Inference import VeraMindInference + +# Load the model +model = VeraMindInference("path/to/VeraMind-Mini") + +# Example news article text +text = "This is an example News Article" + +# Predict if the news is real or fake +result = model.predict(text) + +print(result) +``` + +The output will be a dictionary containing the result ("REAL" or "FAKE") and the confidence score: + +```python +{'result': 'FAKE', 'confidence': 0.9990140199661255} +``` + +## Model Architecture + +The `VeraMind-Mini` model used in this application is a fine-tuned version of the [DistilBERT](https://huggingface.co/distilbert-base-uncased) model for binary text classification. It's designed to distinguish between real and fake news articles. + +## Disclaimer + +This project is provided as-is, without any express or implied warranty. The maintainers are not responsible for any damages arising from the use of this software. + +Always remember that machine learning models can make mistakes, so use this tool responsibly and critically evaluate its predictions. + +## Citation + +If you use this model in your research, please cite it as follows: + +> **VeraMind News Authenticity Checker** (2024). 
class VeraMindInference:
    """Classify a news text as ``"REAL"`` or ``"FAKE"`` with a confidence score.

    The tokenizer and the sequence-classification model are both loaded from
    ``model_path``. The model is moved to CUDA when available (CPU otherwise)
    and switched to evaluation mode.

    Example::

        model = VeraMindInference("path/to/VeraMind-Mini")
        result = model.predict("This is an example news article")
        # e.g. {'result': 'FAKE', 'confidence': 0.9990140199661255}
    """

    def __init__(self, model_path: str, max_len: int = 512):
        """Load tokenizer and model from ``model_path``.

        Args:
            model_path: Path or identifier handed to ``from_pretrained`` for
                both the tokenizer and the model.
            max_len: Maximum number of tokens per input; longer inputs are
                truncated to this length in :meth:`predict`.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_path)
        self.model.to(self.device)
        self.model.eval()
        self.max_len = max_len

    def predict(self, text: str) -> dict:
        """Predict whether ``text`` is real or fake news.

        Args:
            text: The article text to classify.

        Returns:
            A dict with two keys: ``"result"`` (``"FAKE"`` or ``"REAL"``) and
            ``"confidence"`` (a Python ``float``: the sigmoid score when the
            result is FAKE, otherwise one minus that score).

        Raises:
            TypeError: If ``text`` is not a ``str``.
        """
        if not isinstance(text, str):
            raise TypeError(
                f"text must be a str, got {type(text).__name__}"
            )

        # Only one sequence is encoded per call, so no padding is requested:
        # padding a lone sequence to max_len would only add masked-out tokens
        # the model still has to process.
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            truncation=True,
            return_token_type_ids=False,
            return_attention_mask=True,
            return_tensors="pt",
        )

        input_ids = encoding["input_ids"].to(self.device)
        attention_mask = encoding["attention_mask"].to(self.device)

        with torch.no_grad():
            logits = self.model(input_ids, attention_mask=attention_mask).logits

        # The score is the sigmoid of the first logit of the first (only)
        # sequence. NOTE(review): this presumes a single-logit head where a
        # higher value means FAKE -- confirm against the trained model config.
        fake_probability = torch.sigmoid(logits[0, 0]).item()

        is_fake = fake_probability >= 0.5
        confidence = fake_probability if is_fake else 1.0 - fake_probability

        return {
            "result": "FAKE" if is_fake else "REAL",
            "confidence": confidence,
        }