diff --git a/.gitignore b/.gitignore index 6c16453..aa85191 100644 --- a/.gitignore +++ b/.gitignore @@ -155,7 +155,7 @@ cython_debug/ #ML VeraMind-Mini/ -Token.txt +Token.py # OS generated files # ###################### diff --git a/src/Ai/__init__.py b/src/Ai/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/Ai/llm.py b/src/Ai/llm.py index 792fe91..0b44cfb 100644 --- a/src/Ai/llm.py +++ b/src/Ai/llm.py @@ -1,14 +1,10 @@ from langchain_community.llms import Ollama +from Ai.Token import get_token class ArticleRater: def __init__(self): self.client = "https://ai.fabelous.app/v1/ollama/generic" - self.headers = self._load_token("Token.txt") - - def _load_token(self, token_path): - with open(token_path, "r") as f: - token = f.read().strip() - return {"Authorization": f"Token {token}"} + self.headers = {"Authorization": f"Token {get_token()}"} def get_response(self, article, result, confidence): ollama_params = { diff --git a/src/controller/mainFrameController.py b/src/controller/mainFrameController.py index 606d7ae..7406d0a 100644 --- a/src/controller/mainFrameController.py +++ b/src/controller/mainFrameController.py @@ -8,8 +8,8 @@ from models.provider import Provider from collections import Counter from Ai.llm import ArticleRater -BAD_WORDS = ["FAKE", "SATIRE", "Fake", "fake"] -GOOD_WORDS = ["REAL", "real", "Real"] +BAD_WORDS = ["FAKE", "SATIRE", "Fake", "fake", "fake news", "Fake News", "FakeNews"] +GOOD_WORDS = ["REAL", "real ", "Real", "Reale News", "reale", "reale News", "realen", "Real News"] BAD_COLOR = "#ff8080" GOOD_COLOR = "#80ff8f" WORDS = BAD_WORDS + GOOD_WORDS @@ -43,12 +43,13 @@ class MainFrameController: text_data.url = self.frame.entry_url.get() if not text_data.text_from_url(): text_data.text = self.frame.input_textbox.get("0.0", "end") + text_data.provider = "Unknown" return text_data def press_check_button(self): text_data = self.get_text_data() - print(text_data.text) - self._predict(text_data) + text_data = self._predict(text_data) + self._add_to_db(text_data) self.frame.output_textbox.configure(state="normal") self.frame.output_textbox.delete("0.0", "end") @@ -67,24 +68,24 @@ class MainFrameController: # Process highlighting when buffer is full if len(highlight_buffer) == 5: - self.process_highlighting(highlight_buffer) + self._process_highlighting(highlight_buffer) # Process any remaining chunks in the buffer if highlight_buffer: - self.process_highlighting(highlight_buffer) + self._process_highlighting(highlight_buffer) self.frame.output_textbox.configure(state="disabled") self.update_provider_list() - def process_highlighting(self, highlight_buffer): + def _process_highlighting(self, highlight_buffer): start_index = self.frame.output_textbox.index(f"end-{sum(len(c) for c in highlight_buffer)}c") end_index = self.frame.output_textbox.index("end") - self.highlight_words(start_index, end_index) + self._highlight_words(start_index, end_index) # Keep overlap of 2 chunks - highlight_buffer = deque(list(highlight_buffer)[-2:], maxlen=5) + highlight_buffer = deque(list(highlight_buffer)[-3:], maxlen=5) - def highlight_words(self, start_index, end_index): + def _highlight_words(self, start_index, end_index): content = self.frame.output_textbox.get(start_index, end_index) for word in WORDS: @@ -123,7 +124,7 @@ class MainFrameController: :param text_data: TextData object containing the analyzed information """ - self.db.insert_data(url=text_data.url, anbieter=text_data.get_provider(),is_fake_news= text_data.is_fake_news) + self.db.insert_data(url=text_data.url, anbieter=text_data.get_provider(), is_fake_news= text_data.is_fake_news) def _fetch_db_data(self): self.text_data_list = [] @@ -146,14 +147,18 @@ class MainFrameController: # Zähle die Häufigkeit jedes Providers provider_counts = Counter(text_data.provider for text_data in text_data_list if text_data.provider) - # Erstelle und sortiere die Provider-Liste - sorted_providers = [ + # Erstelle die Provider-Liste + providers = [ Provider(name, count, provider_groups.get(name, [])) - for name, count in sorted(provider_counts.items(), key=lambda x: x[1], reverse=True) + for name, count in provider_counts.items() ] + # Sortiere die Provider-Liste nach dem Fake-Prozentsatz (absteigend) + sorted_providers = sorted(providers, key=lambda x: x.get_fake_percentage(), reverse=True) + return sorted_providers + def update_provider_list(self): self._fetch_db_data() # Lösche vorhandene Einträge in der scrollbaren Ansicht diff --git a/src/models/data.py b/src/models/data.py index 3f28398..08d3a51 100644 --- a/src/models/data.py +++ b/src/models/data.py @@ -21,7 +21,7 @@ class TextData: def text_from_url(self)-> bool: if not self.url: print("No url") - return True + return False if not self.text: print("Extrahiere Text von URL...") @@ -29,7 +29,7 @@ class TextData: self._extractor.fetch_content() self._extractor.extract_text() self.text = self._extractor.get_text() - return False + return True def get_output(self): @@ -50,10 +50,10 @@ class TextData: :return: The extracted domain or None if the URL is invalid """ if not self._is_valid_url(self.url): - self.provider = "None" + self.provider = "Unknown" parsed_url = urlparse(self.url) domain_parts = parsed_url.netloc.split('.') - self.provider = f"{domain_parts[-2]}.{domain_parts[-1]}" if len(domain_parts) >= 2 else "None" + self.provider = f"{domain_parts[-2]}.{domain_parts[-1]}" if len(domain_parts) >= 2 else "Unknown" def _is_valid_url(self, url: str) -> bool: """