From 103d2eee63f6a6bc650f8a0999ebd5d76aa705de Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Thu, 10 Oct 2024 10:32:53 +0200 Subject: [PATCH 1/3] bug fix --- src/controller/mainFrameController.py | 17 ++++++++--------- src/models/data.py | 4 ++-- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/controller/mainFrameController.py b/src/controller/mainFrameController.py index 606d7ae..fb93d42 100644 --- a/src/controller/mainFrameController.py +++ b/src/controller/mainFrameController.py @@ -8,8 +8,8 @@ from models.provider import Provider from collections import Counter from Ai.llm import ArticleRater -BAD_WORDS = ["FAKE", "SATIRE", "Fake", "fake"] -GOOD_WORDS = ["REAL", "real", "Real"] +BAD_WORDS = ["FAKE", "SATIRE", "Fake", "fake", "fake news", "Fake News"] +GOOD_WORDS = ["REAL", "real", "Real", "Reale News", "reale", "reale News", "realen"] BAD_COLOR = "#ff8080" GOOD_COLOR = "#80ff8f" WORDS = BAD_WORDS + GOOD_WORDS @@ -47,7 +47,6 @@ class MainFrameController: def press_check_button(self): text_data = self.get_text_data() - print(text_data.text) self._predict(text_data) self.frame.output_textbox.configure(state="normal") self.frame.output_textbox.delete("0.0", "end") @@ -67,24 +66,24 @@ class MainFrameController: # Process highlighting when buffer is full if len(highlight_buffer) == 5: - self.process_highlighting(highlight_buffer) + self._process_highlighting(highlight_buffer) # Process any remaining chunks in the buffer if highlight_buffer: - self.process_highlighting(highlight_buffer) + self._process_highlighting(highlight_buffer) self.frame.output_textbox.configure(state="disabled") self.update_provider_list() - def process_highlighting(self, highlight_buffer): + def _process_highlighting(self, highlight_buffer): start_index = self.frame.output_textbox.index(f"end-{sum(len(c) for c in highlight_buffer)}c") end_index = self.frame.output_textbox.index("end") - self.highlight_words(start_index, end_index) + self._highlight_words(start_index, end_index) # Keep overlap of 2 chunks - highlight_buffer = deque(list(highlight_buffer)[-2:], maxlen=5) + highlight_buffer = deque(list(highlight_buffer)[-3:], maxlen=5) - def highlight_words(self, start_index, end_index): + def _highlight_words(self, start_index, end_index): content = self.frame.output_textbox.get(start_index, end_index) for word in WORDS: diff --git a/src/models/data.py b/src/models/data.py index 3f28398..96acb43 100644 --- a/src/models/data.py +++ b/src/models/data.py @@ -21,7 +21,7 @@ class TextData: def text_from_url(self)-> bool: if not self.url: print("No url") - return True + return False if not self.text: print("Extrahiere Text von URL...") @@ -29,7 +29,7 @@ class TextData: self._extractor.fetch_content() self._extractor.extract_text() self.text = self._extractor.get_text() - return False + return True def get_output(self): From d4c32c581d22b5761b0ff7be761616d5d9399172 Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Thu, 10 Oct 2024 11:02:42 +0200 Subject: [PATCH 2/3] llm bugfix --- .gitignore | 2 +- src/Ai/__init__.py | 0 src/Ai/llm.py | 8 ++------ src/controller/mainFrameController.py | 6 +++--- src/models/data.py | 4 ++-- 5 files changed, 8 insertions(+), 12 deletions(-) create mode 100644 src/Ai/__init__.py diff --git a/.gitignore b/.gitignore index 6c16453..aa85191 100644 --- a/.gitignore +++ b/.gitignore @@ -155,7 +155,7 @@ cython_debug/ #ML VeraMind-Mini/ -Token.txt +Token.py # OS generated files # ###################### diff --git a/src/Ai/__init__.py b/src/Ai/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/Ai/llm.py b/src/Ai/llm.py index 792fe91..0b44cfb 100644 --- a/src/Ai/llm.py +++ b/src/Ai/llm.py @@ -1,14 +1,10 @@ from langchain_community.llms import Ollama +from Ai.Token import get_token class ArticleRater: def __init__(self): self.client = "https://ai.fabelous.app/v1/ollama/generic" - self.headers = self._load_token("Token.txt") - - def _load_token(self, token_path): - with open(token_path, "r") as f: - token = f.read().strip() - return {"Authorization": f"Token {token}"} + self.headers = {"Authorization": f"Token {get_token()}"} def get_response(self, article, result, confidence): ollama_params = { diff --git a/src/controller/mainFrameController.py b/src/controller/mainFrameController.py index fb93d42..a46508d 100644 --- a/src/controller/mainFrameController.py +++ b/src/controller/mainFrameController.py @@ -8,8 +8,8 @@ from models.provider import Provider from collections import Counter from Ai.llm import ArticleRater -BAD_WORDS = ["FAKE", "SATIRE", "Fake", "fake", "fake news", "Fake News"] -GOOD_WORDS = ["REAL", "real", "Real", "Reale News", "reale", "reale News", "realen"] +BAD_WORDS = ["FAKE", "SATIRE", "Fake", "fake", "fake news", "Fake News", "FakeNews"] +GOOD_WORDS = ["REAL", "real ", "Real", "Reale News", "reale", "reale News", "realen", "Real News"] BAD_COLOR = "#ff8080" GOOD_COLOR = "#80ff8f" WORDS = BAD_WORDS + GOOD_WORDS @@ -122,7 +122,7 @@ class MainFrameController: :param text_data: TextData object containing the analyzed information """ - self.db.insert_data(url=text_data.url, anbieter=text_data.get_provider(),is_fake_news= text_data.is_fake_news) + self.db.insert_data(url=text_data.url, anbieter=text_data.get_provider(), is_fake_news= text_data.is_fake_news) def _fetch_db_data(self): self.text_data_list = [] diff --git a/src/models/data.py b/src/models/data.py index 96acb43..08d3a51 100644 --- a/src/models/data.py +++ b/src/models/data.py @@ -50,10 +50,10 @@ class TextData: :return: The extracted domain or None if the URL is invalid """ if not self._is_valid_url(self.url): - self.provider = "None" + self.provider = "Unknown" parsed_url = urlparse(self.url) domain_parts = parsed_url.netloc.split('.') - self.provider = f"{domain_parts[-2]}.{domain_parts[-1]}" if len(domain_parts) >= 2 else "None" + self.provider = f"{domain_parts[-2]}.{domain_parts[-1]}" if len(domain_parts) >= 2 else "Unknown" def _is_valid_url(self, url: str) -> bool: """ From 2302652692e5e38972d1f078a535b587dc9dfb1f Mon Sep 17 00:00:00 2001 From: Falko Habel Date: Thu, 10 Oct 2024 11:14:42 +0200 Subject: [PATCH 3/3] updated rotation --- src/controller/mainFrameController.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/controller/mainFrameController.py b/src/controller/mainFrameController.py index a46508d..7406d0a 100644 --- a/src/controller/mainFrameController.py +++ b/src/controller/mainFrameController.py @@ -43,11 +43,13 @@ class MainFrameController: text_data.url = self.frame.entry_url.get() if not text_data.text_from_url(): text_data.text = self.frame.input_textbox.get("0.0", "end") + text_data.provider = "Unknown" return text_data def press_check_button(self): text_data = self.get_text_data() - self._predict(text_data) + text_data = self._predict(text_data) + self._add_to_db(text_data) self.frame.output_textbox.configure(state="normal") self.frame.output_textbox.delete("0.0", "end") @@ -145,14 +147,18 @@ class MainFrameController: # Zähle die Häufigkeit jedes Providers provider_counts = Counter(text_data.provider for text_data in text_data_list if text_data.provider) - # Erstelle und sortiere die Provider-Liste - sorted_providers = [ + # Erstelle die Provider-Liste + providers = [ Provider(name, count, provider_groups.get(name, [])) - for name, count in sorted(provider_counts.items(), key=lambda x: x[1], reverse=True) + for name, count in provider_counts.items() ] + # Sortiere die Provider-Liste nach dem Fake-Prozentsatz (absteigend) + sorted_providers = sorted(providers, key=lambda x: x.get_fake_percentage(), reverse=True) + return sorted_providers + def update_provider_list(self): self._fetch_db_data() # Lösche vorhandene Einträge in der scrollbaren Ansicht