Merge pull request 'llm_integration' (#12) from llm_integration into develop

Reviewed-on: Berufsschule/Veracity_AI#12
2024-10-10 06:11:29 +00:00 · 2024-10-10 06:11:29 +00:00 · 50b5470ae8
parent d4e7799c22 c1a52b32ad
commit 50b5470ae8
4 changed files with 78 additions and 18 deletions
--- a/src/Ai/llm.py
+++ b/src/Ai/llm.py
@ -10,16 +10,17 @@ class ArticleRater:
            token = f.read().strip()
        return {"Authorization": f"Token {token}"}

-    def get_response(self, article, result):
+    def get_response(self, article, result, confidence):
        ollama_params = {
            "base_url": self.client,
            "model": "mistral-nemo:12b-instruct-2407-q8_0",
            "headers": self.headers,
-            "system": "Give a short explanation max 1-3 sentence why this article is rated like that"
+            "system": """Ein Mashine Learning Model hat einen Text bewertet, ob es sich um FakeNews handelt oder um Reale News. 
+            Erkläre in 1-3 Sätzen warum dieses Modell zu dieser Entscheidung. Beginne die Antwort IMMER mit den Resultaten und Konfidenzen des Models. 
+            DU SOLLST KEINE ÜBERSCHRIFTEN oder ähnliches ERKLÄREN. Du erhählst einen TEXT und sollst erklären wie das RESULTAT zustande kam"""
        }

-        message = (f"A Machine Learning Model labeled an article the following: "
-                   f"Result: {result['result']}, Confidence: {result['confidence']}, The Article: {article}")
+        message = (f"{article}, result: {result}, confidence {confidence}")

        # Initialize the Ollama object with the prepared parameters
        llm = Ollama(**ollama_params)
@ -28,13 +29,14 @@ class ArticleRater:
        return llm.stream(message)

 # Usage
-if __name__ == "main":
+if __name__ == "__main__":
    article_rater = ArticleRater()

-    article = "Example article content."
-    result = {"result": "REAL", "confidence": 0.754}
+    article = """die wöchentliche Glosse von Stefan Kuzmany Thüringer Landtag: AfD will stören - sichert stattdessen Stabilität der Regierung Ramelow Suche starten Suche öffnen Zur Ausgabe Artikel 79 / 79 Eklat bei Landtagssitzung Thüringer Demokratwurst Eine Glosse von Stefan Kuzmany Ordnungsrufe! Mikros aus! Sitzung unterbrochen! Der Thüringer Alterspräsident Jürgen Treutler (AfD) sichert mit kreativer Sitzungsleitung die Stabilität der Regierung Ramelow. 27.09.2024, 13.00 Uhr • aus DER SPIEGEL 40/2024 Zur Merkliste hinzufügen Artikel anhören (2 Minuten) 2 Min X.com Facebook E-Mail Link kopieren Weitere Optionen zum Teilen X.com Facebook E-Mail Messenger WhatsApp Link kopieren Bild vergrößern Jürgen Treutler (AfD) Foto: Bodo Schackow / dpa Sämtliche Sorgen, die AfD könnte nach ihrem dortigen Wahlerfolg die Demokratie in dem osthessischen Bundeslandstrich Thüringen abschaffen, erweisen sich als unbegründet. Tatsächlich zeigte sich auf der konstituierenden Sitzung des Erfurter Landtags am Donnerstag eindrucksvoll die Stabilität des bewährten Systems. Zu verdanken ist diese beruhigende Entwicklung dem wackeren Alterspräsidenten Jürgen Treutler (73), der bei seinem furiosen Debüt als Landesparlamentarier alle Möglichkeiten ausschöpfte, die ihm in seiner Rolle als Sitzungsleiter zustanden – und sogar noch einige mehr. DER SPIEGEL 40/2024 Foto: Melina Mara / The Washington Post / Getty Images Was kommt, falls sie gewinnt?Als Präsidentin würde Kamala Harris in einer krisengeschüttelten Welt regieren. Öffentlich beteuert sie, in die Fußstapfen von Joe Biden zu trreten, aber in der Außenpolitik will Harris eigene Akzente setzen. Für Europa ist das nicht nur eine gute Nachricht – in Sachhen Protektionismus ist sie eine Schülerin Trumps.Lesen Sie unsere Titelgeschichte, weitere Hintergründe und Analysen im digiitalen SPIEGEL. """
+    result = "REAL"
+    confidence = 0.67

    # Capture the stream response
-    response_stream = article_rater.get_response(article, result)
+    response_stream = article_rater.get_response(article, result, confidence=confidence)
    for chunk in response_stream:
        print(chunk, end='', flush=True)
--- a/src/controller/mainFrameController.py
+++ b/src/controller/mainFrameController.py
@ -1,6 +1,14 @@
+from collections import deque
 from views.mainScreen import MainFrame
 from models.data import TextData
 from Ai.interence import VeraMindInference
+from Ai.llm import ArticleRater
+
+BAD_WORDS = ["FAKE", "SATIRE", "Fake", "fake"]
+GOOD_WORDS = ["REAL", "real", "Real"]
+BAD_COLOR = "#ff8080"
+GOOD_COLOR = "#80ff8f"
+WORDS = BAD_WORDS + GOOD_WORDS


 class MainFrameController:
@ -8,7 +16,7 @@ class MainFrameController:
    def __init__(self,frame:MainFrame) -> None:
        self.frame = frame
        self.model_inference = VeraMindInference('VeraMind-Mini')
-        
+        self.rater = ArticleRater()
        
    def get_textdata(self) -> TextData:
        text_data = TextData()
@ -20,12 +28,61 @@ class MainFrameController:
    
    def press_check_button(self):
        text_data = self.get_textdata()
-        print(f"text:{text_data.text}")
+        print(text_data.text)
        self.prediction(text_data)
        self.frame.output_textbox.configure(state="normal")
        self.frame.output_textbox.delete("0.0", "end")
-        self.frame.output_textbox.insert("0.0",f"{text_data.get_output()}")
+
+        response_stream = self.rater.get_response(text_data.text, text_data.result, float(f"{text_data.confidence * 100:.2f}"))
+
+        highlight_buffer = deque(maxlen=5)
+
+        for chunk in response_stream:
+            # Display the chunk immediately
+            self.frame.output_textbox.insert("end", chunk)
+            self.frame.output_textbox.see("end")
+            self.frame.update_idletasks()
+
+            # Add to highlight buffer
+            highlight_buffer.append(chunk)
+
+            # Process highlighting when buffer is full
+            if len(highlight_buffer) == 5:
+                self.process_highlighting(highlight_buffer)
+
+        # Process any remaining chunks in the buffer
+        if highlight_buffer:
+            self.process_highlighting(highlight_buffer)
+
        self.frame.output_textbox.configure(state="disabled")
+
+    def process_highlighting(self, highlight_buffer):
+        """Highlight rating keywords in the most recently streamed chunks.
+
+        Args:
+            highlight_buffer: deque holding the latest text chunks that were
+                inserted at the end of the output textbox. It is trimmed in
+                place to its last two chunks so that the next pass overlaps
+                with this one and keywords split across chunks are still found.
+        """
+        textbox = self.frame.output_textbox
+        buffered_chars = sum(len(chunk) for chunk in highlight_buffer)
+
+        # Tk's "end" lies after the newline the text widget appends on its
+        # own, so the real end of the content is "end-1c". Anchoring there
+        # keeps the first buffered character inside the scanned range.
+        end_index = textbox.index("end-1c")
+        start_index = textbox.index(f"{end_index}-{buffered_chars}c")
+        self.highlight_words(start_index, end_index)
+
+        # Keep an overlap of 2 chunks. The deque has to be mutated in place:
+        # rebinding the parameter would leave the caller's buffer untouched.
+        while len(highlight_buffer) > 2:
+            highlight_buffer.popleft()
+
+    def highlight_words(self, start_index, end_index):
+        """Colour every rating keyword found between two textbox indices.
+
+        Each entry of WORDS is searched as a plain, case-sensitive substring.
+        Matches are tagged "<word lowercased>_color"; the tag is drawn in
+        BAD_COLOR for entries of BAD_WORDS and in GOOD_COLOR otherwise.
+
+        Args:
+            start_index: Tk text index at which the scan begins.
+            end_index: Tk text index at which the scan stops.
+        """
+        textbox = self.frame.output_textbox
+        content = textbox.get(start_index, end_index)
+
+        for word in WORDS:
+            # Gather the offsets of all non-overlapping matches first.
+            offsets = []
+            pos = content.find(word)
+            while pos != -1:
+                offsets.append(pos)
+                pos = content.find(word, pos + len(word))
+            if not offsets:
+                continue
+
+            # The colour depends only on the word, so the tag is configured
+            # once per word instead of once per match.
+            tag_name = f"{word.lower()}_color"
+            color = BAD_COLOR if word in BAD_WORDS else GOOD_COLOR
+            textbox.tag_config(tag_name, foreground=color)
+
+            for offset in offsets:
+                # "+Nc" counts characters (newlines included) from start_index,
+                # matching the offsets measured in the retrieved content.
+                word_start = f"{start_index}+{offset}c"
+                word_end = f"{word_start}+{len(word)}c"
+                textbox.tag_add(tag_name, word_start, word_end)
+
        
    def prediction(self, text_data:TextData) -> TextData:
        result = self.model_inference.predict(text_data.text)
--- a/src/utils/webTextExtractor.py
+++ b/src/utils/webTextExtractor.py
@ -34,20 +34,21 @@ class WebTextExtractor:
            raise Exception("Kein Text extrahiert. Bitte zuerst extract_text() aufrufen.")
    
    def resize_article(self, article):
-        """Resizes the article by removing the first 30 words and the last 10%."""
+        """Resizes the article by removing the first 30 words and limiting the length to 512 words."""
        # Split the article into a list of words
        words = article.split()

        # Remove the first 30 words
        words = words[30:]

-        # Calculate the number of words to remove from the end (10% of the total words)
-        num_to_remove = int(len(words) * 0.1)
+        # Calculate the number of words to keep (up to 512 words)
+        num_to_keep = min(512, len(words))

-        # Remove the last 10% of words
-        words = words[:-num_to_remove]
+        # Slice the list of words to keep only the first num_to_keep words
+        words = words[:num_to_keep]

        # Join the remaining words back into a single string
        resized_article = ' '.join(words)

        return resized_article
+
--- a/src/views/mainScreen.py
+++ b/src/views/mainScreen.py
@ -20,10 +20,10 @@ class MainFrame(ctk.CTkFrame):
        self.entry_url = ctk.CTkEntry(self.frame1, placeholder_text='Enter the article link', height=50)
        self.entry_url.grid(row=0, column=0, padx=10, pady=10, sticky="ew")
        
-        self.input_textbox = ctk.CTkTextbox(self.frame1, height=200)
+        self.input_textbox = ctk.CTkTextbox(self.frame1, height=150)
        self.input_textbox.grid(row=1, column=0, columnspan=2, padx=10, pady=10, sticky="nsew")
        
-        self.output_textbox = ctk.CTkTextbox(self.frame1, height=150, state="disabled")
+        self.output_textbox = ctk.CTkTextbox(self.frame1, height=200, state="disabled")
        self.output_textbox.grid(row=2, column=0, columnspan=2, padx=10, pady=10, sticky="nsew")
        
        # Mittlerer Button