text extraction working

This commit is contained in:
Björn Ruthotto 2024-09-03 10:46:32 +02:00
parent f30490a676
commit 90ca3adec9
5 changed files with 51 additions and 17 deletions

View File

@ -4,7 +4,18 @@ class MainFrameController:
def __init__(self, frame: MainFrame) -> None:
    """Bind the controller to its view and create an empty text model.

    Args:
        frame: The main frame whose widgets this controller reads.
    """
    self.text_data = TextData()
    self.frame = frame
def get_entry(self):
    """Copy the current content of the URL entry widget into the stored text data."""
    current_url = self.frame.entry_url.get()
    self.text_data.url = current_url
def get_textdata(self) -> TextData:
    """Build a fresh TextData object from the current state of the view.

    The URL entry is tried first via ``TextData.text_from_url()``. A truthy
    result from that call is treated as "no text came from a URL", in which
    case the content of the input textbox is used instead.

    Returns:
        A new TextData instance carrying the entered URL and the text.
    """
    text_data = TextData()
    # Surrounding whitespace is not part of a URL; stripping it also makes a
    # whitespace-only entry count as "no URL".
    text_data.url = self.frame.entry_url.get().strip()
    if text_data.text_from_url():
        # "end-1c" leaves out the newline the Tk text widget always appends,
        # so an empty textbox yields "" rather than "\n".
        text_data.text = self.frame.input_textbox.get("0.0", "end-1c")
    return text_data
def press_check_button(self):
    """Handle a press of the check button: gather the text data and print its text."""
    print(f"text:{self.get_textdata().text}")

View File

@ -12,6 +12,8 @@ class Main(customtkinter.CTk):
mainFrame = MainFrame(self)
mainFrame.grid(row=0, column=0, padx=10, pady=10,sticky="nsew")
controller_mainframe = MainFrameController(mainFrame)
mainFrame.set_controller(controller_mainframe)
self.title("VeracityAI")
self.geometry("800x500")

View File

@ -1,11 +1,28 @@
from utils.webTextExtractor import WebTextExtractor
class TextData:
    """Hold a source URL together with the text that belongs to it.

    Attributes:
        url: Address of the web page to extract text from; empty when unset.
        text: The text to work with; empty until assigned or extracted.
    """

    def __init__(self, url: str = "") -> None:
        """Create an instance without text and without an extractor.

        Args:
            url: Optional URL of the page whose text should be extracted.
        """
        self.url = url
        self.text = ""
        # Created on demand by text_from_url().
        self._extractor = None

    def set_url(self, url: str) -> None:
        """Replace the URL and drop the state derived from the previous one.

        Args:
            url: The new URL.
        """
        self.url = url
        self.text = ""  # Reset text when URL changes
        self._extractor = None  # Reset extractor when URL changes

    def text_from_url(self) -> bool:
        """Fill ``text`` from the page at ``url`` unless text is already present.

        Returns:
            True when no URL is set, meaning the caller has to provide the
            text from another source; False when a URL is set (the text was
            either extracted now or was already present).
        """
        if not self.url:
            print("No url")
            return True
        if not self.text:
            print("Extrahiere Text von URL...")
            # Fetching and extracting are explicit steps on the extractor and
            # must run before get_text() is called.
            self._extractor = WebTextExtractor(self.url)
            self._extractor.fetch_content()
            self._extractor.extract_text()
            self.text = self._extractor.get_text()
        return False

View File

@ -6,9 +6,6 @@ class WebTextExtractor:
self.url = url
self.content = None
self.text = None
self.fetch_content()
self.extract_text()
def fetch_content(self):
"""Holt den HTML-Inhalt von der Webseite."""
@ -31,7 +28,7 @@ class WebTextExtractor:
def get_text(self):
    """Return the extracted text.

    Returns:
        The value stored in ``self.text``.

    Raises:
        RuntimeError: If ``self.text`` is empty or None, i.e. no text has
            been extracted yet.
    """
    if not self.text:
        raise RuntimeError("Kein Text extrahiert. Bitte zuerst extract_text() aufrufen.")
    return self.text

View File

@ -4,7 +4,7 @@ class MainFrame(ctk.CTkFrame):
def __init__(self, master: Any, **kwargs):
super().__init__(master, **kwargs)
self.controller = None
# Konfiguriere das Hauptframe, um sich zu dehnen
self.grid_rowconfigure(0, weight=1)
self.grid_columnconfigure(0, weight=1) # Linke Spalte soll sich dehnen
@ -27,7 +27,7 @@ class MainFrame(ctk.CTkFrame):
self.output_textbox.grid(row=2, column=0, padx=10, pady=10, sticky="nsew")
# Mittlerer Button
self.check_button = ctk.CTkButton(self, text="Check", width=60, height=300)
self.check_button = ctk.CTkButton(self, text="Check", width=60, height=300, command=self.check_button_event)
self.check_button.grid(row=0, column=1, padx=10, pady=10, sticky="nsew")
# Rechte scrollbare Ansicht
@ -36,4 +36,11 @@ class MainFrame(ctk.CTkFrame):
# Überschrift hinzufügen
self.header = ctk.CTkLabel(self.scrollview, text="Leaderboard", font=("Arial", 24, "bold"))
self.header.pack(pady=10, padx=10, anchor="w")
self.header.pack(pady=10, padx=10, anchor="w")
def set_controller(self, controller):
    """Store the controller that check_button_event() forwards button presses to."""
    self.controller = controller
def check_button_event(self):
    """Forward a press of the check button to the controller, if one is set."""
    if not self.controller:
        # No controller attached (yet): the press is ignored.
        return
    self.controller.press_check_button()