text extraction working
This commit is contained in:
parent
f30490a676
commit
90ca3adec9
|
@ -4,7 +4,18 @@ class MainFrameController:
|
|||
|
||||
def __init__(self, frame: MainFrame) -> None:
    """Attach the controller to the frame it drives.

    Args:
        frame: The main frame whose widgets this controller reads.
    """
    # Controller-owned text record, created empty.
    self.text_data = TextData()
    self.frame = frame
|
||||
|
||||
def get_entry(self):
    """Copy the current content of the frame's URL entry into ``self.text_data.url``."""
    entered_url = self.frame.entry_url.get()
    self.text_data.url = entered_url
|
||||
|
||||
def get_textdata(self) -> TextData:
    """Build a fresh ``TextData`` from the frame's current input.

    The URL entry is read first and ``TextData.text_from_url()`` is asked to
    fill in the text. When that call returns a truthy value, the content of
    the frame's input textbox is used as the text instead.

    Returns:
        A new ``TextData`` carrying the entered URL and the resolved text.
    """
    text_data = TextData()
    text_data.url = self.frame.entry_url.get()

    # Note the inverted-looking contract: a truthy result from
    # text_from_url() selects the textbox fallback, not the URL text.
    if text_data.text_from_url():
        # Assumes input_textbox is a Tk/CustomTkinter text widget: such
        # widgets always append one newline at "end", so "end-1c" is used to
        # return exactly what was typed (an empty box yields "" rather
        # than "\n").
        text_data.text = self.frame.input_textbox.get("0.0", "end-1c")

    return text_data
|
||||
|
||||
def press_check_button(self):
    """Handle a press of the check button: collect the input text and print it."""
    collected_text = self.get_textdata().text
    print(f"text:{collected_text}")
|
||||
|
||||
|
|
@ -12,6 +12,8 @@ class Main(customtkinter.CTk):
|
|||
mainFrame = MainFrame(self)
|
||||
mainFrame.grid(row=0, column=0, padx=10, pady=10,sticky="nsew")
|
||||
controller_mainframe = MainFrameController(mainFrame)
|
||||
mainFrame.set_controller(controller_mainframe)
|
||||
|
||||
self.title("VeracityAI")
|
||||
self.geometry("800x500")
|
||||
|
||||
|
|
|
@ -1,11 +1,28 @@
|
|||
from utils.webTextExtractor import WebTextExtractor
|
||||
|
||||
class TextData:
    """Hold a URL together with the text that belongs to it.

    The text is either extracted from the URL through ``WebTextExtractor``
    (see ``text_from_url``) or assigned directly to ``text`` by the caller.
    """

    def __init__(self, url: str = "") -> None:
        """Create a record with no text, optionally bound to *url*."""
        self.url = url
        self.text = ""
        # Extractor used by the most recent fetch; None until one has run.
        self._extractor = None

    def set_url(self, url: str) -> None:
        """Bind a new URL and drop state that belonged to the previous one."""
        self.url = url
        self.text = ""  # Reset text when URL changes
        self._extractor = None  # Reset extractor when URL changes

    def text_from_url(self) -> bool:
        """Fill ``text`` from ``url`` unless text is already present.

        Returns:
            True when no URL is set: nothing was fetched and the caller has
            to provide the text some other way. False when a URL is set,
            whether the text was already present or has just been extracted.

        Exceptions raised by the extractor are not caught here and
        propagate to the caller.
        """
        if not self.url:
            print("No url")
            return True

        # Only fetch when there is no text yet, so existing text is kept.
        if not self.text:
            print("Extrahiere Text von URL...")
            self._extractor = WebTextExtractor(self.url)
            self._extractor.fetch_content()
            self._extractor.extract_text()
            self.text = self._extractor.get_text()

        return False
|
||||
|
||||
|
||||
|
|
@ -6,9 +6,6 @@ class WebTextExtractor:
|
|||
self.url = url
|
||||
self.content = None
|
||||
self.text = None
|
||||
|
||||
self.fetch_content()
|
||||
self.extract_text()
|
||||
|
||||
def fetch_content(self):
|
||||
"""Holt den HTML-Inhalt von der Webseite."""
|
||||
|
@ -31,7 +28,7 @@ class WebTextExtractor:
|
|||
def get_text(self):
    """Return the extracted text.

    Returns:
        The value of ``self.text`` when it is non-empty.

    Raises:
        RuntimeError: If ``self.text`` is empty or ``None``, i.e. no text
            has been extracted yet. ``RuntimeError`` derives from
            ``Exception``, so handlers written for ``Exception`` still match.
    """
    if not self.text:
        raise RuntimeError("Kein Text extrahiert. Bitte zuerst extract_text() aufrufen.")
    return self.text
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ class MainFrame(ctk.CTkFrame):
|
|||
|
||||
def __init__(self, master: Any, **kwargs):
|
||||
super().__init__(master, **kwargs)
|
||||
|
||||
self.controller = None
|
||||
# Konfiguriere das Hauptframe, um sich zu dehnen
|
||||
self.grid_rowconfigure(0, weight=1)
|
||||
self.grid_columnconfigure(0, weight=1) # Linke Spalte soll sich dehnen
|
||||
|
@ -27,7 +27,7 @@ class MainFrame(ctk.CTkFrame):
|
|||
self.output_textbox.grid(row=2, column=0, padx=10, pady=10, sticky="nsew")
|
||||
|
||||
# Mittlerer Button
|
||||
self.check_button = ctk.CTkButton(self, text="Check", width=60, height=300)
|
||||
self.check_button = ctk.CTkButton(self, text="Check", width=60, height=300, command=self.check_button_event)
|
||||
self.check_button.grid(row=0, column=1, padx=10, pady=10, sticky="nsew")
|
||||
|
||||
# Rechte scrollbare Ansicht
|
||||
|
@ -36,4 +36,11 @@ class MainFrame(ctk.CTkFrame):
|
|||
|
||||
# Überschrift hinzufügen
|
||||
self.header = ctk.CTkLabel(self.scrollview, text="Leaderboard", font=("Arial", 24, "bold"))
|
||||
self.header.pack(pady=10, padx=10, anchor="w")
|
||||
self.header.pack(pady=10, padx=10, anchor="w")
|
||||
|
||||
def set_controller(self, controller):
    """Register the controller that receives this frame's button events.

    Args:
        controller: Object stored as ``self.controller``.
    """
    self.controller = controller
|
||||
|
||||
def check_button_event(self):
    """Forward a click on the check button to the controller, if one is set."""
    handler = self.controller
    if not handler:
        # No controller registered: the click is ignored.
        return
    handler.press_check_button()
|
||||
|
|
Reference in New Issue