def resize_article(self, article):
    """Trim an article by dropping its first 30 words and its last 10%.

    The text is split on whitespace, the first 30 words are discarded, and
    then the trailing 10% (rounded down) of the remaining words is removed.

    Args:
        article: The text to trim.

    Returns:
        The remaining words joined by single spaces. An empty string is
        returned when the article has 30 words or fewer.
    """
    # Split on any whitespace and drop the first 30 words.
    words = article.split()[30:]

    # Number of trailing words to drop: 10% of what is left, rounded down.
    num_to_remove = int(len(words) * 0.1)

    # Use an explicit end index rather than ``words[:-num_to_remove]``:
    # with fewer than ten words left ``num_to_remove`` is 0, and
    # ``words[:-0]`` is ``words[:0]``, which would discard every word.
    words = words[:len(words) - num_to_remove]

    return ' '.join(words)