diff --git a/requirements.txt b/requirements.txt index e06b397..3cabd84 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,5 @@ langchain_community langchain pathlib unstructured -markdown \ No newline at end of file +markdown +requests \ No newline at end of file diff --git a/scripts/Rag.py b/scripts/Rag.py index 0c5ee16..4b34789 100644 --- a/scripts/Rag.py +++ b/scripts/Rag.py @@ -6,6 +6,7 @@ from langchain_community.embeddings import OllamaEmbeddings from langchain_community.vectorstores import Chroma from langchain_community.chat_models import ChatOllama from langchain.chains import RetrievalQA +import requests @@ -40,9 +41,12 @@ class Rag: def get_file(self, file_path): # Check if the file path starts with 'https://' if file_path.startswith('https://'): - loader = WebBaseLoader(file_path) - data = loader.load() - if data is None: + try: + loader = WebBaseLoader(file_path) + data = loader.load() + if data is None: + return False + except requests.exceptions.SSLError: return False else: file_type = file_path.split(".")[-1] @@ -61,8 +65,7 @@ class Rag: loader = PyPDFLoader(file_path=file_path) data = loader.load_and_split() case _: - loader = WebBaseLoader(file_path) - data = loader.load() + return False except OSError: return False