Veracity_AI/tests/utils/test_webText_Extractor.py

73 lines
2.5 KiB
Python

import unittest.mock
import pytest
import os
import sys
from unittest.mock import MagicMock
# Make the project's ``src`` directory importable: resolve ``../../src``
# relative to this test file and insert it at the front of ``sys.path`` so it
# is searched first when the ``utils`` package is imported below.
src_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'src'))
sys.path.insert(0, src_dir)
from utils.webTextExtractor import WebTextExtractor
@pytest.fixture
def web_text_extractor():
    """Provide a ``WebTextExtractor`` built for https://example.com.

    No fixture scope is given, so pytest's default (one instance per test
    function) applies.
    """
    target_url = "https://example.com"
    return WebTextExtractor(target_url)
def test_fetch_content(web_text_extractor):
    """``fetch_content()`` should leave a non-None value in ``content``.

    NOTE(review): nothing in this test stubs the fetch, so it presumably
    performs a real request to the extractor's URL -- confirm against
    WebTextExtractor.
    """
    extractor = web_text_extractor
    extractor.fetch_content()
    fetched = extractor.content
    assert fetched is not None
def test_extract_text(web_text_extractor):
    """Fetching and then extracting should leave a non-None value in ``text``.

    NOTE(review): like the fetch test, nothing is stubbed here, so this
    presumably depends on a real fetch succeeding -- confirm.
    """
    extractor = web_text_extractor
    # Extraction is run after the fetch, in the same order a caller would use.
    extractor.fetch_content()
    extractor.extract_text()
    extracted = extractor.text
    assert extracted is not None
def test_get_text(web_text_extractor):
    """``get_text()`` should hand back the text set by the extraction step.

    ``fetch_content`` and ``extract_text`` are replaced on the instance with
    mocks, so the test's own calls to them do no real fetching or parsing.
    """
    extractor = web_text_extractor

    def fake_extract():
        # Stand-in for the real extraction: just publish a known string.
        extractor.text = "Example text"

    # Swap both pipeline steps for mocks and seed the content that a fetch
    # would otherwise have stored.
    extractor.fetch_content = MagicMock()
    extractor.content = "Some content from the webpage"
    extractor.extract_text = MagicMock(side_effect=fake_extract)

    # Drive the two steps by hand, then read the result back.
    extractor.fetch_content()
    extractor.extract_text()
    result = extractor.get_text()

    assert result is not None
    # Each mock must have been called exactly once overall; since the test
    # already called each once above, this also fails if get_text() calls
    # either of them again.
    extractor.fetch_content.assert_called_once()
    extractor.extract_text.assert_called_once()
    assert result == "Example text"
def test_resize_article(web_text_extractor):
    """``resize_article`` should reduce an over-long article to 512 words.

    The input is a 5-word phrase repeated 600 times (3000 words). The result
    must contain exactly 512 words and begin with one full copy of the phrase.

    NOTE(review): the earlier comment here said the result starts at the 31st
    word of the input. These assertions cannot tell that apart from any other
    start offset that is a multiple of five words -- confirm the intended
    window against resize_article.
    """
    phrase = "This is a test article"
    article = " ".join([phrase] * 600)

    # Split once and reuse the word list for every check.
    words = web_text_extractor.resize_article(article).split()

    assert len(words) == 512
    assert words[:5] == ["This", "is", "a", "test", "article"]
if __name__ == "__main__":
    # Allow running this file directly. pytest.main() returns the run's exit
    # code; pass it to sys.exit so a failing run produces a non-zero process
    # status instead of always exiting with 0.
    sys.exit(pytest.main([__file__]))