73 lines
2.5 KiB
Python
73 lines
2.5 KiB
Python
import unittest.mock
|
|
import pytest
|
|
import os
|
|
import sys
|
|
from unittest.mock import MagicMock
|
|
# Make the project's src directory importable: it sits two levels above the
# directory containing this file, and is placed at the front of sys.path so
# it takes precedence over other entries.
_here = os.path.dirname(__file__)
src_dir = os.path.abspath(os.path.join(_here, '..', '..', 'src'))
sys.path.insert(0, src_dir)
|
|
from utils.webTextExtractor import WebTextExtractor
|
|
|
|
@pytest.fixture
def web_text_extractor():
    """Provide a WebTextExtractor constructed for https://example.com."""
    extractor = WebTextExtractor("https://example.com")
    return extractor
|
|
|
|
def test_fetch_content(web_text_extractor):
    """fetch_content() must leave a non-None value in ``content``.

    NOTE(review): nothing is stubbed here, so this presumably performs a
    real request against the fixture's URL -- confirm, and consider
    isolating it from the network.
    """
    extractor = web_text_extractor
    extractor.fetch_content()
    fetched = extractor.content
    assert fetched is not None
|
|
|
|
def test_extract_text(web_text_extractor):
    """After fetching and extracting, ``text`` must not be None.

    NOTE(review): fetch_content() is not stubbed, so this presumably
    depends on a live fetch of the fixture's URL -- confirm.
    """
    extractor = web_text_extractor
    # extract_text() is run only after fetch_content(), mirroring the
    # fetch-then-extract order used throughout this module.
    extractor.fetch_content()
    extractor.extract_text()
    extracted = extractor.text
    assert extracted is not None
|
|
|
|
def test_get_text(web_text_extractor):
    """get_text() should hand back the text stored by the extraction step.

    Both fetch_content and extract_text are replaced on the instance with
    MagicMock objects, so only get_text() runs real code.
    """
    extractor = web_text_extractor

    def fake_extract():
        # Stand-in for the real extraction: just store a known text value.
        extractor.text = "Example text"

    # Replace fetch_content with a no-op mock and supply the content it
    # would otherwise have been responsible for.
    extractor.fetch_content = MagicMock()
    extractor.content = "Some content from the webpage"

    # The extract_text mock runs fake_extract whenever it is called.
    extractor.extract_text = MagicMock(side_effect=fake_extract)

    # Drive the fetch -> extract -> get sequence explicitly.
    extractor.fetch_content()
    extractor.extract_text()
    result = extractor.get_text()

    assert result is not None

    # Each mock must have been invoked exactly once in total.
    extractor.fetch_content.assert_called_once()
    extractor.extract_text.assert_called_once()

    # get_text() must return exactly what the extraction step stored.
    assert result == "Example text"
|
|
|
|
def test_resize_article(web_text_extractor):
    """resize_article() should reduce a long article to exactly 512 words.

    The input is the five-word phrase "This is a test article" repeated 600
    times, i.e. 3000 whitespace-separated words.
    """
    phrase = "This is a test article"
    article = " ".join([phrase] * 600)

    # Split once and reuse the word list for every check.
    resized_words = web_text_extractor.resize_article(article).split()

    assert len(resized_words) == 512

    # The result must begin with a complete copy of the phrase. Because the
    # input repeats every five words, this only shows that the result starts
    # on a phrase boundary; it cannot tell which offset into the original
    # article was used.
    # NOTE(review): the previous comment said the result starts at the 31st
    # word of the original -- confirm against resize_article and, if so,
    # use non-repeating input to actually verify it.
    assert resized_words[:5] == phrase.split()
|
|
|
|
if __name__ == "__main__":
    # Allow running this file directly. pytest.main() returns the exit code
    # instead of exiting, so pass it on to the interpreter; otherwise the
    # process would exit with status 0 even when tests fail.
    raise SystemExit(pytest.main([__file__]))
|