# Tests for WebTextExtractor (utils.webTextExtractor).

import os
import sys
import unittest.mock
from unittest.mock import MagicMock

import pytest

# Add the src directory to the Python path. This must run before the project
# import below, otherwise ``utils`` cannot be resolved.
src_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'src'))
sys.path.insert(0, src_dir)

from utils.webTextExtractor import WebTextExtractor  # noqa: E402


@pytest.fixture
def web_text_extractor():
    """Provide a WebTextExtractor constructed with "https://example.com"."""
    return WebTextExtractor("https://example.com")


def test_fetch_content(web_text_extractor):
    """After fetch_content(), the ``content`` attribute is not None."""
    # NOTE(review): fetch_content is not mocked here, so this presumably
    # needs real access to the fixture URL -- confirm and consider mocking.
    web_text_extractor.fetch_content()
    assert web_text_extractor.content is not None


def test_extract_text(web_text_extractor):
    """After fetch_content() and extract_text(), ``text`` is not None."""
    web_text_extractor.fetch_content()
    web_text_extractor.extract_text()
    assert web_text_extractor.text is not None


def test_get_text(web_text_extractor):
    """get_text() returns the text produced by the mocked extraction step."""
    # Replace fetch_content with a mock and set the content it would provide.
    web_text_extractor.fetch_content = MagicMock()
    web_text_extractor.content = "Some content from the webpage"

    # Stand-in for extract_text: simulate the extraction by setting ``text``.
    def mock_extract_text():
        web_text_extractor.text = "Example text"

    web_text_extractor.extract_text = MagicMock(side_effect=mock_extract_text)

    # Drive the mocked pipeline by hand, then read the result back.
    web_text_extractor.fetch_content()
    web_text_extractor.extract_text()
    result = web_text_extractor.get_text()

    assert result is not None

    # Each mock was called exactly once above; these checks therefore also
    # require that get_text() does not invoke the mocked methods again.
    web_text_extractor.fetch_content.assert_called_once()
    web_text_extractor.extract_text.assert_called_once()

    assert result == "Example text"


def test_resize_article(web_text_extractor):
    """resize_article() yields 512 words that begin with the test phrase."""
    # Create a long article text for testing: a 5-word phrase repeated 600
    # times (3000 words).
    article = " ".join(["This is a test article"] * 600)

    resized_article = web_text_extractor.resize_article(article)
    words = resized_article.split()

    # Check that the resized article has the expected length.
    assert len(words) == 512

    # The resized article must open with the 5-word phrase. Because the input
    # is that phrase repeated, this holds for any start offset that is a
    # multiple of 5.
    # NOTE(review): the original comment said the result starts at the 31st
    # word of the article -- confirm against resize_article.
    assert words[:5] == ["This", "is", "a", "test", "article"]


if __name__ == "__main__":
    # pytest.main() returns the exit code instead of exiting; pass it on so a
    # failing run is visible to the shell / CI when this file is executed.
    sys.exit(pytest.main([__file__]))