# Veracity_AI/tests/utils/test_webText_Extractor.py

import unittest.mock
import pytest
import os
import sys
from unittest.mock import MagicMock
# Add the src directory to the Python path
src_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'src'))
sys.path.insert(0, src_dir)
from utils.webTextExtractor import WebTextExtractor

@pytest.fixture
def web_text_extractor():
    """Build the WebTextExtractor instance shared by the tests in this module.

    The extractor is constructed with the URL "https://example.com"; a new
    instance is created for every test that requests this fixture.
    """
    target_url = "https://example.com"
    extractor = WebTextExtractor(target_url)
    return extractor

def test_fetch_content(web_text_extractor):
    """After fetch_content(), the extractor's ``content`` must not be None."""
    # NOTE(review): nothing is mocked here, so this presumably issues a real
    # request to the fixture URL - confirm whether that is intended.
    extractor = web_text_extractor
    extractor.fetch_content()

    fetched = extractor.content
    assert fetched is not None

def test_extract_text(web_text_extractor):
    """After fetch_content() then extract_text(), ``text`` must not be None."""
    extractor = web_text_extractor

    # Run the two steps in the same order as the original test: fetch first,
    # then extract.
    for step in (extractor.fetch_content, extractor.extract_text):
        step()

    extracted = extractor.text
    assert extracted is not None

def test_get_text(web_text_extractor):
    """get_text() should return the text stored by the (mocked) extract step.

    ``fetch_content`` and ``extract_text`` are replaced on the instance with
    MagicMock objects, so neither real method runs. Each mock is invoked once
    directly by this test and then asserted to have been called exactly once,
    which means the test only passes if get_text() does not invoke them again.
    """
    extractor = web_text_extractor

    def fake_extract():
        # Stand-in for the real extraction: just store a known string.
        extractor.text = "Example text"

    # Replace fetching with a no-op mock and supply the content by hand.
    extractor.fetch_content = MagicMock()
    extractor.content = "Some content from the webpage"

    # Replace extraction with a mock that stores the known string.
    extractor.extract_text = MagicMock(side_effect=fake_extract)

    # Drive the pipeline manually: fetch, extract, then read the text back.
    extractor.fetch_content()
    extractor.extract_text()
    result = extractor.get_text()

    # A value must come back at all.
    assert result is not None

    # Both mocked steps must have run exactly once (fetch checked first).
    for mocked_step in (extractor.fetch_content, extractor.extract_text):
        mocked_step.assert_called_once()

    # The value returned is exactly what the mocked extraction stored.
    assert result == "Example text"

def test_resize_article(web_text_extractor):
    """resize_article() should cut a 3000-word article down to 512 words."""
    # Build the input: the 5-word phrase repeated 600 times (3000 words).
    phrase = "This is a test article"
    article = " ".join([phrase] * 600)

    resized_article = web_text_extractor.resize_article(article)
    words = resized_article.split()

    # The result must contain exactly 512 whitespace-separated words.
    assert len(words) == 512

    # The result must begin with one full copy of the phrase.
    # NOTE(review): the earlier comment said the result starts at the 31st
    # word of the original; because the input repeats every 5 words, this
    # check cannot tell that apart from starting at the first word - confirm
    # the intended offset against resize_article itself.
    assert words[:5] == ["This", "is", "a", "test", "article"]

if __name__ == "__main__":
    # Allow running this file directly as a script. pytest.main() returns the
    # run's exit status; pass it on to the interpreter so a failing run is not
    # reported to the shell as success.
    raise SystemExit(pytest.main([__file__]))