Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
263 lines
9.8 KiB
Python
263 lines
9.8 KiB
Python
"""
|
|
Unit Tests — Link Preview
|
|
==========================
|
|
|
|
Tests for blueprints/messages/link_preview.py:
|
|
- OGParser with og: meta tags
|
|
- OGParser fallback to <title> and meta description
|
|
- OGParser with no meta tags
|
|
- fetch_link_preview with no URL
|
|
- fetch_link_preview skips internal URLs
|
|
- fetch_link_preview success (mocked HTTP)
|
|
- fetch_link_preview timeout handling
|
|
- fetch_link_preview non-HTML content-type
|
|
- URL extraction from HTML anchor tags
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
|
|
# Prepend the directory three levels up from this file's path to sys.path.
# NOTE(review): presumably that directory is the project root, so that the
# `blueprints` package imported below resolves when the tests are run
# directly — confirm against the repository layout.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
|
|
|
import pytest
|
|
from unittest.mock import patch, MagicMock
|
|
from requests.exceptions import Timeout
|
|
|
|
from blueprints.messages.link_preview import fetch_link_preview, OGParser
|
|
|
|
|
|
# ============================================================
|
|
# OGParser Tests
|
|
# ============================================================
|
|
|
|
class TestOGParser:
    """Exercise OGParser against small hand-written HTML documents."""

    @staticmethod
    def _parsed(markup):
        """Feed *markup* to a fresh OGParser and return that parser."""
        og_parser = OGParser()
        og_parser.feed(markup)
        return og_parser

    def test_parses_og_title_description_image(self):
        """og:title, og:description and og:image are all exposed via ``og``."""
        html = """
        <html><head>
        <meta property="og:title" content="Test Title">
        <meta property="og:description" content="Test Description">
        <meta property="og:image" content="https://example.com/image.jpg">
        </head></html>
        """
        parsed = self._parsed(html)
        expected_pairs = (
            ('title', 'Test Title'),
            ('description', 'Test Description'),
            ('image', 'https://example.com/image.jpg'),
        )
        for key, expected in expected_pairs:
            assert parsed.og[key] == expected

    def test_fallback_to_title_tag_and_meta_description(self):
        """Without og: tags, <title> and meta description are picked up."""
        html = """
        <html><head>
        <title>Fallback Title</title>
        <meta name="description" content="Fallback Description">
        </head></html>
        """
        parsed = self._parsed(html)
        assert parsed.title == 'Fallback Title'
        assert parsed.og.get('description') == 'Fallback Description'
        # The <title> tag must not be recorded as an og:title entry.
        assert 'title' not in parsed.og

    def test_empty_html_returns_title_from_title_tag(self):
        """A document with only <title> yields a title and nothing else."""
        html = "<html><head><title>Only Title</title></head></html>"
        parsed = self._parsed(html)
        assert parsed.title == 'Only Title'
        for absent_key in ('description', 'image'):
            assert parsed.og.get(absent_key) is None

    def test_no_meta_tags_empty_og(self):
        """A document with no meta tags and no <title> leaves the parser empty."""
        html = "<html><head></head><body>No meta here</body></html>"
        parsed = self._parsed(html)
        assert parsed.og == {}
        assert parsed.title is None

    def test_og_description_takes_precedence_over_meta_description(self):
        """og:description is kept even when a meta description follows it."""
        html = """
        <html><head>
        <meta property="og:description" content="OG Desc">
        <meta name="description" content="Meta Desc">
        </head></html>
        """
        parsed = self._parsed(html)
        assert parsed.og['description'] == 'OG Desc'
|
|
|
|
|
|
# ============================================================
|
|
# fetch_link_preview Tests
|
|
# ============================================================
|
|
|
|
class TestFetchLinkPreview:
    """Test fetch_link_preview with the HTTP layer mocked out.

    Every test patches ``requests.get`` as looked up from the link_preview
    module, so no test can reach the real network. Tests that expect a
    fetch supply a canned response; tests that expect no fetch additionally
    assert that the mock was never called, so they cannot pass merely
    because a real request happened to fail.
    """

    # Patch target: ``requests.get`` as referenced inside the module under test.
    _REQUESTS_GET = 'blueprints.messages.link_preview.requests.get'

    @staticmethod
    def _html_response(body, content_type='text/html'):
        """Build a MagicMock standing in for an HTTP response.

        Args:
            body: Text exposed as the response's ``text`` attribute.
            content_type: Value stored under the ``content-type`` header.

        Returns:
            A MagicMock with ``headers``, ``text`` and a no-op
            ``raise_for_status`` set.
        """
        response = MagicMock()
        response.headers = {'content-type': content_type}
        response.text = body
        response.raise_for_status = MagicMock()
        return response

    def _assert_no_preview_without_fetch(self, text):
        """Assert *text* yields no preview and triggers no HTTP request."""
        with patch(self._REQUESTS_GET) as mock_get:
            result = fetch_link_preview(text)
        assert result is None
        mock_get.assert_not_called()

    def _preview_for(self, text, response):
        """Run fetch_link_preview on *text* with ``requests.get`` returning *response*."""
        with patch(self._REQUESTS_GET, return_value=response):
            return fetch_link_preview(text)

    def test_returns_none_for_none_text(self):
        """None input yields no preview."""
        self._assert_no_preview_without_fetch(None)

    def test_returns_none_for_empty_text(self):
        """An empty string yields no preview."""
        self._assert_no_preview_without_fetch('')

    def test_returns_none_when_no_url_in_text(self):
        """Text without any URL yields no preview."""
        self._assert_no_preview_without_fetch('Cześć, jak się masz?')

    def test_returns_none_for_internal_nordabiznes_url(self):
        """A nordabiznes.pl URL is skipped without being fetched."""
        self._assert_no_preview_without_fetch(
            'Sprawdź https://nordabiznes.pl/company/test'
        )

    def test_returns_none_for_staging_internal_url(self):
        """A staging.nordabiznes.pl URL is skipped without being fetched."""
        self._assert_no_preview_without_fetch(
            'Link: https://staging.nordabiznes.pl/company/foo'
        )

    def test_returns_none_for_localhost_url(self):
        """A localhost URL is skipped without being fetched."""
        self._assert_no_preview_without_fetch('Dev: http://localhost:5000/test')

    def test_success_returns_dict_with_og_data(self):
        """og: tags in the fetched page populate url/title/description/image."""
        html = """<html><head>
        <meta property="og:title" content="Example Title">
        <meta property="og:description" content="Example Description">
        <meta property="og:image" content="https://example.com/img.jpg">
        </head></html>"""
        response = self._html_response(html, 'text/html; charset=utf-8')

        result = self._preview_for('Check out https://example.com', response)

        assert result is not None
        assert result['url'] == 'https://example.com'
        assert result['title'] == 'Example Title'
        assert result['description'] == 'Example Description'
        assert result['image'] == 'https://example.com/img.jpg'

    def test_success_uses_title_tag_fallback(self):
        """With no og:title, the <title> tag supplies the preview title."""
        html = "<html><head><title>Page Title</title></head></html>"

        result = self._preview_for(
            'See https://example.com for details', self._html_response(html)
        )

        assert result is not None
        assert result['title'] == 'Page Title'

    def test_returns_none_on_timeout(self):
        """A requests Timeout raised by the fetch results in None."""
        with patch(self._REQUESTS_GET, side_effect=Timeout):
            result = fetch_link_preview('Visit https://slow-site.example.com')
        assert result is None

    def test_returns_none_for_non_html_content_type(self):
        """A non-HTML content-type (here application/pdf) yields no preview."""
        response = self._html_response(
            '%PDF-1.4 binary content', 'application/pdf'
        )

        result = self._preview_for(
            'Download https://example.com/doc.pdf', response
        )

        assert result is None

    def test_returns_none_when_page_has_no_title(self):
        """A page with neither og:title nor <title> yields no preview."""
        html = "<html><head><meta name='robots' content='noindex'></head></html>"

        result = self._preview_for(
            'Visit https://example.com', self._html_response(html)
        )

        assert result is None

    def test_title_truncated_to_200_chars(self):
        """A 300-character title comes back at most 200 characters long."""
        long_title = 'A' * 300
        html = f"<html><head><title>{long_title}</title></head></html>"

        result = self._preview_for(
            'https://example.com', self._html_response(html)
        )

        assert result is not None
        assert len(result['title']) <= 200

    def test_description_truncated_to_300_chars(self):
        """A 400-character description comes back at most 300 characters long."""
        long_desc = 'B' * 400
        html = f"""<html><head>
        <title>Title</title>
        <meta name="description" content="{long_desc}">
        </head></html>"""

        result = self._preview_for(
            'https://example.com', self._html_response(html)
        )

        assert result is not None
        assert len(result['description']) <= 300
|
|
|
|
|
|
# ============================================================
|
|
# URL Extraction from HTML Content Tests
|
|
# ============================================================
|
|
|
|
class TestURLExtractionFromHTML:
    """Test which URL fetch_link_preview picks from text that contains HTML.

    ``requests.get`` is patched in every test so none of them can reach the
    real network.
    """

    # Patch target: ``requests.get`` as referenced inside the module under test.
    _REQUESTS_GET = 'blueprints.messages.link_preview.requests.get'

    @staticmethod
    def _titled_page(title):
        """Return a mocked text/html response whose page has only a <title>."""
        response = MagicMock()
        response.headers = {'content-type': 'text/html'}
        response.text = f'<html><head><title>{title}</title></head></html>'
        response.raise_for_status = MagicMock()
        return response

    def test_extracts_url_from_anchor_tag(self):
        """A URL present only in an ``href`` attribute produces no preview.

        The expectation is that HTML tags are stripped before URLs are
        searched for, leaving just the visible text "Visit site", which
        contains no URL. The mock is asserted unused so the test cannot
        pass merely because a real request to the href target failed.
        """
        text = '<a href="https://external-site.com/page">Visit site</a>'

        with patch(self._REQUESTS_GET) as mock_get:
            result = fetch_link_preview(text)

        assert result is None
        mock_get.assert_not_called()

    def test_extracts_bare_url_from_mixed_html(self):
        """A bare URL in the visible text of an HTML fragment is used."""
        text = '<p>Check out https://example.com/news for more</p>'

        with patch(self._REQUESTS_GET, return_value=self._titled_page('News')):
            result = fetch_link_preview(text)

        assert result is not None
        assert result['url'] == 'https://example.com/news'

    def test_first_url_is_used_when_multiple_urls_present(self):
        """When text contains multiple URLs, the first one is used."""
        text = 'First: https://first.example.com and second: https://second.example.com'

        with patch(self._REQUESTS_GET, return_value=self._titled_page('First')):
            result = fetch_link_preview(text)

        assert result is not None
        assert result['url'] == 'https://first.example.com'
|