# Source: blueprints/api/routes_company.py

# ============================================================
# LOGO FETCH API ROUTE
# ============================================================

# NOTE(review): the URL variable was missing from the rule ("/company//fetch-logo"),
# so Flask could never pass `company_id` to the view. The `int` converter is an
# assumption based on the id lookup below -- confirm against the sibling routes.
@bp.route('/company/<int:company_id>/fetch-logo', methods=['POST'])
@login_required
def api_fetch_company_logo(company_id):
    """
    API: Fetch the company logo from the company's website automatically.

    Accessible by users who can edit the company profile (MANAGER+) or admins.

    Args:
        company_id: Primary key of the company whose logo should be fetched.

    Returns:
        A JSON response. On success this is the dict produced by
        ``LogoFetchService.fetch_logo`` (HTTP 200). Otherwise a
        ``{'success': False, 'error': ...}`` payload with status 404 (unknown
        company), 403 (no edit permission), 400 (company has no website) or
        500 (unexpected failure).
    """
    db = SessionLocal()
    try:
        company = db.query(Company).filter_by(id=company_id).first()
        if not company:
            return jsonify({'success': False, 'error': 'Firma nie znaleziona'}), 404

        # Permission: can_edit_company (MANAGER+) or is_admin
        if not current_user.can_edit_company(company.id) and not current_user.is_admin:
            return jsonify({
                'success': False,
                'error': 'Brak uprawnień do edycji profilu firmy'
            }), 403

        if not company.website:
            return jsonify({
                'success': False,
                'error': 'Firma nie ma ustawionej strony WWW'
            }), 400

        # Imported lazily so the service's dependencies are only loaded when
        # this endpoint is actually used.
        from logo_fetch_service import LogoFetchService
        result = LogoFetchService().fetch_logo(company.website, company.slug)

        logger.info(
            "Logo fetch for company %s (%s): success=%s, source=%s, by=%s",
            company.id, company.name,
            result.get('success'), result.get('source'),
            current_user.email,
        )

        return jsonify(result)

    except Exception:
        # Log the full traceback server-side; do not echo raw exception text to
        # the client, it may expose internal paths or hostnames.
        logger.exception("Logo fetch error for company %s", company_id)
        return jsonify({
            'success': False,
            'error': 'Błąd podczas pobierania logo'
        }), 500
    finally:
        db.close()
# Source: logo_fetch_service.py
"""
Logo Fetch Service - automatically downloads company logos from their websites.

Candidate sources and their priority (lower number is tried first):
    3    <img> whose id/class contains "logo"
    5    apple-touch-icon (largest declared size)
    7    inline CSS background-image on logo/brand elements
    8    <img> whose alt text contains "logo"
    10   og:image meta tag
    11   twitter:image meta tag
    15   link rel="icon" declaring at least MIN_LOGO_SIZE pixels
    20   <img> with "logo" only in its src
    100  Google Favicon API fallback

Steps reported to frontend:
- fetch_website: GET company website
- meta_tags: Parse og:image, twitter:image, favicon
- scan_images: Scan img elements for logo candidates
- download: Download best candidate image
- convert: Convert to WebP format (SVG is kept as-is)
- save: Save to static/img/companies/{slug}.webp (or .svg)
"""

import logging
import os
import re
from io import BytesIO
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)

USER_AGENT = 'Mozilla/5.0 (compatible; NordaBizBot/1.0)'
TIMEOUT = 10
MAX_DOWNLOAD_SIZE = 5 * 1024 * 1024  # 5MB
MIN_LOGO_SIZE = 64  # px
MAX_LOGO_SIZE = 800  # px
WEBP_QUALITY = 85

LOGO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'static', 'img', 'companies')


class LogoFetchService:
    """Find, download, convert and store a company logo.

    Every ``_step_*`` method appends one ``{'step', 'status', 'message'}`` dict
    to the shared ``steps`` list so the caller can report progress.
    """

    def fetch_logo(self, website_url: str, slug: str) -> dict:
        """
        Fetch logo from company website and save as WebP (or SVG).

        Args:
            website_url: Company website; a missing scheme defaults to https.
            slug: Base name (without extension) of the file written to LOGO_DIR.

        Returns:
            {'success': bool, 'message': str, 'source': str | None, 'steps': [...]}
        """
        steps = []
        candidates = []

        website_url = (website_url or '').strip()
        if not website_url:
            steps.append({'step': 'fetch_website', 'status': 'error', 'message': 'Brak adresu strony WWW'})
            return {'success': False, 'message': steps[-1]['message'], 'source': None, 'steps': steps}

        # Ensure URL has a protocol. Compare against the full scheme prefix:
        # a bare startswith('http') would treat hosts like "httpbin.org" as
        # already having one.
        if not website_url.lower().startswith(('http://', 'https://')):
            website_url = 'https://' + website_url.lstrip('/')

        # NOTE(review): the URL is user-supplied and fetched server-side;
        # there is no protection here against internal/private addresses.

        # Step 1: Fetch website
        html, base_url = self._step_fetch_website(website_url, steps)
        if html is None:
            return {'success': False, 'message': steps[-1]['message'], 'source': None, 'steps': steps}

        soup = BeautifulSoup(html, 'html.parser')

        # Step 2: Meta tags
        self._step_meta_tags(soup, base_url, candidates, steps)

        # Step 3: Scan images
        self._step_scan_images(soup, base_url, candidates, steps)

        # Add Google Favicon as last-resort fallback
        domain = urlparse(base_url).netloc
        if domain:
            candidates.append({
                'url': f'https://www.google.com/s2/favicons?domain={domain}&sz=128',
                'source': 'google_favicon',
                'priority': 100
            })

        if not candidates:
            steps.append({'step': 'download', 'status': 'error', 'message': 'Nie znaleziono kandydatów na logo'})
            steps.append({'step': 'convert', 'status': 'skipped', 'message': 'Pominięto — brak obrazu'})
            steps.append({'step': 'save', 'status': 'skipped', 'message': 'Pominięto — brak obrazu'})
            return {'success': False, 'message': 'Nie znaleziono logo na stronie firmy', 'source': None, 'steps': steps}

        # Sort by priority (lower = better); the sort is stable, so discovery
        # order breaks ties.
        candidates.sort(key=lambda c: c['priority'])

        # Step 4: Download best candidate
        image_data, image_source, content_type = self._step_download(candidates, steps)
        if image_data is None:
            steps.append({'step': 'convert', 'status': 'skipped', 'message': 'Pominięto — brak obrazu'})
            steps.append({'step': 'save', 'status': 'skipped', 'message': 'Pominięto — brak obrazu'})
            return {'success': False, 'message': 'Nie udało się pobrać żadnego kandydata', 'source': None, 'steps': steps}

        # Step 5: Convert
        is_svg = bool(content_type) and 'svg' in content_type.lower()
        output_data, file_ext = self._step_convert(image_data, is_svg, steps)
        if output_data is None:
            steps.append({'step': 'save', 'status': 'skipped', 'message': 'Pominięto — błąd konwersji'})
            return {'success': False, 'message': 'Błąd konwersji obrazu', 'source': None, 'steps': steps}

        # Step 6: Save
        saved_path = self._step_save(output_data, slug, file_ext, steps)
        if saved_path is None:
            return {'success': False, 'message': 'Błąd zapisu pliku', 'source': None, 'steps': steps}

        return {
            'success': True,
            'message': f'Logo pobrane z {image_source} i zapisane jako {slug}.{file_ext}',
            'source': image_source,
            'steps': steps
        }

    def _step_fetch_website(self, url, steps):
        """Step 1: Fetch the website HTML.

        Returns:
            (html, final_url) on success, (None, None) on failure. ``final_url``
            is the URL after redirects and is used as the base for relative links.
        """
        headers = {
            'User-Agent': USER_AGENT,
            'Accept': 'text/html,application/xhtml+xml'
        }
        try:
            response = requests.get(url, timeout=TIMEOUT, headers=headers, allow_redirects=True)
            response.raise_for_status()
            steps.append({
                'step': 'fetch_website',
                'status': 'complete',
                'message': f'Strona pobrana ({len(response.text)} znaków)'
            })
            return response.text, response.url
        except requests.exceptions.SSLError:
            return self._retry_over_http(url, headers, steps)
        except Exception as e:
            steps.append({
                'step': 'fetch_website',
                'status': 'error',
                'message': f'Nie udało się pobrać strony: {str(e)[:100]}'
            })
            return None, None

    def _retry_over_http(self, url, headers, steps):
        """Retry a fetch that failed TLS validation over plain HTTP.

        Certificate verification is NOT disabled; only the scheme of the
        original URL is downgraded.
        """
        # Rewrite the leading scheme only, never "https://" inside the path/query.
        if url.lower().startswith('https://'):
            http_url = 'http://' + url[len('https://'):]
        else:
            http_url = url
        try:
            response = requests.get(http_url, timeout=TIMEOUT, headers=headers, allow_redirects=True)
            response.raise_for_status()
            steps.append({
                'step': 'fetch_website',
                'status': 'complete',
                'message': 'Strona pobrana przez HTTP (błąd SSL)'
            })
            return response.text, response.url
        except Exception as e:
            steps.append({
                'step': 'fetch_website',
                'status': 'error',
                'message': f'Błąd SSL i HTTP: {str(e)[:100]}'
            })
            return None, None

    def _step_meta_tags(self, soup, base_url, candidates, steps):
        """Step 2: Search meta/link tags for logo candidates (appended to ``candidates``)."""
        found = []

        # og:image
        og_img = soup.find('meta', property='og:image')
        if og_img and og_img.get('content'):
            candidates.append({
                'url': urljoin(base_url, og_img['content']),
                'source': 'og:image',
                'priority': 10
            })
            found.append('og:image')

        # twitter:image
        tw_img = soup.find('meta', attrs={'name': 'twitter:image'})
        if tw_img and tw_img.get('content'):
            candidates.append({
                'url': urljoin(base_url, tw_img['content']),
                'source': 'twitter:image',
                'priority': 11
            })
            found.append('twitter:image')

        # apple-touch-icon (prefer largest)
        touch_icons = soup.find_all('link', rel=lambda r: r and 'apple-touch-icon' in r)
        if touch_icons:
            best = max(touch_icons, key=lambda t: self._parse_size(t.get('sizes', '0x0')))
            href = best.get('href', '')
            # Check href itself: urljoin(base_url, '') returns base_url, which
            # would add the HTML page as an "image" candidate.
            if href:
                candidates.append({
                    'url': urljoin(base_url, href),
                    'source': 'apple-touch-icon',
                    'priority': 5
                })
                found.append('apple-touch-icon')

        # link rel="icon" (skip tiny favicons and icons with no declared size)
        icons = soup.find_all('link', rel=lambda r: r and 'icon' in r and 'apple' not in str(r))
        for icon in icons:
            size = self._parse_size(icon.get('sizes', '0x0'))
            href = icon.get('href', '')
            if href and size >= MIN_LOGO_SIZE:
                candidates.append({
                    'url': urljoin(base_url, href),
                    'source': 'favicon',
                    'priority': 15
                })
                found.append(f'favicon ({icon.get("sizes", "?")})')

        if found:
            steps.append({
                'step': 'meta_tags',
                'status': 'complete',
                'message': f'Znaleziono: {", ".join(found)}'
            })
        else:
            steps.append({
                'step': 'meta_tags',
                'status': 'missing',
                'message': 'Brak meta tagów z logo'
            })

    @staticmethod
    def _attr_text(value):
        """Flatten an HTML attribute value (str, list of tokens, or None) to a string."""
        if value is None:
            return ''
        if isinstance(value, (list, tuple)):
            return ' '.join(str(v) for v in value)
        return str(value)

    def _step_scan_images(self, soup, base_url, candidates, steps):
        """Step 3: Scan img elements and inline CSS backgrounds for logo candidates."""
        found_count = 0

        for img in soup.find_all('img'):
            # The parser may return `class` as a list (possibly empty), so
            # flatten every attribute instead of indexing the first token.
            id_and_class = (
                self._attr_text(img.get('id')) + ' ' + self._attr_text(img.get('class'))
            ).lower()
            alt_text = self._attr_text(img.get('alt')).lower()
            src_text = self._attr_text(img.get('src')).lower()

            if 'logo' not in ' '.join((id_and_class, alt_text, src_text)):
                continue

            src = img.get('src') or img.get('data-src') or img.get('data-lazy-src')
            if not src:
                continue

            # Prioritize based on which attribute matched
            if 'logo' in id_and_class:
                priority = 3  # Class/ID match is very strong signal
            elif 'logo' in alt_text:
                priority = 8
            else:
                priority = 20
            candidates.append({
                'url': urljoin(base_url, src),
                'source': 'img_scan',
                'priority': priority
            })
            found_count += 1

        # Also check CSS background images in header/nav
        for el in soup.select('header a[class*="logo"], nav a[class*="logo"], .logo, #logo, [class*="brand"]'):
            style = self._attr_text(el.get('style'))
            bg_match = re.search(r'url\(["\']?([^"\')\s]+)["\']?\)', style)
            if bg_match:
                candidates.append({
                    'url': urljoin(base_url, bg_match.group(1)),
                    'source': 'css_bg',
                    'priority': 7
                })
                found_count += 1

        if found_count > 0:
            steps.append({
                'step': 'scan_images',
                'status': 'complete',
                'message': f'Znaleziono {found_count} kandydatów z elementów img/CSS'
            })
        else:
            steps.append({
                'step': 'scan_images',
                'status': 'missing',
                'message': 'Brak elementów img z "logo" w atrybutach'
            })

    @staticmethod
    def _read_limited(response, limit):
        """Read a streamed response body; return None once it exceeds ``limit`` bytes."""
        chunks = []
        total = 0
        for chunk in response.iter_content(chunk_size=64 * 1024):
            total += len(chunk)
            if total > limit:
                return None
            chunks.append(chunk)
        return b''.join(chunks)

    def _fetch_candidate(self, url):
        """Download one candidate; return (data, content_type) or None if unusable.

        Network and HTTP errors propagate to the caller.
        """
        # Context manager releases the connection even when we bail out early.
        with requests.get(url, timeout=TIMEOUT, headers={
            'User-Agent': USER_AGENT
        }, stream=True) as response:
            # An error page is never a logo, even if it is served as an image.
            response.raise_for_status()

            declared = response.headers.get('content-length', '')
            if declared.isdigit() and int(declared) > MAX_DOWNLOAD_SIZE:
                logger.debug("Skipping %s: too large (%s bytes)", url, declared)
                return None

            content_type = response.headers.get('content-type', '').lower()

            # Could be a redirect to an HTML page (common for og:image on some
            # sites). Unknown non-HTML types are still tried and validated below.
            looks_like_image = any(t in content_type for t in ('image', 'svg', 'octet-stream'))
            if not looks_like_image and 'html' in content_type:
                return None

            # Enforce the cap while reading; Content-Length may be absent or wrong.
            data = self._read_limited(response, MAX_DOWNLOAD_SIZE)

        if not data:
            return None

        # For raster images, verify they decode and are big enough.
        # NOTE(review): SVG content is stored unmodified and unsanitized.
        if 'svg' not in content_type:
            try:
                from PIL import Image
                with Image.open(BytesIO(data)) as img:
                    w, h = img.size
            except Exception:
                logger.debug("Skipping %s: not a readable image", url)
                return None
            if w < MIN_LOGO_SIZE or h < MIN_LOGO_SIZE:
                logger.debug("Skipping %s: too small (%sx%s)", url, w, h)
                return None

        return data, content_type

    def _step_download(self, candidates, steps):
        """Step 4: Download the first candidate that passes validation.

        Returns:
            (data, source, content_type), or (None, None, None) if none qualified.
        """
        for candidate in candidates:
            url = candidate['url']
            try:
                fetched = self._fetch_candidate(url)
            except Exception as e:
                # Best effort: one failing candidate must not abort the others.
                logger.debug("Failed to download %s: %s", url, e)
                continue
            if fetched is None:
                continue

            data, content_type = fetched
            steps.append({
                'step': 'download',
                'status': 'complete',
                'message': f'Pobrano obraz z {candidate["source"]} ({len(data)} bajtów)'
            })
            return data, candidate['source'], content_type

        steps.append({
            'step': 'download',
            'status': 'error',
            'message': 'Żaden kandydat nie spełnił wymagań (rozmiar, format)'
        })
        return None, None, None

    def _step_convert(self, image_data, is_svg, steps):
        """Step 5: Convert image to WebP (or keep SVG).

        Returns:
            (bytes, extension) on success, (None, None) on failure.
        """
        if is_svg:
            steps.append({
                'step': 'convert',
                'status': 'complete',
                'message': 'Format SVG — zapisuję bez konwersji'
            })
            return image_data, 'svg'

        try:
            from PIL import Image

            img = Image.open(BytesIO(image_data))

            # Flatten transparency onto a white background, then drop alpha.
            if img.mode in ('RGBA', 'LA', 'P'):
                rgba = img.convert('RGBA')
                background = Image.new('RGBA', rgba.size, (255, 255, 255, 255))
                background.paste(rgba, mask=rgba.split()[-1])
                img = background.convert('RGB')
            elif img.mode != 'RGB':
                img = img.convert('RGB')

            # Resize if too large (thumbnail keeps the aspect ratio)
            w, h = img.size
            if w > MAX_LOGO_SIZE or h > MAX_LOGO_SIZE:
                img.thumbnail((MAX_LOGO_SIZE, MAX_LOGO_SIZE), Image.LANCZOS)
                w, h = img.size

            # Save to WebP
            output = BytesIO()
            img.save(output, format='WEBP', quality=WEBP_QUALITY)
            output_data = output.getvalue()

            steps.append({
                'step': 'convert',
                'status': 'complete',
                'message': f'Konwersja do WebP ({w}x{h}, {len(output_data)} bajtów)'
            })
            return output_data, 'webp'

        except Exception as e:
            steps.append({
                'step': 'convert',
                'status': 'error',
                'message': f'Błąd konwersji: {str(e)[:100]}'
            })
            return None, None

    def _step_save(self, data, slug, ext, steps):
        """Step 6: Save the file to LOGO_DIR as ``{slug}.{ext}``.

        Returns:
            The written file path, or None on failure.
        """
        filename = f'{slug}.{ext}'
        # Refuse names that would escape LOGO_DIR (path separators in the slug).
        if not slug or os.path.basename(filename) != filename:
            steps.append({
                'step': 'save',
                'status': 'error',
                'message': 'Błąd zapisu: nieprawidłowa nazwa pliku'
            })
            return None

        try:
            os.makedirs(LOGO_DIR, exist_ok=True)
            filepath = os.path.join(LOGO_DIR, filename)

            with open(filepath, 'wb') as f:
                f.write(data)

            steps.append({
                'step': 'save',
                'status': 'complete',
                'message': f'Zapisano jako {filename}'
            })
            return filepath

        except Exception as e:
            steps.append({
                'step': 'save',
                'status': 'error',
                'message': f'Błąd zapisu: {str(e)[:100]}'
            })
            return None

    @staticmethod
    def _parse_size(sizes_str):
        """Parse a ``sizes`` value such as '180x180' to its max dimension.

        Takes the largest number found, so multi-size values such as
        '16x16 32x32 192x192' yield 192. Returns 0 when there are no digits
        (e.g. 'any', '' or None).
        """
        numbers = re.findall(r'\d+', str(sizes_str))
        return max(map(int, numbers)) if numbers else 0
} + + /* Logo Loading Overlay */ + .logo-loading-overlay { + display: none; + position: fixed; + top: 0; left: 0; right: 0; bottom: 0; + background: rgba(0, 0, 0, 0.6); + z-index: 2000; + align-items: center; + justify-content: center; + } + .logo-loading-overlay.active { + display: flex; + } + .logo-loading-content { + background: white; + border-radius: var(--radius-xl); + padding: var(--spacing-2xl); + max-width: 500px; + width: 90%; + box-shadow: 0 20px 60px rgba(0,0,0,0.3); + } + .logo-loading-header { + text-align: center; + margin-bottom: var(--spacing-xl); + } + .logo-loading-header h3 { + font-size: var(--font-size-xl); + color: var(--text-primary); + margin: 0 0 var(--spacing-xs) 0; + } + .logo-loading-header p { + color: var(--text-secondary); + font-size: var(--font-size-sm); + margin: 0; + } + .logo-steps { + display: flex; + flex-direction: column; + gap: var(--spacing-md); + } + .logo-step { + display: flex; + align-items: center; + gap: var(--spacing-md); + padding: var(--spacing-sm) 0; + } + .logo-step-icon { + width: 28px; + height: 28px; + flex-shrink: 0; + display: flex; + align-items: center; + justify-content: center; + } + .logo-step-icon svg { width: 20px; height: 20px; } + .logo-step-icon.pending svg { color: #d1d5db; } + .logo-step-icon.in_progress svg { color: #f59e0b; animation: spin 1s linear infinite; } + .logo-step-icon.complete svg { color: #10b981; } + .logo-step-icon.error svg { color: #ef4444; } + .logo-step-icon.missing svg { color: #6b7280; } + .logo-step-icon.skipped svg { color: #9ca3af; } + .logo-step-text { + font-size: var(--font-size-sm); + color: var(--text-secondary); + } + .logo-step.active .logo-step-text { color: var(--text-primary); font-weight: 500; } + .logo-step.done .logo-step-text { color: var(--text-secondary); } + /* AI Progress Modal */ .ai-progress-modal { display: none; @@ -701,6 +805,22 @@ Pobierz dane urzędowe {% endif %} + {% if can_edit_profile or is_admin %} + + {% endif %} {% if not can_enrich %} 
(tylko admin lub wlasciciel) @@ -3904,6 +4024,54 @@ + +
+
+
+

Pobieranie logo firmy

+

{{ company.name }}

+
+
+
+
+ +
+ Sprawdzam stronę WWW firmy... +
+
+
+ +
+ Szukam meta tagów (og:image, favicon)... +
+
+
+ +
+ Analizuję obrazy na stronie... +
+
+
+ +
+ Pobieram najlepszy kandydat... +
+
+
+ +
+ Konwertuję do formatu WebP... +
+
+
+ +
+ Zapisuję logo firmy... +
+
+
+
+