fix: use bidirectional name matching in Google Places validation

TERMO vs TERMO-BUD was incorrectly accepted (score 1.0) because the denominator counted only the company's words. The score now divides by max(len(company_words), len(google_words)), so extra words in either name lower the score.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-21 07:43:24 +01:00 · 2026-02-21 07:43:24 +01:00 · e91c9d38f1
commit e91c9d38f1
parent ae9a658b0c
1 changed file with 11 additions and 7 deletions
--- a/google_places_service.py
+++ b/google_places_service.py
@@ -151,13 +151,13 @@ class GooglePlacesService:
    @staticmethod
    def _name_match_score(company_name: str, google_name: str) -> float:
        """
-        Compute name match score between company name and Google result name.
+        Compute bidirectional name match score between company name and Google result.

-        Returns float 0.0-1.0:
-          - 1.0 = all significant company words found in Google name
-          - 0.0 = no words matched
-        Uses word-boundary matching (not substring) to prevent
-        'IT' matching 'digital' or 'Space' matching 'Body Space' alone.
+        Uses max(company_words, google_words) as denominator so that
+        extra words in either name lower the score:
+          - "TERMO" vs "TERMO-BUD" → 1/max(1,2) = 0.50
+          - "TERMO" vs "TERMO"     → 1/max(1,1) = 1.00
+          - "IT Space" vs "IT Space" → 2/max(2,2) = 1.00
        """
        company_words = GooglePlacesService._tokenize_name(company_name)
        google_words = GooglePlacesService._tokenize_name(google_name)
@@ -165,8 +165,12 @@ class GooglePlacesService:
        if not company_words:
            return 0.0

+        denominator = max(len(company_words), len(google_words))
+        if denominator == 0:
+            return 0.0
+
        matched = company_words & google_words
-        return len(matched) / len(company_words)
+        return len(matched) / denominator

    def search_place(self, query: str, location_bias: Dict = None,
                     company_name: str = None) -> Optional[Dict[str, Any]]: