fix: Naprawiono wyciek połączeń HTTP w fix_google_news_images.py
- Dodano context manager (with) dla sesji requests
- Jawne zamykanie odpowiedzi HTTP (response.close())
- Dodano flush=True do print dla natychmiastowego outputu
- Rozwiązuje problem 725+ otwartych połączeń

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
07171b46b7
commit
af4c0d157a
@ -96,26 +96,28 @@ def decode_google_news_url(google_url: str) -> str | None:
|
||||
|
||||
# Metoda 2: Podążaj za przekierowaniami
|
||||
try:
|
||||
session = requests.Session()
|
||||
session.headers.update(HEADERS)
|
||||
# Użyj context manager żeby zamknąć sesję po użyciu
|
||||
with requests.Session() as session:
|
||||
session.headers.update(HEADERS)
|
||||
|
||||
response = session.get(google_url, timeout=REQUEST_TIMEOUT, allow_redirects=True)
|
||||
final_url = response.url
|
||||
response = session.get(google_url, timeout=REQUEST_TIMEOUT, allow_redirects=True)
|
||||
final_url = response.url
|
||||
response.close() # Jawne zamknięcie odpowiedzi
|
||||
|
||||
# Jeśli wylądowaliśmy na consent.google.com, szukaj URL w parametrach
|
||||
if 'consent.google.com' in final_url:
|
||||
parsed = urlparse(final_url)
|
||||
params = parse_qs(parsed.query)
|
||||
if 'continue' in params:
|
||||
continue_url = unquote(params['continue'][0])
|
||||
# Rekurencyjnie dekoduj
|
||||
if 'news.google.com' in continue_url:
|
||||
return decode_google_news_url(continue_url)
|
||||
return continue_url
|
||||
# Jeśli wylądowaliśmy na consent.google.com, szukaj URL w parametrach
|
||||
if 'consent.google.com' in final_url:
|
||||
parsed = urlparse(final_url)
|
||||
params = parse_qs(parsed.query)
|
||||
if 'continue' in params:
|
||||
continue_url = unquote(params['continue'][0])
|
||||
# Rekurencyjnie dekoduj
|
||||
if 'news.google.com' in continue_url:
|
||||
return decode_google_news_url(continue_url)
|
||||
return continue_url
|
||||
|
||||
# Jeśli to nie jest Google, mamy oryginalny URL
|
||||
if 'google.com' not in final_url:
|
||||
return final_url
|
||||
# Jeśli to nie jest Google, mamy oryginalny URL
|
||||
if 'google.com' not in final_url:
|
||||
return final_url
|
||||
|
||||
except Exception as e:
|
||||
pass
|
||||
@ -126,10 +128,12 @@ def decode_google_news_url(google_url: str) -> str | None:
|
||||
def extract_og_image(url: str) -> str | None:
|
||||
"""Pobierz og:image z podanej strony."""
|
||||
try:
|
||||
response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
|
||||
response.raise_for_status()
|
||||
# Użyj context manager i zamknij połączenie
|
||||
with requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT, stream=False) as response:
|
||||
response.raise_for_status()
|
||||
html_content = response.text
|
||||
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
soup = BeautifulSoup(html_content, 'html.parser')
|
||||
|
||||
# Szukaj og:image
|
||||
og_image = soup.find('meta', property='og:image')
|
||||
@ -200,10 +204,10 @@ def main():
|
||||
}
|
||||
|
||||
for i, news in enumerate(news_items, 1):
|
||||
print(f"[{i}/{len(news_items)}] {news.title[:55]}...")
|
||||
print(f"[{i}/{len(news_items)}] {news.title[:55]}...", flush=True)
|
||||
|
||||
# Dekoduj URL Google News
|
||||
print(f" → Dekodowanie URL...")
|
||||
print(f" → Dekodowanie URL...", flush=True)
|
||||
original_url = decode_google_news_url(news.url)
|
||||
|
||||
if not original_url:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user