Merge f49d1a9b903e7c4b8ea9a6a3a7ebe164d2402944 into cd64fb966e7e9d764e622e42b177a1f13dc65ec0

This commit is contained in:
Alexandre Flament 2025-05-15 13:08:01 +02:00 committed by GitHub
commit f859d7d94b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -2,11 +2,11 @@
# pylint: disable=missing-module-docstring, invalid-name # pylint: disable=missing-module-docstring, invalid-name
import gc import gc
import time
import typing import typing
import types import types
import functools import functools
import itertools import itertools
from time import time
from timeit import default_timer from timeit import default_timer
from urllib.parse import urlparse from urllib.parse import urlparse
@ -66,15 +66,15 @@ def _download_and_check_if_image(image_url: str) -> bool:
This function should not be called directly: use _is_url_image This function should not be called directly: use _is_url_image
otherwise the cache of functools.lru_cache contains data: URL which might be huge. otherwise the cache of functools.lru_cache contains data: URL which might be huge.
""" """
retry = 2 retry = 3
while retry > 0: while retry > 0:
a = time() a = default_timer()
try: try:
# use "image_proxy" (avoid HTTP/2) # use "image_proxy" (avoid HTTP/2)
network.set_timeout_for_thread(2)
network.set_context_network_name('image_proxy') network.set_context_network_name('image_proxy')
r, stream = network.stream( r = network.get(
'GET',
image_url, image_url,
timeout=10.0, timeout=10.0,
allow_redirects=True, allow_redirects=True,
@ -90,19 +90,20 @@ def _download_and_check_if_image(image_url: str) -> bool:
'Cache-Control': 'max-age=0', 'Cache-Control': 'max-age=0',
}, },
) )
r.close()
if r.status_code == 200: if r.status_code == 200:
is_image = r.headers.get('content-type', '').startswith('image/') is_image = r.headers.get('content-type', '').startswith('image/')
else: else:
is_image = False is_image = False
del r del r
del stream
return is_image return is_image
except httpx.TimeoutException: except httpx.TimeoutException:
logger.error('Timeout for %s: %i', image_url, int(time() - a)) logger.error('Timeout for %s: %i', image_url, int(default_timer() - a))
time.sleep(1)
retry -= 1 retry -= 1
except httpx.HTTPError: except httpx.HTTPStatusError as e:
logger.exception('Exception for %s', image_url) logger.error('Exception for %s: HTTP status=%i', image_url, e.response.status_code)
except httpx.HTTPError as e:
logger.error('Exception for %s: %s, %s', image_url, e.__class__.__name__, ",".join(e.args))
return False return False
return False return False