[refactor] duration strings: move parsing logic to utils.py

Bnyro 2025-03-20 21:16:37 +01:00 committed by Markus Heiser
parent c28d35c7fc
commit 5006d2634f
5 changed files with 42 additions and 42 deletions

View File

@@ -56,18 +56,6 @@ def request(query, params):
     return params


-# Format the video duration
-def format_duration(duration):
-    if not ":" in duration:
-        return None
-
-    minutes, seconds = map(int, duration.split(":"))
-    total_seconds = minutes * 60 + seconds
-
-    formatted_duration = str(timedelta(seconds=total_seconds))[2:] if 0 <= total_seconds < 3600 else ""
-    return formatted_duration
-
-
 def response(resp):
     search_res = resp.json()

@@ -83,7 +71,12 @@ def response(resp):
         unix_date = item["pubdate"]
         formatted_date = datetime.fromtimestamp(unix_date)
-        formatted_duration = format_duration(item["duration"])
+
+        # the duration only seems to be valid if the video is less than 60 mins
+        duration = utils.parse_duration_string(item["duration"])
+        if duration and duration > timedelta(minutes=60):
+            duration = None

         iframe_url = f"https://player.bilibili.com/player.html?aid={video_id}&high_quality=1&autoplay=false&danmaku=0"

         results.append(

@@ -93,7 +86,7 @@ def response(resp):
                 "content": description,
                 "author": author,
                 "publishedDate": formatted_date,
-                "length": formatted_duration,
+                "length": duration,
                 "thumbnail": thumbnail,
                 "iframe_src": iframe_url,
                 "template": "videos.html",

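For context, a tiny self-contained sketch (not part of the commit) of how the 60-minute cap above behaves; the two timedelta values stand in for what utils.parse_duration_string() returns for strings such as "03:15" and "61:40":

from datetime import timedelta

# stand-ins for parse_duration_string("03:15") and parse_duration_string("61:40")
for duration in (timedelta(minutes=3, seconds=15), timedelta(minutes=61, seconds=40)):
    # durations above 60 minutes are considered unreliable and dropped
    if duration and duration > timedelta(minutes=60):
        duration = None
    print(duration)
# prints: 0:03:15, then None
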
View File

@@ -2,9 +2,10 @@
 """iQiyi: A search engine for retrieving videos from iQiyi."""

 from urllib.parse import urlencode
-from datetime import datetime, timedelta
+from datetime import datetime

 from searx.exceptions import SearxEngineAPIException
+from searx.utils import parse_duration_string

 about = {
     "website": "https://www.iqiyi.com/",
@@ -55,20 +56,7 @@ def response(resp):
         except (ValueError, TypeError):
             pass

-        length = None
-        subscript_content = album_info.get("subscriptContent")
-        if subscript_content:
-            try:
-                time_parts = subscript_content.split(":")
-                if len(time_parts) == 2:
-                    minutes, seconds = map(int, time_parts)
-                    length = timedelta(minutes=minutes, seconds=seconds)
-                elif len(time_parts) == 3:
-                    hours, minutes, seconds = map(int, time_parts)
-                    length = timedelta(hours=hours, minutes=minutes, seconds=seconds)
-            except (ValueError, TypeError):
-                pass
+        length = parse_duration_string(album_info.get("subscriptContent"))

         results.append(
             {
                 'url': album_info.get("pageUrl", "").replace("http://", "https://"),

View File

@@ -6,7 +6,7 @@
 import re
 from urllib.parse import urlencode
-from datetime import datetime
+from datetime import datetime, timedelta

 from dateutil.parser import parse
 from dateutil.relativedelta import relativedelta

@@ -50,12 +50,6 @@ safesearch = True
 safesearch_table = {0: 'both', 1: 'false', 2: 'false'}


-def minute_to_hm(minute):
-    if isinstance(minute, int):
-        return "%d:%02d" % (divmod(minute, 60))
-    return None
-
-
 def request(query, params):
     """Assemble request for the Peertube API"""

@@ -117,13 +111,17 @@ def video_response(resp):
             if x
         ]

+        duration = result.get('duration')
+        if duration:
+            duration = timedelta(seconds=duration)
+
         results.append(
             {
                 'url': result['url'],
                 'title': result['name'],
                 'content': html_to_text(result.get('description') or ''),
                 'author': result.get('account', {}).get('displayName'),
-                'length': minute_to_hm(result.get('duration')),
+                'length': duration,
                 'views': humanize_number(result['views']),
                 'template': 'videos.html',
                 'publishedDate': parse(result['publishedAt']),

View File

@@ -73,7 +73,7 @@ Implementations
 from urllib.parse import urlencode, urlparse
 from searx import locales
 from searx.network import get
-from searx.utils import gen_useragent, html_to_text
+from searx.utils import gen_useragent, html_to_text, parse_duration_string

 about = {
     "website": "https://presearch.io",

@@ -270,7 +270,7 @@ def response(resp):
                 'url': item.get('link'),
                 'content': item.get('description', ''),
                 'thumbnail': item.get('image'),
-                'length': item.get('duration'),
+                'length': parse_duration_string(item.get('duration')),
             }
         )

View File

@@ -1,7 +1,5 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""Utility functions for the engines
-
-"""
+"""Utility functions for the engines"""

 from __future__ import annotations

@@ -18,6 +16,7 @@ from random import choice
 from html.parser import HTMLParser
 from html import escape
 from urllib.parse import urljoin, urlparse, parse_qs, urlencode
+from datetime import timedelta

 from markdown_it import MarkdownIt
 from lxml import html

@@ -831,3 +830,25 @@ def js_variable_to_python(js_variable):
     s = s.replace(chr(1), ':')
     # load the JSON and return the result
     return json.loads(s)
+
+
+def parse_duration_string(duration_str: str) -> timedelta | None:
+    """Parse a time string in format MM:SS or HH:MM:SS and convert it to a `timedelta` object.
+
+    Returns None if the provided string doesn't match any of the formats.
+    """
+    duration_str = duration_str.strip()
+
+    if not duration_str:
+        return None
+
+    try:
+        # prepending ["00"] here inits hours to 0 if they are not provided
+        time_parts = (["00"] + duration_str.split(":"))[-3:]
+        hours, minutes, seconds = map(int, time_parts)
+        return timedelta(hours=hours, minutes=minutes, seconds=seconds)
+    except (ValueError, TypeError):
+        pass
+
+    return None
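
A quick usage sketch of the new helper (not part of the commit, assuming searx.utils is importable) covering the two supported formats and an unparsable value:

from datetime import timedelta
from searx.utils import parse_duration_string

assert parse_duration_string("03:15") == timedelta(minutes=3, seconds=15)            # MM:SS
assert parse_duration_string("1:02:03") == timedelta(hours=1, minutes=2, seconds=3)  # HH:MM:SS
assert parse_duration_string("") is None     # empty input
assert parse_duration_string("n/a") is None  # not a duration string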