[refactor] duration strings: move parsing logic to utils.py

Bnyro 2025-03-20 21:16:37 +01:00 committed by Markus Heiser
parent c28d35c7fc
commit 5006d2634f
5 changed files with 42 additions and 42 deletions

View File

@@ -56,18 +56,6 @@ def request(query, params):
     return params


-# Format the video duration
-def format_duration(duration):
-    if not ":" in duration:
-        return None
-
-    minutes, seconds = map(int, duration.split(":"))
-    total_seconds = minutes * 60 + seconds
-
-    formatted_duration = str(timedelta(seconds=total_seconds))[2:] if 0 <= total_seconds < 3600 else ""
-    return formatted_duration
-
-
 def response(resp):
     search_res = resp.json()

@@ -83,7 +71,12 @@ def response(resp):
         unix_date = item["pubdate"]
         formatted_date = datetime.fromtimestamp(unix_date)
-        formatted_duration = format_duration(item["duration"])
+
+        # the duration only seems to be valid if the video is less than 60 mins
+        duration = utils.parse_duration_string(item["duration"])
+        if duration and duration > timedelta(minutes=60):
+            duration = None

         iframe_url = f"https://player.bilibili.com/player.html?aid={video_id}&high_quality=1&autoplay=false&danmaku=0"

         results.append(

@@ -93,7 +86,7 @@ def response(resp):
                 "content": description,
                 "author": author,
                 "publishedDate": formatted_date,
-                "length": formatted_duration,
+                "length": duration,
                 "thumbnail": thumbnail,
                 "iframe_src": iframe_url,
                 "template": "videos.html",

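For context, a tiny self-contained sketch (not part of the commit) of how the 60-minute cap above behaves; the two timedelta values stand in for what utils.parse_duration_string() returns for strings such as "03:15" and "61:40":

from datetime import timedelta

# stand-ins for parse_duration_string("03:15") and parse_duration_string("61:40")
for duration in (timedelta(minutes=3, seconds=15), timedelta(minutes=61, seconds=40)):
    # durations above 60 minutes are considered unreliable and dropped
    if duration and duration > timedelta(minutes=60):
        duration = None
    print(duration)
# prints: 0:03:15, then None
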
View File

@@ -2,9 +2,10 @@
 """iQiyi: A search engine for retrieving videos from iQiyi."""

 from urllib.parse import urlencode
-from datetime import datetime, timedelta
+from datetime import datetime

 from searx.exceptions import SearxEngineAPIException
+from searx.utils import parse_duration_string

 about = {
     "website": "https://www.iqiyi.com/",
@@ -55,20 +56,7 @@ def response(resp):
         except (ValueError, TypeError):
             pass

-        length = None
-        subscript_content = album_info.get("subscriptContent")
-        if subscript_content:
-            try:
-                time_parts = subscript_content.split(":")
-                if len(time_parts) == 2:
-                    minutes, seconds = map(int, time_parts)
-                    length = timedelta(minutes=minutes, seconds=seconds)
-                elif len(time_parts) == 3:
-                    hours, minutes, seconds = map(int, time_parts)
-                    length = timedelta(hours=hours, minutes=minutes, seconds=seconds)
-            except (ValueError, TypeError):
-                pass
+        length = parse_duration_string(album_info.get("subscriptContent"))

         results.append(
             {
                 'url': album_info.get("pageUrl", "").replace("http://", "https://"),

View File

@@ -6,7 +6,7 @@
 import re
 from urllib.parse import urlencode
-from datetime import datetime
+from datetime import datetime, timedelta

 from dateutil.parser import parse
 from dateutil.relativedelta import relativedelta

@@ -50,12 +50,6 @@ safesearch = True
 safesearch_table = {0: 'both', 1: 'false', 2: 'false'}


-def minute_to_hm(minute):
-    if isinstance(minute, int):
-        return "%d:%02d" % (divmod(minute, 60))
-    return None
-
-
 def request(query, params):
     """Assemble request for the Peertube API"""

@@ -117,13 +111,17 @@ def video_response(resp):
             if x
         ]

+        duration = result.get('duration')
+        if duration:
+            duration = timedelta(seconds=duration)
+
         results.append(
             {
                 'url': result['url'],
                 'title': result['name'],
                 'content': html_to_text(result.get('description') or ''),
                 'author': result.get('account', {}).get('displayName'),
-                'length': minute_to_hm(result.get('duration')),
+                'length': duration,
                 'views': humanize_number(result['views']),
                 'template': 'videos.html',
                 'publishedDate': parse(result['publishedAt']),

View File

@@ -73,7 +73,7 @@ Implementations
 from urllib.parse import urlencode, urlparse
 from searx import locales
 from searx.network import get
-from searx.utils import gen_useragent, html_to_text
+from searx.utils import gen_useragent, html_to_text, parse_duration_string

 about = {
     "website": "https://presearch.io",

@@ -270,7 +270,7 @@ def response(resp):
                 'url': item.get('link'),
                 'content': item.get('description', ''),
                 'thumbnail': item.get('image'),
-                'length': item.get('duration'),
+                'length': parse_duration_string(item.get('duration')),
             }
         )

View File

@@ -1,7 +1,5 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""Utility functions for the engines
-
-"""
+"""Utility functions for the engines"""

 from __future__ import annotations

@@ -18,6 +16,7 @@ from random import choice
 from html.parser import HTMLParser
 from html import escape
 from urllib.parse import urljoin, urlparse, parse_qs, urlencode
+from datetime import timedelta

 from markdown_it import MarkdownIt
 from lxml import html

@@ -831,3 +830,25 @@ def js_variable_to_python(js_variable):
     s = s.replace(chr(1), ':')
     # load the JSON and return the result
     return json.loads(s)
+
+
+def parse_duration_string(duration_str: str) -> timedelta | None:
+    """Parse a time string in format MM:SS or HH:MM:SS and convert it to a `timedelta` object.
+
+    Returns None if the provided string doesn't match any of the formats.
+    """
+    duration_str = duration_str.strip()
+
+    if not duration_str:
+        return None
+
+    try:
+        # prepending ["00"] here inits hours to 0 if they are not provided
+        time_parts = (["00"] + duration_str.split(":"))[-3:]
+        hours, minutes, seconds = map(int, time_parts)
+        return timedelta(hours=hours, minutes=minutes, seconds=seconds)
+    except (ValueError, TypeError):
+        pass
+
+    return None
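
A quick usage sketch of the new helper (not part of the commit, assuming searx.utils is importable) covering the two supported formats and an unparsable value:

from datetime import timedelta
from searx.utils import parse_duration_string

assert parse_duration_string("03:15") == timedelta(minutes=3, seconds=15)            # MM:SS
assert parse_duration_string("1:02:03") == timedelta(hours=1, minutes=2, seconds=3)  # HH:MM:SS
assert parse_duration_string("") is None     # empty input
assert parse_duration_string("n/a") is None  # not a duration string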