[mod] implement searx.wikidata_units for unit converters
This commit is contained in:
parent
8bee676935
commit
7a49e95687
@ -15,7 +15,7 @@ import babel.numbers
|
|||||||
|
|
||||||
from flask_babel import gettext, get_locale
|
from flask_babel import gettext, get_locale
|
||||||
|
|
||||||
from searx import data
|
from searx.units import symbol_to_si
|
||||||
from searx.plugins import Plugin, PluginInfo
|
from searx.plugins import Plugin, PluginInfo
|
||||||
from searx.result_types import EngineResults
|
from searx.result_types import EngineResults
|
||||||
|
|
||||||
@ -86,132 +86,6 @@ RE_MEASURE = r'''
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
ADDITIONAL_UNITS = [
|
|
||||||
{
|
|
||||||
"si_name": "Q11579",
|
|
||||||
"symbol": "°C",
|
|
||||||
"to_si": lambda val: val + 273.15,
|
|
||||||
"from_si": lambda val: val - 273.15,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"si_name": "Q11579",
|
|
||||||
"symbol": "°F",
|
|
||||||
"to_si": lambda val: (val + 459.67) * 5 / 9,
|
|
||||||
"from_si": lambda val: (val * 9 / 5) - 459.67,
|
|
||||||
},
|
|
||||||
]
|
|
||||||
"""Additional items to convert from a measure unit to a SI unit (vice versa).
|
|
||||||
|
|
||||||
.. code:: python
|
|
||||||
|
|
||||||
{
|
|
||||||
"si_name": "Q11579", # Wikidata item ID of the SI unit (Kelvin)
|
|
||||||
"symbol": "°C", # symbol of the measure unit
|
|
||||||
"to_si": lambda val: val + 273.15, # convert measure value (val) to SI unit
|
|
||||||
"from_si": lambda val: val - 273.15, # convert SI value (val) measure unit
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"si_name": "Q11573",
|
|
||||||
"symbol": "mi",
|
|
||||||
"to_si": 1609.344, # convert measure value (val) to SI unit
|
|
||||||
"from_si": 1 / 1609.344 # convert SI value (val) measure unit
|
|
||||||
},
|
|
||||||
|
|
||||||
The values of ``to_si`` and ``from_si`` can be of :py:obj:`float` (a multiplier)
|
|
||||||
or a callable_ (val in / converted value returned).
|
|
||||||
|
|
||||||
.. _callable: https://docs.python.org/3/glossary.html#term-callable
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
ALIAS_SYMBOLS = {
|
|
||||||
'°C': ('C',),
|
|
||||||
'°F': ('F',),
|
|
||||||
'mi': ('L',),
|
|
||||||
}
|
|
||||||
"""Alias symbols for known unit of measure symbols / by example::
|
|
||||||
|
|
||||||
'°C': ('C', ...), # list of alias symbols for °C (Q69362731)
|
|
||||||
'°F': ('F', ...), # list of alias symbols for °F (Q99490479)
|
|
||||||
'mi': ('L',), # list of alias symbols for mi (Q253276)
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
SYMBOL_TO_SI = []
|
|
||||||
|
|
||||||
|
|
||||||
def symbol_to_si():
|
|
||||||
"""Generates a list of tuples, each tuple is a measure unit and the fields
|
|
||||||
in the tuple are:
|
|
||||||
|
|
||||||
0. Symbol of the measure unit (e.g. 'mi' for measure unit 'miles' Q253276)
|
|
||||||
|
|
||||||
1. SI name of the measure unit (e.g. Q11573 for SI unit 'metre')
|
|
||||||
|
|
||||||
2. Factor to get SI value from measure unit (e.g. 1mi is equal to SI 1m
|
|
||||||
multiplied by 1609.344)
|
|
||||||
|
|
||||||
3. Factor to get measure value from from SI value (e.g. SI 100m is equal to
|
|
||||||
100mi divided by 1609.344)
|
|
||||||
|
|
||||||
The returned list is sorted, the first items are created from
|
|
||||||
``WIKIDATA_UNITS``, the second group of items is build from
|
|
||||||
:py:obj:`ADDITIONAL_UNITS` and items created from :py:obj:`ALIAS_SYMBOLS`.
|
|
||||||
|
|
||||||
If you search this list for a symbol, then a match with a symbol from
|
|
||||||
Wikidata has the highest weighting (first hit in the list), followed by the
|
|
||||||
symbols from the :py:obj:`ADDITIONAL_UNITS` and the lowest weighting is
|
|
||||||
given to the symbols resulting from the aliases :py:obj:`ALIAS_SYMBOLS`.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
global SYMBOL_TO_SI # pylint: disable=global-statement
|
|
||||||
if SYMBOL_TO_SI:
|
|
||||||
return SYMBOL_TO_SI
|
|
||||||
|
|
||||||
# filter out units which can't be normalized to a SI unit and filter out
|
|
||||||
# units without a symbol / arcsecond does not have a symbol
|
|
||||||
# https://www.wikidata.org/wiki/Q829073
|
|
||||||
|
|
||||||
for item in data.WIKIDATA_UNITS.values():
|
|
||||||
if item['to_si_factor'] and item['symbol']:
|
|
||||||
SYMBOL_TO_SI.append(
|
|
||||||
(
|
|
||||||
item['symbol'],
|
|
||||||
item['si_name'],
|
|
||||||
1 / item['to_si_factor'], # from_si
|
|
||||||
item['to_si_factor'], # to_si
|
|
||||||
item['symbol'],
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
for item in ADDITIONAL_UNITS:
|
|
||||||
SYMBOL_TO_SI.append(
|
|
||||||
(
|
|
||||||
item['symbol'],
|
|
||||||
item['si_name'],
|
|
||||||
item['from_si'],
|
|
||||||
item['to_si'],
|
|
||||||
item['symbol'],
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
alias_items = []
|
|
||||||
for item in SYMBOL_TO_SI:
|
|
||||||
for alias in ALIAS_SYMBOLS.get(item[0], ()):
|
|
||||||
alias_items.append(
|
|
||||||
(
|
|
||||||
alias,
|
|
||||||
item[1],
|
|
||||||
item[2], # from_si
|
|
||||||
item[3], # to_si
|
|
||||||
item[0], # origin unit
|
|
||||||
)
|
|
||||||
)
|
|
||||||
SYMBOL_TO_SI = SYMBOL_TO_SI + alias_items
|
|
||||||
return SYMBOL_TO_SI
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_text_and_convert(from_query, to_query) -> str | None:
|
def _parse_text_and_convert(from_query, to_query) -> str | None:
|
||||||
|
|
||||||
# pylint: disable=too-many-branches, too-many-locals
|
# pylint: disable=too-many-branches, too-many-locals
|
||||||
|
231
searx/wikidata_units.py
Normal file
231
searx/wikidata_units.py
Normal file
@ -0,0 +1,231 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""Unit conversion on the basis of `SPARQL/WIKIDATA Precision, Units and
|
||||||
|
Coordinates`_
|
||||||
|
|
||||||
|
.. _SPARQL/WIKIDATA Precision, Units and Coordinates:
|
||||||
|
https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
|
||||||
|
"""
|
||||||
|
|
||||||
|
__all__ = ["convert_from_si", "convert_to_si", "symbol_to_si"]
|
||||||
|
|
||||||
|
import collections
|
||||||
|
|
||||||
|
from searx import data
|
||||||
|
from searx.engines import wikidata
|
||||||
|
|
||||||
|
ADDITIONAL_UNITS = [
|
||||||
|
{
|
||||||
|
"si_name": "Q11579",
|
||||||
|
"symbol": "°C",
|
||||||
|
"to_si": lambda val: val + 273.15,
|
||||||
|
"from_si": lambda val: val - 273.15,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"si_name": "Q11579",
|
||||||
|
"symbol": "°F",
|
||||||
|
"to_si": lambda val: (val + 459.67) * 5 / 9,
|
||||||
|
"from_si": lambda val: (val * 9 / 5) - 459.67,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
"""Additional items to convert from a measure unit to a SI unit (vice versa).
|
||||||
|
|
||||||
|
.. code:: python
|
||||||
|
|
||||||
|
{
|
||||||
|
"si_name": "Q11579", # Wikidata item ID of the SI unit (Kelvin)
|
||||||
|
"symbol": "°C", # symbol of the measure unit
|
||||||
|
"to_si": lambda val: val + 273.15, # convert measure value (val) to SI unit
|
||||||
|
"from_si": lambda val: val - 273.15, # convert SI value (val) measure unit
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"si_name": "Q11573",
|
||||||
|
"symbol": "mi",
|
||||||
|
"to_si": 1609.344, # convert measure value (val) to SI unit
|
||||||
|
"from_si": 1 / 1609.344 # convert SI value (val) measure unit
|
||||||
|
},
|
||||||
|
|
||||||
|
The values of ``to_si`` and ``from_si`` can be of :py:obj:`float` (a multiplier)
|
||||||
|
or a callable_ (val in / converted value returned).
|
||||||
|
|
||||||
|
.. _callable: https://docs.python.org/3/glossary.html#term-callable
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
ALIAS_SYMBOLS = {
|
||||||
|
'°C': ('C',),
|
||||||
|
'°F': ('F',),
|
||||||
|
'mi': ('L',),
|
||||||
|
}
|
||||||
|
"""Alias symbols for known unit of measure symbols / by example::
|
||||||
|
|
||||||
|
'°C': ('C', ...), # list of alias symbols for °C (Q69362731)
|
||||||
|
'°F': ('F', ...), # list of alias symbols for °F (Q99490479)
|
||||||
|
'mi': ('L',), # list of alias symbols for mi (Q253276)
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
SYMBOL_TO_SI = []
|
||||||
|
UNITS_BY_SI_NAME: dict | None = None
|
||||||
|
|
||||||
|
|
||||||
|
# Positions inside the tuples returned by symbol_to_si():
# (symbol, si_name, from_si, to_si, origin symbol)
_POS_SYMBOL = 0
_POS_SI_NAME = 1
_POS_FROM_SI = 2
_POS_TO_SI = 3


def _apply_conversion(conversion, value: float | int) -> float:
    """Convert ``value`` with ``conversion``, which is either a number (used
    as a multiplier) or a callable (value in / converted value returned)."""
    if isinstance(conversion, (float, int)):
        return float(value) * conversion
    return conversion(float(value))


def convert_from_si(si_name: str, symbol: str, value: float | int) -> float:
    """Convert ``value`` given in the SI unit ``si_name`` into the measure unit
    identified by ``symbol``.

    :param si_name: Wikidata item ID of the SI unit (e.g. ``Q11573``).
    :param symbol: symbol (or alias symbol) of the target measure unit.
    :param value: value in the SI unit.
    :raises KeyError: if ``si_name`` or ``symbol`` is unknown.
    """
    from_si = units_by_si_name(si_name)[symbol][_POS_FROM_SI]
    return _apply_conversion(from_si, value)


def convert_to_si(si_name: str, symbol: str, value: float | int) -> float:
    """Convert ``value`` given in the measure unit identified by ``symbol``
    into the SI unit ``si_name``.

    :param si_name: Wikidata item ID of the SI unit (e.g. ``Q11573``).
    :param symbol: symbol (or alias symbol) of the source measure unit.
    :param value: value in the measure unit.
    :raises KeyError: if ``si_name`` or ``symbol`` is unknown.
    """
    to_si = units_by_si_name(si_name)[symbol][_POS_TO_SI]
    return _apply_conversion(to_si, value)


def units_by_si_name(si_name):
    """Return a ``{symbol: unit tuple}`` mapping of all measure units that are
    normalized to the SI unit ``si_name``.

    The unit tuples are the items of :py:obj:`symbol_to_si`.  The lookup table
    over all SI units is built on the first call and cached in
    ``UNITS_BY_SI_NAME``.

    :raises KeyError: if no unit is known for ``si_name``.
    """

    global UNITS_BY_SI_NAME  # pylint: disable=global-statement
    if UNITS_BY_SI_NAME is None:
        by_si_name: dict = {}
        for item in symbol_to_si():
            # Group each unit under its *own* SI name, not under the requested
            # one: the table is cached and serves every later request.
            by_symbol = by_si_name.setdefault(item[_POS_SI_NAME], {})
            # symbol_to_si() lists the highest-weighted entry first, so the
            # first hit for a symbol wins.
            by_symbol.setdefault(item[_POS_SYMBOL], item)
        # Publish only the completely built table.
        UNITS_BY_SI_NAME = by_si_name
    return UNITS_BY_SI_NAME[si_name]
|
||||||
|
|
||||||
|
|
||||||
|
def symbol_to_si():
    """Generates a list of tuples, each tuple is a measure unit and the fields
    in the tuple are:

    0. Symbol of the measure unit (e.g. 'mi' for measure unit 'miles' Q253276)

    1. SI name of the measure unit (e.g. Q11573 for SI unit 'metre')

    2. ``from_si``: factor (or callable) to get the measure value from the SI
       value (e.g. a SI value of 100m multiplied by 1 / 1609.344 is the value
       in 'mi')

    3. ``to_si``: factor (or callable) to get the SI value from the measure
       value (e.g. 1mi multiplied by 1609.344 is the SI value in 'm')

    4. Symbol of the origin measure unit: the same as field 0, except for
       alias items, where it is the symbol the alias stands for.

    The returned list is sorted, the first items are created from
    ``WIKIDATA_UNITS``, the second group of items is built from
    :py:obj:`ADDITIONAL_UNITS` and the last group are the items created from
    :py:obj:`ALIAS_SYMBOLS`.

    If you search this list for a symbol, then a match with a symbol from
    Wikidata has the highest weighting (first hit in the list), followed by the
    symbols from the :py:obj:`ADDITIONAL_UNITS` and the lowest weighting is
    given to the symbols resulting from the aliases :py:obj:`ALIAS_SYMBOLS`.

    The list is built on the first call and cached in ``SYMBOL_TO_SI``.
    """

    global SYMBOL_TO_SI  # pylint: disable=global-statement
    if SYMBOL_TO_SI:
        return SYMBOL_TO_SI

    # The list is assembled locally and published in one step at the end, so
    # an exception while building never leaves a partially filled cache that
    # later calls would return as if it were complete.

    # filter out units which can't be normalized to a SI unit and filter out
    # units without a symbol / arcsecond does not have a symbol
    # https://www.wikidata.org/wiki/Q829073
    units = [
        (
            item['symbol'],
            item['si_name'],
            1 / item['to_si_factor'],  # from_si
            item['to_si_factor'],  # to_si
            item['symbol'],
        )
        for item in data.WIKIDATA_UNITS.values()
        if item['to_si_factor'] and item['symbol']
    ]

    units.extend(
        (
            item['symbol'],
            item['si_name'],
            item['from_si'],
            item['to_si'],
            item['symbol'],
        )
        for item in ADDITIONAL_UNITS
    )

    # Aliases inherit the conversion of the unit they stand for and keep the
    # symbol of that unit in the last field.
    alias_items = []
    for symbol, si_name, from_si, to_si, _origin in units:
        for alias in ALIAS_SYMBOLS.get(symbol, ()):
            alias_items.append((alias, si_name, from_si, to_si, symbol))

    SYMBOL_TO_SI = units + alias_items
    return SYMBOL_TO_SI
|
||||||
|
|
||||||
|
|
||||||
|
# the response contains duplicate ?item with the different ?symbol
|
||||||
|
# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result
|
||||||
|
# even if a ?item has different ?symbol of the same rank.
|
||||||
|
# A deterministic result
|
||||||
|
# see:
|
||||||
|
# * https://www.wikidata.org/wiki/Help:Ranking
|
||||||
|
# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
|
||||||
|
# * https://w.wiki/32BT
|
||||||
|
# * https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
|
||||||
|
# see the result for https://www.wikidata.org/wiki/Q11582
|
||||||
|
# there are multiple symbols with the same rank
|
||||||
|
|
||||||
|
SARQL_REQUEST = """
|
||||||
|
SELECT DISTINCT ?item ?symbol ?tosi ?tosiUnit
|
||||||
|
WHERE
|
||||||
|
{
|
||||||
|
?item wdt:P31/wdt:P279 wd:Q47574 .
|
||||||
|
?item p:P5061 ?symbolP .
|
||||||
|
?symbolP ps:P5061 ?symbol ;
|
||||||
|
wikibase:rank ?rank .
|
||||||
|
OPTIONAL {
|
||||||
|
?item p:P2370 ?tosistmt .
|
||||||
|
?tosistmt psv:P2370 ?tosinode .
|
||||||
|
?tosinode wikibase:quantityAmount ?tosi .
|
||||||
|
?tosinode wikibase:quantityUnit ?tosiUnit .
|
||||||
|
}
|
||||||
|
FILTER(LANG(?symbol) = "en").
|
||||||
|
}
|
||||||
|
ORDER BY ?item DESC(?rank) ?symbol
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_units():
    """Query Wikidata for measure units and return them keyed by the Wikidata
    item ID of the unit.

    The function is used to update the persistence of
    :py:obj:`searx.data.WIKIDATA_UNITS`.  Each value is a dictionary with the
    keys ``symbol``, ``si_name`` and ``to_si_factor``; the latter two are
    ``None`` when the response row carries no value for them.
    """

    units = collections.OrderedDict()
    bindings = wikidata.send_wikidata_query(SARQL_REQUEST)['results']['bindings']

    for row in bindings:
        unit_symbol = row['symbol']['value']
        # the item ID is the last path segment of the item URI
        item_id = row['item']['value'].rsplit('/', 1)[1]

        si_unit = row.get('tosiUnit', {}).get('value', '')
        if si_unit:
            si_unit = si_unit.rsplit('/', 1)[1]

        factor = row.get('tosi', {}).get('value', '')

        if item_id in units:
            # ignore duplicate: always use the first one
            continue

        units[item_id] = {
            'symbol': unit_symbol,
            'si_name': si_unit or None,
            'to_si_factor': float(factor) if factor else None,
        }

    return units
|
@ -8,76 +8,15 @@ Output file: :origin:`searx/data/wikidata_units.json` (:origin:`CI Update data
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import collections
|
|
||||||
|
|
||||||
# set path
|
|
||||||
from os.path import join
|
|
||||||
|
|
||||||
from searx import searx_dir
|
|
||||||
from searx.engines import wikidata, set_loggers
|
from searx.engines import wikidata, set_loggers
|
||||||
from searx.data import data_dir
|
from searx.data import data_dir
|
||||||
|
from searx.wikidata_units import fetch_units
|
||||||
|
|
||||||
DATA_FILE = data_dir / 'wikidata_units.json'
|
DATA_FILE = data_dir / 'wikidata_units.json'
|
||||||
|
|
||||||
set_loggers(wikidata, 'wikidata')
|
set_loggers(wikidata, 'wikidata')
|
||||||
|
|
||||||
# the response contains duplicate ?item with the different ?symbol
|
|
||||||
# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result
|
|
||||||
# even if a ?item has different ?symbol of the same rank.
|
|
||||||
# A deterministic result
|
|
||||||
# see:
|
|
||||||
# * https://www.wikidata.org/wiki/Help:Ranking
|
|
||||||
# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
|
|
||||||
# * https://w.wiki/32BT
|
|
||||||
# * https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
|
|
||||||
# see the result for https://www.wikidata.org/wiki/Q11582
|
|
||||||
# there are multiple symbols the same rank
|
|
||||||
SARQL_REQUEST = """
|
|
||||||
SELECT DISTINCT ?item ?symbol ?tosi ?tosiUnit
|
|
||||||
WHERE
|
|
||||||
{
|
|
||||||
?item wdt:P31/wdt:P279 wd:Q47574 .
|
|
||||||
?item p:P5061 ?symbolP .
|
|
||||||
?symbolP ps:P5061 ?symbol ;
|
|
||||||
wikibase:rank ?rank .
|
|
||||||
OPTIONAL {
|
|
||||||
?item p:P2370 ?tosistmt .
|
|
||||||
?tosistmt psv:P2370 ?tosinode .
|
|
||||||
?tosinode wikibase:quantityAmount ?tosi .
|
|
||||||
?tosinode wikibase:quantityUnit ?tosiUnit .
|
|
||||||
}
|
|
||||||
FILTER(LANG(?symbol) = "en").
|
|
||||||
}
|
|
||||||
ORDER BY ?item DESC(?rank) ?symbol
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
def get_data():
|
|
||||||
results = collections.OrderedDict()
|
|
||||||
response = wikidata.send_wikidata_query(SARQL_REQUEST)
|
|
||||||
for unit in response['results']['bindings']:
|
|
||||||
|
|
||||||
symbol = unit['symbol']['value']
|
|
||||||
name = unit['item']['value'].rsplit('/', 1)[1]
|
|
||||||
si_name = unit.get('tosiUnit', {}).get('value', '')
|
|
||||||
if si_name:
|
|
||||||
si_name = si_name.rsplit('/', 1)[1]
|
|
||||||
|
|
||||||
to_si_factor = unit.get('tosi', {}).get('value', '')
|
|
||||||
if name not in results:
|
|
||||||
# ignore duplicate: always use the first one
|
|
||||||
results[name] = {
|
|
||||||
'symbol': symbol,
|
|
||||||
'si_name': si_name if si_name else None,
|
|
||||||
'to_si_factor': float(to_si_factor) if to_si_factor else None,
|
|
||||||
}
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
def get_wikidata_units_filename():
|
|
||||||
return join(join(searx_dir, "data"), "")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
with DATA_FILE.open('w', encoding="utf8") as f:
|
with DATA_FILE.open('w', encoding="utf8") as f:
|
||||||
json.dump(get_data(), f, indent=4, sort_keys=True, ensure_ascii=False)
|
json.dump(fetch_units(), f, indent=4, sort_keys=True, ensure_ascii=False)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user