Source code for ingredient_parser._common
#!/usr/bin/env python3
import collections
import logging
import os
import platform
import re
import subprocess
from collections.abc import Generator, Iterator
from importlib.resources import as_file, files
from itertools import groupby, islice
from operator import itemgetter
import pint
from nltk.data import find as nltk_find
from nltk.downloader import download as nltk_download
UREG = pint.UnitRegistry()
with as_file(files(__package__) / "pint_extensions.txt") as p:
# Load pint context that defines transformations between mass and volume
UREG.load_definitions(p)
SUPPORTED_LANGUAGES = ["en"]
# Logging
logger = logging.getLogger("ingredient-parser")
logger.addHandler(logging.NullHandler())
# Regex pattern for matching a numeric range e.g. 1-2, 2-3, #1$2-1#3$4.
RANGE_PATTERN = re.compile(r"^[\d\#\$]+\s*[\-][\d\#\$]+$")
[docs]
def consume(iterator: Iterator, n: int | None) -> None:
"""Advance the `iterator` n-steps ahead. If `n` is none, consume entirely.
See consume from https://docs.python.org/3/library/itertools.html#itertools-recipes
Parameters
----------
iterator : Iterator
Iterator to advance.
n : int | None
Number of iterations to advance by.
If None, consume entire iterator.
Examples
--------
>>> it = iter(range(10))
>>> consume(it, 3)
>>> next(it)
3
>>> it = iter(range(10))
>>> consume(it, None)
>>> next(it)
StopIteration
"""
if n is None:
# Feed the entire iterator into a zero-length deque
collections.deque(iterator, maxlen=0)
else:
# Advance to the empty slice starting at position n
next(islice(iterator, n, n), None)
[docs]
def group_consecutive_idx(idx: list[int]) -> Generator[Iterator[int], None, None]:
"""Yield groups of consecutive indices.
Given a list of integers, yield groups of integers where the value of each in a
group is adjacent to the previous element's value.
Parameters
----------
idx : list[int]
List of indices.
Yields
------
list[list[int]]
List of lists, where each sub-list contains consecutive indices.
Examples
--------
>>> groups = group_consecutive_idx([0, 1, 2, 4, 5, 6, 8, 9])
>>> [list(g) for g in groups]
[[0, 1, 2], [4, 5, 6], [8, 9]]
"""
for _, g in groupby(enumerate(idx), key=lambda x: x[0] - x[1]):
yield map(itemgetter(1), g)
[docs]
def show_model_card(lang: str = "en") -> None:
"""Open model card for specified language in default application.
Parameters
----------
lang : str, optional
Selected language to open model card for.
Raises
------
FileNotFoundError
Raised if model card not found at expected path.
ValueError
Raised if unsupported language provided in lang argument.
"""
if lang not in SUPPORTED_LANGUAGES:
raise ValueError(f'Unsupported language "{lang}"')
with as_file(files(__package__) / lang / f"data/ModelCard.{lang}.md") as p:
if not p.exists():
raise FileNotFoundError(f"Could not find Model Card at {p}")
if platform.system() == "Darwin": # macOS
subprocess.call(("open", p))
elif platform.system() == "Windows": # Windows
os.startfile(p) # type: ignore
else: # linux variants
subprocess.call(("xdg-open", p))
[docs]
def download_nltk_resources() -> None:
"""Check if required nltk resources can be found and if not, download them."""
try:
nltk_find(
"taggers/averaged_perceptron_tagger_eng/averaged_perceptron_tagger_eng.classes.json"
)
nltk_find(
"taggers/averaged_perceptron_tagger_eng/averaged_perceptron_tagger_eng.tagdict.json"
)
nltk_find(
"taggers/averaged_perceptron_tagger_eng/averaged_perceptron_tagger_eng.weights.json"
)
except LookupError:
print("Downloading required NLTK resource: averaged_perceptron_tagger_eng")
nltk_download("averaged_perceptron_tagger_eng")
[docs]
def is_float(value: str) -> bool:
"""Check if `value` can be converted to a float.
Parameters
----------
value : str
Value to check.
Returns
-------
bool
True if the value can be converted to float, else False.
Examples
--------
>>> is_float("3")
True
>>> is_float("2.5")
True
>>> is_float("1-2")
False
"""
try:
_ = float(value)
return True
except ValueError:
return False
[docs]
def is_range(value: str) -> bool:
"""Check if `value` is a range e.g. 100-200.
Parameters
----------
value : str
Value to check.
Returns
-------
bool
True if the value is a range, else False.
Examples
--------
>>> is_range("1-2")
True
>>> is_float("100-500")
True
>>> is_float("1")
False
"""
return RANGE_PATTERN.match(value) is not None