added missing units to regex + function for reparsing missing ingredient text
This commit is contained in:
parent
c4c53d238a
commit
6f3056bc1c
|
|
@ -1,6 +1,7 @@
|
||||||
|
from ast import alias
|
||||||
import db
|
import db
|
||||||
import re
|
import re
|
||||||
from sqlalchemy import select, desc
|
from sqlalchemy import select, desc, exists, not_, except_
|
||||||
from sqlalchemy.exc import IntegrityError
|
from sqlalchemy.exc import IntegrityError
|
||||||
from sqlalchemy.orm import sessionmaker
|
from sqlalchemy.orm import sessionmaker
|
||||||
import bs4
|
import bs4
|
||||||
|
|
@ -10,13 +11,15 @@ import logging
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
|
|
||||||
def parse_ingredient(ingredient_text):
|
def parse_ingredient(ingredient_text):
|
||||||
units = ['teaspoon', 'tablespoon', 'gram', 'once', 'jar', 'cup', 'pinch']
|
units = ['teaspoon', 'tablespoon', 'gram', 'once', 'jar', 'cup', 'pinch',
|
||||||
number_regex = '((?:[\d\\u00BC-\\u00BE\\u2150-\\u215E]*\s?(?:\(.+\))?)*)'
|
'container', 'slice', 'package', 'pound', 'can', 'dash', 'spear',
|
||||||
|
'bunch', 'quart', 'cube', 'envelope', 'squars', 'sprig']
|
||||||
|
number_regex = '((?:[\d\\./\\u00BC-\\u00BE\\u2150-\\u215E]*\s?(?:\(.+\))?)*)'
|
||||||
ingredient_regex = '([a-zA-Z \'\-]+)'
|
ingredient_regex = '([a-zA-Z \'\-]+)'
|
||||||
supplement_regex = ',?(.*)'
|
supplement_regex = ',?(.*)'
|
||||||
units_regex = "|".join([f'[{unit[0]}{unit[0].capitalize()}]{unit[1:]}'
|
units_regex = "|".join([f'[{unit[0]}{unit[0].capitalize()}]{unit[1:]}'
|
||||||
for unit in units])
|
for unit in units])
|
||||||
units_regex = f"((?:{units_regex})[s]?)"
|
units_regex = f"((?:(?:{units_regex})e?s?)?)"
|
||||||
|
|
||||||
regex = re.compile(number_regex +
|
regex = re.compile(number_regex +
|
||||||
units_regex +
|
units_regex +
|
||||||
|
|
@ -30,6 +33,24 @@ def parse_ingredient(ingredient_text):
|
||||||
|
|
||||||
return [text.strip() for text in m.groups()]
|
return [text.strip() for text in m.groups()]
|
||||||
|
|
||||||
|
def reparse_ingredients(session):
    """Parse ingredients that do not yet have a parsed-parts row.

    Selects every ``db.RecipeIngredient`` whose id does not appear among
    the ``db.RecipeIngredientParts`` ids, runs ``parse_ingredient`` on its
    text, and adds a ``db.RecipeIngredientParts`` row to *session* for each
    ingredient that parses. Ingredients for which ``parse_ingredient``
    returns a falsy result are left untouched. This function does not
    commit the session.
    """
    # Set difference: ingredient ids minus ids that already have parts.
    unparsed_ids = except_(
        select(db.RecipeIngredient.id),
        select(db.RecipeIngredientParts.id),
    ).alias('missing')

    pending = (
        session.query(db.RecipeIngredient)
        .where(db.RecipeIngredient.id.in_(unparsed_ids))
        .all()
    )

    for row in pending:
        parsed = parse_ingredient(row.text)
        if parsed:
            quantity, unit, name, supplement = parsed
            new_parts = db.RecipeIngredientParts(
                id=row.id,
                quantity=quantity,
                unit=unit,
                ingredient=name,
                supplement=supplement,
            )
            session.add(new_parts)
|
||||||
|
|
||||||
|
|
||||||
def load_recipe(recipe_url):
|
def load_recipe(recipe_url):
|
||||||
try:
|
try:
|
||||||
logging.info(f'Loading Recipe: {recipe_url}')
|
logging.info(f'Loading Recipe: {recipe_url}')
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue