added missing units to regex + function for reparsing missing ingredient text
This commit is contained in:
parent
c4c53d238a
commit
6f3056bc1c
|
|
@ -1,6 +1,7 @@
|
|||
from ast import alias
|
||||
import db
|
||||
import re
|
||||
from sqlalchemy import select, desc
|
||||
from sqlalchemy import select, desc, exists, not_, except_
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
import bs4
|
||||
|
|
@ -10,13 +11,15 @@ import logging
|
|||
from argparse import ArgumentParser
|
||||
|
||||
def parse_ingredient(ingredient_text):
|
||||
units = ['teaspoon', 'tablespoon', 'gram', 'once', 'jar', 'cup', 'pinch']
|
||||
number_regex = '((?:[\d\\u00BC-\\u00BE\\u2150-\\u215E]*\s?(?:\(.+\))?)*)'
|
||||
units = ['teaspoon', 'tablespoon', 'gram', 'once', 'jar', 'cup', 'pinch',
|
||||
'container', 'slice', 'package', 'pound', 'can', 'dash', 'spear',
|
||||
'bunch', 'quart', 'cube', 'envelope', 'squars', 'sprig']
|
||||
number_regex = '((?:[\d\\./\\u00BC-\\u00BE\\u2150-\\u215E]*\s?(?:\(.+\))?)*)'
|
||||
ingredient_regex = '([a-zA-Z \'\-]+)'
|
||||
supplement_regex = ',?(.*)'
|
||||
units_regex = "|".join([f'[{unit[0]}{unit[0].capitalize()}]{unit[1:]}'
|
||||
for unit in units])
|
||||
units_regex = f"((?:{units_regex})[s]?)"
|
||||
units_regex = f"((?:(?:{units_regex})e?s?)?)"
|
||||
|
||||
regex = re.compile(number_regex +
|
||||
units_regex +
|
||||
|
|
@ -30,6 +33,24 @@ def parse_ingredient(ingredient_text):
|
|||
|
||||
return [text.strip() for text in m.groups()]
|
||||
|
||||
def reparse_ingredients(session):
    """Parse stored ingredients that have no parsed-parts row yet.

    Selects every ``db.RecipeIngredient`` whose id does not appear in
    ``db.RecipeIngredientParts``, runs ``parse_ingredient`` on its text and,
    when that yields a result, adds a ``db.RecipeIngredientParts`` row with
    the same id to ``session``.

    This function does not call ``commit`` itself; the added rows stay
    pending on ``session``.

    Args:
        session: SQLAlchemy session used both for the lookup query and for
            registering the new ``RecipeIngredientParts`` objects.
    """
    # Ids present in RecipeIngredient but absent from RecipeIngredientParts.
    # The compound select is handed to in_() directly: wrapping it in
    # .alias() first makes SQLAlchemy emit an implicit-coercion warning and
    # then unwrap it back into this very statement.
    missing_ids = except_(select(db.RecipeIngredient.id),
                          select(db.RecipeIngredientParts.id))
    missing = session.query(db.RecipeIngredient).\
        where(db.RecipeIngredient.id.in_(missing_ids)).all()

    for ingredient in missing:
        parts = parse_ingredient(ingredient.text)
        if not parts:
            # parse_ingredient produced nothing usable for this text; skip it
            continue
        quantity, unit, name, supplement = parts
        session.add(db.RecipeIngredientParts(id=ingredient.id,
                                             quantity=quantity,
                                             unit=unit,
                                             ingredient=name,
                                             supplement=supplement))
|
||||
|
||||
|
||||
def load_recipe(recipe_url):
|
||||
try:
|
||||
logging.info(f'Loading Recipe: {recipe_url}')
|
||||
|
|
|
|||
Loading…
Reference in New Issue