Added an instructions capture group to the ingredient-parsing regex

This commit is contained in:
Andrei Stoica 2022-07-24 16:18:18 -04:00
parent 910597b3ff
commit 53837ea657
2 changed files with 18 additions and 7 deletions

View File

@ -48,6 +48,7 @@ class RecipeIngredientParts(Base):
id = Column(Integer, ForeignKey("RecipeIngredient.id"), primary_key=True) id = Column(Integer, ForeignKey("RecipeIngredient.id"), primary_key=True)
quantity = Column(String) quantity = Column(String)
unit = Column(String) unit = Column(String)
instruction = Column(String)
ingredient = Column(String) ingredient = Column(String)
supplement = Column(String) supplement = Column(String)

View File

@ -12,17 +12,18 @@ import logging
from argparse import ArgumentParser from argparse import ArgumentParser
def parse_ingredient(ingredient_text): def parse_ingredient(ingredient_text):
units = ['teaspoon', 'tablespoon', 'gram', 'ounce', 'jar', 'cup', 'pinch', units = ['teaspoon', 'tablespoon', 'gram', 'ounce', 'jar', 'cup', 'pinch',
'container', 'slice', 'package', 'pound', 'can', 'dash', 'spear', 'container', 'slice', 'package', 'pound', 'can', 'dash', 'spear',
'bunch', 'quart', 'cube', 'envelope', 'square', 'sprig', 'bag', 'bunch', 'quart', 'cube', 'envelope', 'square', 'sprig', 'bag',
'box', 'drop', 'fluid ounce', 'gallon', 'head', 'link', 'loaf', 'box', 'drop', 'fluid ounce', 'gallon', 'head', 'link', 'loaf',
'pint', 'pod', 'sheet', 'stalk', 'whole', 'bar', 'bottle', 'bulb', 'pint', 'pod', 'sheet', 'stalk', 'whole', 'bar', 'bottle', 'bulb',
'year', 'fillet', 'litter', 'packet', 'slices'] 'year', 'fillet', 'litter', 'packet', 'slices']
instructions = ['and', 'or', 'chopped', 'diced', 'brewed', 'chilled', 'chunky', instructions = ['and', 'or', 'chopped', 'diced', 'brewed', 'chilled',
'small', 'medium', 'large', 'couarse(:?ly)?', 'cracked', 'chunky', 'small', 'medium', 'large', 'couarse', 'cracked',
'crushed', 'ground', 'cooked', 'cubed', 'crumbled', 'cut', 'crushed', 'ground', 'cooked', 'cubed', 'crumbled', 'cut',
'cold', 'hot', 'warm', 'day', 'old', 'drained', 'canned', 'cold', 'hot', 'warm', 'day', 'old', 'drained', 'canned',
'dried', 'dry', 'fine(?:ly)', 'firmly', 'fresh', 'frozen', 'dried', 'dry', 'fine', 'firm', 'fresh', 'frozen',
'grated', 'grilled', 'hard', 'hot', 'juliened?', 'leftover', 'grated', 'grilled', 'hard', 'hot', 'juliened?', 'leftover',
'light', 'lite', 'mashed', 'melted', 'minced', 'packed', 'light', 'lite', 'mashed', 'melted', 'minced', 'packed',
'peeled', 'pitted', 'sliced', 'prepared', 'refrigerated', 'peeled', 'pitted', 'sliced', 'prepared', 'refrigerated',
@ -36,9 +37,13 @@ def parse_ingredient(ingredient_text):
units_regex = "|".join([f'[{unit[0]}{unit[0].capitalize()}]{unit[1:]}' units_regex = "|".join([f'[{unit[0]}{unit[0].capitalize()}]{unit[1:]}'
for unit in units]) for unit in units])
units_regex = f"((?:(?:{units_regex})e?s?)?)" units_regex = f"((?:(?:{units_regex})e?s?)?)"
instructions_regex = "|".join([f'[{inst[0]}{inst[0].capitalize()}]{inst[1:]}'
for inst in instructions])
instructions_regex = f"((?:(?:(?:{instructions_regex})(?:ly)?)| )*)"
regex = re.compile(number_regex + regex = re.compile(number_regex +
units_regex + units_regex +
instructions_regex +
ingredient_regex + ingredient_regex +
supplement_regex) supplement_regex)
@ -47,7 +52,9 @@ def parse_ingredient(ingredient_text):
if not m: if not m:
return None return None
return [text.strip() for text in m.groups()] return [text.strip() if text else None for text in m.groups()]
def reparse_ingredients(session): def reparse_ingredients(session):
cte = (except_(select(db.RecipeIngredient.id), cte = (except_(select(db.RecipeIngredient.id),
@ -59,14 +66,16 @@ def reparse_ingredients(session):
parts = parse_ingredient(ingredient.text) parts = parse_ingredient(ingredient.text)
if not parts: if not parts:
continue continue
quantity, unit, name, supplement = parts quantity, unit, instruction, name, supplement = parts
session.add(db.RecipeIngredientParts(id = ingredient.id, session.add(db.RecipeIngredientParts(id = ingredient.id,
quantity = quantity, quantity = quantity,
unit = unit, unit = unit,
instruction = instruction,
ingredient = name, ingredient = name,
supplement = supplement)) supplement = supplement))
def load_recipe(recipe_url): def load_recipe(recipe_url):
try: try:
logging.info(f'Loading Recipe: {recipe_url}') logging.info(f'Loading Recipe: {recipe_url}')
@ -107,10 +116,11 @@ def parse_recipe(session, recipe, site):
parts = parse_ingredient(ingred.text) parts = parse_ingredient(ingred.text)
if parts: if parts:
quantity, unit, ingredient, supplement = parts quantity, unit, instruction,ingredient, supplement = parts
ingred_parts = db.RecipeIngredientParts(id = ingred.id, ingred_parts = db.RecipeIngredientParts(id = ingred.id,
quantity = quantity, quantity = quantity,
unit = unit, unit = unit,
instruction = instruction,
ingredient = ingredient, ingredient = ingredient,
supplement = supplement) supplement = supplement)
session.add(ingred_parts) session.add(ingred_parts)