Added an instructions group to the ingredient-parsing regex
This commit is contained in:
parent
910597b3ff
commit
53837ea657
|
|
@ -48,6 +48,7 @@ class RecipeIngredientParts(Base):
|
||||||
id = Column(Integer, ForeignKey("RecipeIngredient.id"), primary_key=True)
|
id = Column(Integer, ForeignKey("RecipeIngredient.id"), primary_key=True)
|
||||||
quantity = Column(String)
|
quantity = Column(String)
|
||||||
unit = Column(String)
|
unit = Column(String)
|
||||||
|
instruction = Column(String)
|
||||||
ingredient = Column(String)
|
ingredient = Column(String)
|
||||||
supplement = Column(String)
|
supplement = Column(String)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,17 +12,18 @@ import logging
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
|
|
||||||
def parse_ingredient(ingredient_text):
|
def parse_ingredient(ingredient_text):
|
||||||
|
|
||||||
units = ['teaspoon', 'tablespoon', 'gram', 'ounce', 'jar', 'cup', 'pinch',
|
units = ['teaspoon', 'tablespoon', 'gram', 'ounce', 'jar', 'cup', 'pinch',
|
||||||
'container', 'slice', 'package', 'pound', 'can', 'dash', 'spear',
|
'container', 'slice', 'package', 'pound', 'can', 'dash', 'spear',
|
||||||
'bunch', 'quart', 'cube', 'envelope', 'square', 'sprig', 'bag',
|
'bunch', 'quart', 'cube', 'envelope', 'square', 'sprig', 'bag',
|
||||||
'box', 'drop', 'fluid ounce', 'gallon', 'head', 'link', 'loaf',
|
'box', 'drop', 'fluid ounce', 'gallon', 'head', 'link', 'loaf',
|
||||||
'pint', 'pod', 'sheet', 'stalk', 'whole', 'bar', 'bottle', 'bulb',
|
'pint', 'pod', 'sheet', 'stalk', 'whole', 'bar', 'bottle', 'bulb',
|
||||||
'year', 'fillet', 'litter', 'packet', 'slices']
|
'year', 'fillet', 'litter', 'packet', 'slices']
|
||||||
instructions = ['and', 'or', 'chopped', 'diced', 'brewed', 'chilled', 'chunky',
|
instructions = ['and', 'or', 'chopped', 'diced', 'brewed', 'chilled',
|
||||||
'small', 'medium', 'large', 'couarse(:?ly)?', 'cracked',
|
'chunky', 'small', 'medium', 'large', 'couarse', 'cracked',
|
||||||
'crushed', 'ground', 'cooked', 'cubed', 'crumbled', 'cut',
|
'crushed', 'ground', 'cooked', 'cubed', 'crumbled', 'cut',
|
||||||
'cold', 'hot', 'warm', 'day', 'old', 'drained', 'canned',
|
'cold', 'hot', 'warm', 'day', 'old', 'drained', 'canned',
|
||||||
'dried', 'dry', 'fine(?:ly)', 'firmly', 'fresh', 'frozen',
|
'dried', 'dry', 'fine', 'firm', 'fresh', 'frozen',
|
||||||
'grated', 'grilled', 'hard', 'hot', 'juliened?', 'leftover',
|
'grated', 'grilled', 'hard', 'hot', 'juliened?', 'leftover',
|
||||||
'light', 'lite', 'mashed', 'melted', 'minced', 'packed',
|
'light', 'lite', 'mashed', 'melted', 'minced', 'packed',
|
||||||
'peeled', 'pitted', 'sliced', 'prepared', 'refrigerated',
|
'peeled', 'pitted', 'sliced', 'prepared', 'refrigerated',
|
||||||
|
|
@ -36,9 +37,13 @@ def parse_ingredient(ingredient_text):
|
||||||
units_regex = "|".join([f'[{unit[0]}{unit[0].capitalize()}]{unit[1:]}'
|
units_regex = "|".join([f'[{unit[0]}{unit[0].capitalize()}]{unit[1:]}'
|
||||||
for unit in units])
|
for unit in units])
|
||||||
units_regex = f"((?:(?:{units_regex})e?s?)?)"
|
units_regex = f"((?:(?:{units_regex})e?s?)?)"
|
||||||
|
instructions_regex = "|".join([f'[{inst[0]}{inst[0].capitalize()}]{inst[1:]}'
|
||||||
|
for inst in instructions])
|
||||||
|
instructions_regex = f"((?:(?:(?:{instructions_regex})(?:ly)?)| )*)"
|
||||||
|
|
||||||
regex = re.compile(number_regex +
|
regex = re.compile(number_regex +
|
||||||
units_regex +
|
units_regex +
|
||||||
|
instructions_regex +
|
||||||
ingredient_regex +
|
ingredient_regex +
|
||||||
supplement_regex)
|
supplement_regex)
|
||||||
|
|
||||||
|
|
@ -47,7 +52,9 @@ def parse_ingredient(ingredient_text):
|
||||||
if not m:
|
if not m:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return [text.strip() for text in m.groups()]
|
return [text.strip() if text else None for text in m.groups()]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def reparse_ingredients(session):
|
def reparse_ingredients(session):
|
||||||
cte = (except_(select(db.RecipeIngredient.id),
|
cte = (except_(select(db.RecipeIngredient.id),
|
||||||
|
|
@ -59,14 +66,16 @@ def reparse_ingredients(session):
|
||||||
parts = parse_ingredient(ingredient.text)
|
parts = parse_ingredient(ingredient.text)
|
||||||
if not parts:
|
if not parts:
|
||||||
continue
|
continue
|
||||||
quantity, unit, name, supplement = parts
|
quantity, unit, instruction, name, supplement = parts
|
||||||
session.add(db.RecipeIngredientParts(id = ingredient.id,
|
session.add(db.RecipeIngredientParts(id = ingredient.id,
|
||||||
quantity = quantity,
|
quantity = quantity,
|
||||||
unit = unit,
|
unit = unit,
|
||||||
|
instruction = instruction,
|
||||||
ingredient = name,
|
ingredient = name,
|
||||||
supplement = supplement))
|
supplement = supplement))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def load_recipe(recipe_url):
|
def load_recipe(recipe_url):
|
||||||
try:
|
try:
|
||||||
logging.info(f'Loading Recipe: {recipe_url}')
|
logging.info(f'Loading Recipe: {recipe_url}')
|
||||||
|
|
@ -107,10 +116,11 @@ def parse_recipe(session, recipe, site):
|
||||||
|
|
||||||
parts = parse_ingredient(ingred.text)
|
parts = parse_ingredient(ingred.text)
|
||||||
if parts:
|
if parts:
|
||||||
quantity, unit, ingredient, supplement = parts
|
quantity, unit, instruction,ingredient, supplement = parts
|
||||||
ingred_parts = db.RecipeIngredientParts(id = ingred.id,
|
ingred_parts = db.RecipeIngredientParts(id = ingred.id,
|
||||||
quantity = quantity,
|
quantity = quantity,
|
||||||
unit = unit,
|
unit = unit,
|
||||||
|
instruction = instruction,
|
||||||
ingredient = ingredient,
|
ingredient = ingredient,
|
||||||
supplement = supplement)
|
supplement = supplement)
|
||||||
session.add(ingred_parts)
|
session.add(ingred_parts)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue