diff --git a/src/recipe_graph/scrape.py b/src/recipe_graph/scrape.py
index 02489a7..9effc40 100644
--- a/src/recipe_graph/scrape.py
+++ b/src/recipe_graph/scrape.py
@@ -64,30 +64,31 @@ def parse_ingredient(
     return [text.strip() if text else None for text in m.groups()]
 
 
-
-def reparse_ingredients(session):
+def get_missing_ingredients(session):
+    """Return the RecipeIngredient rows whose id has no RecipeIngredientParts row."""
     cte = (
         except_(select(db.RecipeIngredient.id), select(db.RecipeIngredientParts.id))
     ).alias("missing")
     missing = (
         session.query(db.RecipeIngredient).where(db.RecipeIngredient.id.in_(cte)).all()
     )
+    return missing
 
+
+def reparse_ingredients(session):
+    """Add a parts row to the session for each ingredient that lacks one.
+
+    Nothing is committed here.
+    """
+    missing = get_missing_ingredients(session)
     for ingredient in missing:
-        parts = parse_ingredient(ingredient.text)
-        if not parts:
-            continue
-        quantity, unit, instruction, name, supplement = parts
-        session.add(
-            db.RecipeIngredientParts(
-                id=ingredient.id,
-                quantity=quantity,
-                unit=unit,
-                instruction=instruction,
-                ingredient=name,
-                supplement=supplement,
-            )
-        )
+        parts = ingredient_to_parts(ingredient)
+        # NOTE(review): ingredient_to_parts is defined outside this hunk; the
+        # inlined code it replaces skipped unparseable text, so presumably it
+        # can return None -- confirm. session.add(None) would raise.
+        if parts is None:
+            continue
+        session.add(parts)
 
 
 def load_page(recipe_url: str) -> bs4.BeautifulSoup: