diff --git a/src/scrape.py b/src/scrape.py index fc86171..23d75e9 100644 --- a/src/scrape.py +++ b/src/scrape.py @@ -11,9 +11,11 @@ import logging from argparse import ArgumentParser def parse_ingredient(ingredient_text): - units = ['teaspoon', 'tablespoon', 'gram', 'once', 'jar', 'cup', 'pinch', + units = ['teaspoon', 'tablespoon', 'gram', 'ounce', 'jar', 'cup', 'pinch', 'container', 'slice', 'package', 'pound', 'can', 'dash', 'spear', - 'bunch', 'quart', 'cube', 'envelope', 'square', 'sprig'] + 'bunch', 'quart', 'cube', 'envelope', 'square', 'sprig', 'bags', + 'box', 'drop', 'fluid ounce', 'gallon', 'head', 'link', 'loaf', + 'pint', 'pod', 'sheet', 'stalk', 'whole', 'bar'] number_regex = '((?:[\d\\./\\u00BC-\\u00BE\\u2150-\\u215E]*\s?(?:\(.+\))?)*)' ingredient_regex = '([a-zA-Z \'\-]+)' supplement_regex = ',?(.*)'