From 3092f67899649f98c9cfa310dfb1cc72df43b4bf Mon Sep 17 00:00:00 2001 From: Andrei Stoica Date: Fri, 22 Jul 2022 18:57:26 -0400 Subject: [PATCH] added more units --- src/scrape.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/scrape.py b/src/scrape.py index fc86171..23d75e9 100644 --- a/src/scrape.py +++ b/src/scrape.py @@ -11,9 +11,11 @@ import logging from argparse import ArgumentParser def parse_ingredient(ingredient_text): - units = ['teaspoon', 'tablespoon', 'gram', 'once', 'jar', 'cup', 'pinch', + units = ['teaspoon', 'tablespoon', 'gram', 'ounce', 'jar', 'cup', 'pinch', 'container', 'slice', 'package', 'pound', 'can', 'dash', 'spear', - 'bunch', 'quart', 'cube', 'envelope', 'square', 'sprig'] + 'bunch', 'quart', 'cube', 'envelope', 'square', 'sprig', 'bags', + 'box', 'drop', 'fluid ounce', 'gallon', 'head', 'link', 'loaf', + 'pint', 'pod', 'sheet', 'stalk', 'whole', 'bar'] number_regex = '((?:[\d\\./\\u00BC-\\u00BE\\u2150-\\u215E]*\s?(?:\(.+\))?)*)' ingredient_regex = '([a-zA-Z \'\-]+)' supplement_regex = ',?(.*)'