From 70b33b5c0737c546598a3fd4381ee8693cfcd371 Mon Sep 17 00:00:00 2001 From: Andrei Stoica Date: Sun, 24 Jul 2022 21:01:31 -0400 Subject: [PATCH] updated README.md --- README.md | 35 ++++++++++++++++++++++++++++++----- src/func.sql | 13 ------------- src/triggers.sql | 20 -------------------- 3 files changed, 30 insertions(+), 38 deletions(-) delete mode 100644 src/func.sql delete mode 100644 src/triggers.sql diff --git a/README.md b/README.md index 4d01f19..ff9348a 100644 --- a/README.md +++ b/README.md @@ -22,14 +22,39 @@ python src/inser_sites.py data/sites.json ``` ## Usage +### Scrape and import new recipes ```sh -python src/scrape.py +python src/scrape.py <site> -id <identifier> +``` +To scrape only one recipe. + +or +```sh +python src/scrape.py <site> -a <N> +``` +To scrape `<N>` recipes + +By default it will start at id `0` or the greatest value of id already in the +database. To start at another value please use both `-id` and `-a`. + +``` +Scrape a recipe site for recipies + +positional arguments: + site Name of site + +options: + -h, --help show this help message and exit + -id ID, --identifier ID + url of recipe(reletive to base url of site) or commma seperated list + -a N, --auto N automaticaly generate identifier(must supply number of recipies to scrape) + -v, --verbose ``` - ## TODO - > ☐ automate scraping\ + > ☑ automate scraping\ > ☐ extend importing funcionality to more websites\ - > ☐ extracting quantity and name (via regex)\ - > ☐ matching ingredients to recipe ingredients \ No newline at end of file + > ☑ extracting quantity and name (via regex)\ + > ☐ create ontology of ingredients\ + > ☐ visualization diff --git a/src/func.sql b/src/func.sql deleted file mode 100644 index 1a1d7ac..0000000 --- a/src/func.sql +++ /dev/null @@ -1,13 +0,0 @@ -DROP FUNCTION IF EXISTS cos_sim; -CREATE FUNCTION cos_sim(a TEXT, b TEXT) -returns REAL -AS $$ - from sentence_transformers import CrossEncoder, util - model_name = "cross-encoder/stsb-roberta-large" - - if not SD.get(model_name): - SD[model_name] 
= CrossEncoder(model_name) - model = SD[model_name] - - return model.predict([(a, b)])[0] -$$ LANGUAGE plpython3u; \ No newline at end of file diff --git a/src/triggers.sql b/src/triggers.sql deleted file mode 100644 index 434f2f8..0000000 --- a/src/triggers.sql +++ /dev/null @@ -1,20 +0,0 @@ -CREATE OR REPLACE FUNCTION recipe_ingredient_update() -RETURNS TRIGGER -AS -$$ -BEGIN - WITH I AS ( - SELECT "Ingredient".id, cos_sim(NEW.text, "Ingredient".name) as sim - FROM "Ingredient" - WHERE regexp_split_to_array(NEW.text, E'\\s+') && regexp_split_to_array("Ingredient".name, E'\\s+') - ORDER BY sim DESC - ) - SELECT I.id INTO NEW.ingredient_id from I LIMIT 1; - RETURN NEW; -END; -$$ LANGUAGE plpgsql; - -CREATE OR REPLACE TRIGGER match_ingredient - BEFORE INSERT ON "RecipeIngredient" - FOR EACH ROW - EXECUTE FUNCTION recipe_ingredient_update(); \ No newline at end of file