updated README.md

This commit is contained in:
Andrei Stoica 2022-07-24 21:01:31 -04:00
parent 53837ea657
commit 70b33b5c07
3 changed files with 30 additions and 38 deletions

View File

@ -22,14 +22,39 @@ python src/inser_sites.py data/sites.json
``` ```
## Usage ## Usage
### Scrape
import new recipes import new recipes
```sh ```sh
python src/scrape.py <SiteName> <RecipeIdentifier> python src/scrape.py <SiteName> -id <RecipeIdentifier>
```
To scrape only one recipe.
or
```sh
python src/scrape.py <SiteName> -a <N>
```
To scrape `<N>` recipes
By default it will start at id `0`, or at the greatest id already in the
database. To start at another value, use both `-id` and `-a`.
```
Scrape a recipe site for recipies
positional arguments:
site Name of site
options:
-h, --help show this help message and exit
-id ID, --identifier ID
url of recipe(reletive to base url of site) or commma seperated list
-a N, --auto N automaticaly generate identifier(must supply number of recipies to scrape)
-v, --verbose
``` ```
## TODO ## TODO
> ☐ automate scraping\ > automate scraping\
> ☐ extend importing funcionality to more websites\ > ☐ extend importing funcionality to more websites\
> ☐ extracting quantity and name (via regex)\ > ☑ extracting quantity and name (via regex)\
> ☐ matching ingredients to recipe ingredients > ☐ create ontology of ingredients
> ☐ visualization

View File

@ -1,13 +0,0 @@
DROP FUNCTION IF EXISTS cos_sim;
-- cos_sim(a, b): score the pair of texts (a, b) with the
-- "cross-encoder/stsb-roberta-large" CrossEncoder model and return the
-- model's prediction for that single pair as REAL.
--
-- Parameters:
--   a, b  the two texts to compare.
-- Returns:
--   the first (only) element of model.predict([(a, b)]), as a float.
--   NULL when either argument is NULL (the function is STRICT, so the
--   model is never called with None).
CREATE FUNCTION cos_sim(a TEXT, b TEXT)
RETURNS REAL
AS $$
from sentence_transformers import CrossEncoder

model_name = "cross-encoder/stsb-roberta-large"

# SD is PL/Python's per-function dictionary; keeping the model there means
# it is constructed once and reused by later calls instead of per call.
model = SD.get(model_name)
if model is None:
    model = CrossEncoder(model_name)
    SD[model_name] = model

# predict() is given a one-element batch; convert the single score to a
# plain Python float so the conversion to REAL does not depend on how the
# returned scalar type renders as a string.
return float(model.predict([(a, b)])[0])
$$ LANGUAGE plpython3u STRICT;

View File

@ -1,20 +0,0 @@
-- Trigger function: fills NEW.ingredient_id with the id of the "Ingredient"
-- row whose name scores highest against NEW.text according to cos_sim().
--
-- Only ingredients that share at least one whitespace-separated token with
-- NEW.text are scored, so cos_sim() is not evaluated for every ingredient.
-- If no ingredient shares a token, the SELECT ... INTO finds no row and
-- NEW.ingredient_id is set to NULL.
--
-- Returns NEW so the (possibly modified) row continues through the insert.
CREATE OR REPLACE FUNCTION recipe_ingredient_update()
RETURNS TRIGGER
AS
$$
BEGIN
    -- ORDER BY and LIMIT are applied in the same query: an ORDER BY inside
    -- a CTE does not guarantee the row order seen by an outer LIMIT.
    -- Ties on the score are broken by id so the result is deterministic.
    SELECT candidate.id
    INTO NEW.ingredient_id
    FROM "Ingredient" AS candidate
    WHERE regexp_split_to_array(NEW.text, E'\\s+')
          && regexp_split_to_array(candidate.name, E'\\s+')
    ORDER BY cos_sim(NEW.text, candidate.name) DESC, candidate.id
    LIMIT 1;

    RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Run recipe_ingredient_update() for each row before it is inserted into
-- "RecipeIngredient".
CREATE OR REPLACE TRIGGER match_ingredient
    BEFORE INSERT
    ON "RecipeIngredient"
    FOR EACH ROW
    EXECUTE FUNCTION recipe_ingredient_update();