updated README.md
This commit is contained in:
parent
53837ea657
commit
70b33b5c07
35
README.md
35
README.md
|
|
@ -22,14 +22,39 @@ python src/inser_sites.py data/sites.json
|
|||
```
|
||||
|
||||
## Usage
|
||||
### Scrape
|
||||
import new recipes
|
||||
```sh
|
||||
python src/scrape.py <SiteName> <RecipeIdentifier>
|
||||
python src/scrape.py <SiteName> -id <RecipeIdentifier>
|
||||
```
|
||||
To scrape only one recipe.
|
||||
|
||||
or
|
||||
```sh
|
||||
python src/scrape.py <SiteName> -a <N>
|
||||
```
|
||||
To scrape `<N>` recipes
|
||||
|
||||
By default it will start at id `0` or the greatest value of id already in the
|
||||
database. To start at another value please use both `-id` and `-a`.
|
||||
|
||||
```
|
||||
Scrape a recipe site for recipies
|
||||
|
||||
positional arguments:
|
||||
site Name of site
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
-id ID, --identifier ID
|
||||
url of recipe(reletive to base url of site) or commma seperated list
|
||||
-a N, --auto N automaticaly generate identifier(must supply number of recipies to scrape)
|
||||
-v, --verbose
|
||||
```
|
||||
|
||||
|
||||
## TODO
|
||||
> ☐ automate scraping\
|
||||
> ☑ automate scraping\
|
||||
> ☐ extend importing functionality to more websites\
|
||||
> ☐ extracting quantity and name (via regex)\
|
||||
> ☐ matching ingredients to recipe ingredients
|
||||
> ☑ extracting quantity and name (via regex)\
|
||||
> ☐ create ontology of ingredients
|
||||
> ☐ visualization
|
||||
|
|
|
|||
13
src/func.sql
13
src/func.sql
|
|
@ -1,13 +0,0 @@
|
|||
-- cos_sim(a, b): similarity score for two text snippets, computed by the
-- "cross-encoder/stsb-roberta-large" CrossEncoder model from the
-- sentence_transformers Python package (it must be importable by the
-- server-side Python interpreter used by plpython3u).
--
-- Parameters: a, b  - the two texts to compare.
-- Returns:    REAL  - the model's score for the pair (a, b).
--
-- The function is dropped first so the script can be re-run to redefine it.
DROP FUNCTION IF EXISTS cos_sim;
CREATE FUNCTION cos_sim(a TEXT, b TEXT)
RETURNS REAL
AS $$
from sentence_transformers import CrossEncoder

model_name = "cross-encoder/stsb-roberta-large"

# SD is PL/Python's per-function dictionary that survives between calls,
# so the model is constructed once and then reused on later calls.
if not SD.get(model_name):
    SD[model_name] = CrossEncoder(model_name)
model = SD[model_name]

# predict() receives a list of pairs and yields one score per pair; convert
# the single score to a plain Python float before handing it to PostgreSQL.
return float(model.predict([(a, b)])[0])
$$ LANGUAGE plpython3u;
|
||||
|
|
@ -1,20 +0,0 @@
|
|||
-- recipe_ingredient_update(): trigger function that fills in
-- NEW.ingredient_id for the row being written.
--
-- Candidates are the "Ingredient" rows whose name shares at least one
-- whitespace-separated word with NEW.text (array overlap, &&). Among the
-- candidates, the one with the highest cos_sim(NEW.text, name) is chosen;
-- ties are broken by the lowest id so the result is deterministic.
--
-- If no candidate exists, SELECT ... INTO assigns NULL to NEW.ingredient_id.
--
-- Returns: NEW (the possibly modified row).
CREATE OR REPLACE FUNCTION recipe_ingredient_update()
RETURNS TRIGGER
AS
$$
BEGIN
    WITH candidate AS (
        SELECT
            "Ingredient".id,
            cos_sim(NEW.text, "Ingredient".name) AS sim
        FROM "Ingredient"
        WHERE regexp_split_to_array(NEW.text, E'\\s+')
           && regexp_split_to_array("Ingredient".name, E'\\s+')
    )
    -- The ordering must live in the same query level as LIMIT: an ORDER BY
    -- inside the CTE does not guarantee the row order seen by the outer query.
    SELECT candidate.id
    INTO NEW.ingredient_id
    FROM candidate
    ORDER BY candidate.sim DESC, candidate.id
    LIMIT 1;

    RETURN NEW;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- match_ingredient: before each row is inserted into "RecipeIngredient",
-- run recipe_ingredient_update() on that row.
CREATE OR REPLACE TRIGGER match_ingredient
    BEFORE INSERT
    ON "RecipeIngredient"
    FOR EACH ROW
    EXECUTE FUNCTION recipe_ingredient_update();
|
||||
Loading…
Reference in New Issue