updated README.md
This commit is contained in:
parent
53837ea657
commit
70b33b5c07
35
README.md
35
README.md
|
|
@ -22,14 +22,39 @@ python src/inser_sites.py data/sites.json
|
||||||
```
|
```
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
### Scrape
|
||||||
import new recipes
|
import new recipes
|
||||||
```sh
|
```sh
|
||||||
python src/scrape.py <SiteName> <RecipeIdentifier>
|
python src/scrape.py <SiteName> -id <RecipeIdentifier>
|
||||||
|
```
|
||||||
|
To scrape only one recipe.
|
||||||
|
|
||||||
|
or
|
||||||
|
```sh
|
||||||
|
python src/scrape.py <SiteName> -a <N>
|
||||||
|
```
|
||||||
|
To scrape `<N>` recipes
|
||||||
|
|
||||||
|
By default it will start at id `0` or the greatest value of id already in the
|
||||||
|
database. To start at another value please use both `-id` and `-a`.
|
||||||
|
|
||||||
|
```
|
||||||
|
Scrape a recipe site for recipies
|
||||||
|
|
||||||
|
positional arguments:
|
||||||
|
site Name of site
|
||||||
|
|
||||||
|
options:
|
||||||
|
-h, --help show this help message and exit
|
||||||
|
-id ID, --identifier ID
|
||||||
|
url of recipe(reletive to base url of site) or commma seperated list
|
||||||
|
-a N, --auto N automaticaly generate identifier(must supply number of recipies to scrape)
|
||||||
|
-v, --verbose
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
## TODO
|
## TODO
|
||||||
> ☐ automate scraping\
|
> ☑ automate scraping\
|
||||||
> ☐ extend importing functionality to more websites\
|
> ☐ extend importing functionality to more websites\
|
||||||
> ☐ extracting quantity and name (via regex)\
|
> ☑ extracting quantity and name (via regex)\
|
||||||
> ☐ matching ingredients to recipe ingredients
|
> ☐ create ontology of ingredients
|
||||||
|
> ☐ visualization
|
||||||
|
|
|
||||||
13
src/func.sql
13
src/func.sql
|
|
@ -1,13 +0,0 @@
|
||||||
-- cos_sim(a, b): similarity score for a pair of text values.
--
-- The score is whatever the sentence_transformers CrossEncoder model
-- "cross-encoder/stsb-roberta-large" predicts for the pair (a, b); despite the
-- function name, no cosine is computed in this body.
--
-- Parameters:
--   a, b  the two texts to compare.
-- Returns:
--   REAL score for the pair, or NULL when either argument is NULL.
-- Requires:
--   the plpython3u language and the sentence_transformers package importable
--   by the server's Python interpreter.
DROP FUNCTION IF EXISTS cos_sim;
CREATE FUNCTION cos_sim(a TEXT, b TEXT)
RETURNS REAL
AS $$
from sentence_transformers import CrossEncoder

model_name = "cross-encoder/stsb-roberta-large"

# SQL NULL arrives as None; there is nothing to score, so answer NULL instead
# of handing None to the model.
if a is None or b is None:
    return None

# SD is PL/Python's per-function dictionary that persists between calls:
# build the model once and reuse it on later calls.
if model_name not in SD:
    SD[model_name] = CrossEncoder(model_name)
model = SD[model_name]

# predict() is given a single pair, so the first element of its result is the
# score for (a, b). Convert it to a plain Python float before returning.
return float(model.predict([(a, b)])[0])
$$ LANGUAGE plpython3u;
|
|
||||||
|
|
@ -1,20 +0,0 @@
|
||||||
-- recipe_ingredient_update(): trigger function that sets NEW.ingredient_id to
-- the id of the best-matching row in "Ingredient".
--
-- Candidates are the "Ingredient" rows whose name shares at least one
-- whitespace-separated token with NEW.text (array overlap, &&). Among those,
-- the row with the highest cos_sim(NEW.text, name) wins; ties are broken by
-- the lowest id so the result is deterministic.
--
-- ORDER BY and LIMIT are applied in the same query on purpose: ordering rows
-- inside a CTE and limiting in the outer query does not guarantee that the
-- outer query sees the rows in that order.
--
-- When no candidate exists (including when NEW.text is NULL), SELECT ... INTO
-- assigns NULL to NEW.ingredient_id.
--
-- Returns: NEW, so the (possibly modified) row continues to be inserted.
CREATE OR REPLACE FUNCTION recipe_ingredient_update()
RETURNS TRIGGER
AS
$$
BEGIN
    SELECT candidate.id
    INTO NEW.ingredient_id
    FROM "Ingredient" AS candidate
    WHERE regexp_split_to_array(NEW.text, E'\\s+')
          && regexp_split_to_array(candidate.name, E'\\s+')
    ORDER BY
        -- NULLS LAST: a missing score must never outrank a real one.
        cos_sim(NEW.text, candidate.name) DESC NULLS LAST,
        candidate.id
    LIMIT 1;

    RETURN NEW;
END;
$$ LANGUAGE plpgsql;
|
|
||||||
|
|
||||||
-- match_ingredient: before each row is inserted into "RecipeIngredient",
-- run recipe_ingredient_update() on that row.
CREATE OR REPLACE TRIGGER match_ingredient
    BEFORE INSERT
    ON "RecipeIngredient"
    FOR EACH ROW
    EXECUTE FUNCTION recipe_ingredient_update();
|
|
||||||
Loading…
Reference in New Issue