Automatic import of N recipes
This commit is contained in:
parent
54ffd79836
commit
b5061caed5
@@ -1,5 +1,5 @@
 import db
-from sqlalchemy import select
+from sqlalchemy import select, desc
 from sqlalchemy.orm import sessionmaker
 import bs4
 from urllib.request import urlopen
@@ -9,8 +9,10 @@ from argparse import ArgumentParser
 parser = ArgumentParser(description="Scrape a recipe site for recipes")
 parser.add_argument('site',
                     help='Name of site')
-parser.add_argument('identifier',
-                    help='url of recipe (relative to base url of site)')
+parser.add_argument('-id', '--identifier', dest='id',
+                    help='url of recipe (relative to base url of site) or comma separated list')
+parser.add_argument('-a', '--auto', action='store', dest='n',
+                    help='automatically generate identifiers (must supply number of recipes to scrape)')
 parser.add_argument('-v', '--verbose', action='store_true')
 
 args = parser.parse_args()
@@ -22,21 +24,61 @@ eng = db.get_engine()
 S = sessionmaker(eng)
 
 with S.begin() as sess:
-    site = sess.query(db.RecipeSite).where(db.RecipeSite.name == 'AllRecipe').one()
+    site = sess.query(db.RecipeSite).where(db.RecipeSite.name == args.site).one()
+    site_id = site.id
 
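+    # Build the list of identifiers to scrape: either the single id passed on
+    # the command line, or a run of args.n consecutive ids in automatic mode.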
+    recipe_ids = []
+    starting_id = 0
+    if args.id and not args.n:
+        recipe_ids.append(args.id)
+        logging.info(f'Retrieving single recipe: {args.id}')
+    elif args.n:
+        if not args.id:
+            # No explicit start given: resume after the highest identifier
+            # already stored for this site.
+            last_recipe = sess.query(db.Recipe).\
+                where(db.Recipe.recipe_site_id == site.id).\
+                order_by(desc(db.Recipe.identifier)).\
+                limit(1).\
+                scalar()
+            starting_id = int(last_recipe.identifier) + 1
+        else:
+            starting_id = int(args.id)
+        recipe_ids = range(starting_id, starting_id + int(args.n))
+        logging.info(f'Retrieving {args.n} recipes from {site.base_url} starting at {starting_id}')
+
-    recipe = db.Recipe(identifier = args.identifier, recipe_site_id = site.id)
-    with urlopen(site.base_url + recipe.identifier) as f:
-        recipe_page = bs4.BeautifulSoup(f.read().decode())
 
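+    # Fetch and parse each recipe page; entries that fail to download or
+    # parse are logged and skipped.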
+    for recipe_id in recipe_ids:
+        recipe = db.Recipe(identifier = recipe_id, recipe_site_id = site.id)
+
-    name_div = recipe_page.find_all(class_=site.name_class)[0]
-    recipe.name = name_div.text
-    sess.add(recipe)
-    sess.flush()
-    logging.info(f"Adding Recipe {recipe}")
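+        # Download and parse the page; a 404 or a missing name element aborts
+        # this recipe without touching the session.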
+        recipe_url = f'{site.base_url}/{recipe.identifier}'
+        logging.info(f'Loading Recipe: {recipe_url}')
+        try:
+            with urlopen(recipe_url) as f:
+                if f.getcode() == 404:
+                    raise Exception(f"Recipe does not exist: {recipe_url}")
+                recipe_page = bs4.BeautifulSoup(f.read().decode())
+
+            name_candidates = recipe_page.find_all(class_=site.name_class)
+            if len(name_candidates) == 0:
+                raise Exception(f"Could not extract recipe name: {recipe_url}")
+            name_div = name_candidates[0]
+        except Exception as e:
+            logging.warning(f"Could not download or parse recipe: {recipe_url}")
+            logging.warning(e)
+            continue
+
+        recipe.name = name_div.text
+        logging.info(f"Adding Recipe {recipe}")
-
-    ingredients = []
-    for ingredient in recipe_page.find_all(class_=site.ingredient_class):
-        ingredients.append(db.RecipeIngredient(text=ingredient.text,
-                                               recipe_id=recipe.id))
-    logging.info(f"{len(ingredients)} ingredients found. Inserting into DB")
-    sess.add_all(ingredients)
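+        # Flush after adding so recipe.id is assigned before the ingredient
+        # rows reference it.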
+        sess.add(recipe)
+        sess.flush()
+
+        ingredients = []
+        ingred_candidates = recipe_page.find_all(class_=site.ingredient_class)
+        for ingredient in ingred_candidates:
+            ingredients.append(db.RecipeIngredient(text=ingredient.text,
+                                                   recipe_id=recipe.id))
+
+        logging.info(f"{len(ingredients)} ingredients found. Inserting into DB")
+        sess.add_all(ingredients)
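With these changes, an invocation might look like the following (the script's filename isn't shown in this commit, so scrape.py is a stand-in, and 12345 is a placeholder identifier):

    python scrape.py AllRecipe -id 12345
    python scrape.py AllRecipe -a 20

The first form fetches a single recipe by identifier; the second generates 20 consecutive identifiers automatically, resuming after the highest identifier already stored for the site.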