42 lines
1.5 KiB
Python
42 lines
1.5 KiB
Python
import db
|
|
from sqlalchemy import select
|
|
from sqlalchemy.orm import sessionmaker
|
|
import bs4
|
|
from urllib.request import urlopen
|
|
import logging
|
|
from argparse import ArgumentParser
|
|
|
|
# Command-line interface: two required positionals (which site to scrape and
# which recipe on it) plus an optional verbosity switch.
parser = ArgumentParser(description="Scrape a recipe site for recipes")
parser.add_argument('site',
                    help='Name of site')
parser.add_argument('identifier',
                    help='url of recipe (relative to base url of site)')
parser.add_argument('-v', '--verbose', action='store_true',
                    help='enable INFO-level logging')
|
|
|
|
# Parse the command line. With -v/--verbose, raise both the root logger and
# SQLAlchemy's engine logger to INFO.
args = parser.parse_args()
if args.verbose:
    logging.basicConfig(level=logging.INFO)
    engine_log = logging.getLogger('sqlalchemy.engine')
    engine_log.setLevel(logging.INFO)
|
|
|
|
# A single engine for the whole run; S is the session factory bound to it.
eng = db.get_engine()
S = sessionmaker(bind=eng)
|
|
|
|
# Scrape one recipe page and store it, together with its ingredient lines, in
# a single transaction: per the SQLAlchemy docs, S.begin() commits when the
# block exits normally and rolls back if anything inside raises.
with S.begin() as sess:
    # Look up the site named on the command line. .one() raises if there is
    # no matching row (or more than one), so an unknown site aborts here.
    site = sess.query(db.RecipeSite).where(db.RecipeSite.name == args.site).one()

    recipe = db.Recipe(identifier=args.identifier, recipe_site_id=site.id)
    page_url = site.base_url + recipe.identifier
    with urlopen(page_url) as f:
        # NOTE(review): no parser is named, so bs4 picks whichever is
        # installed; .decode() assumes the page is UTF-8.
        recipe_page = bs4.BeautifulSoup(f.read().decode())

    # The recipe title is the first element carrying the site's name class.
    name_divs = recipe_page.find_all(class_=site.name_class)
    if not name_divs:
        raise IndexError(
            f"no element with class {site.name_class!r} found at {page_url}"
        )
    recipe.name = name_divs[0].text

    sess.add(recipe)
    # Flush before building the ingredient rows, which reference recipe.id.
    sess.flush()
    logging.info("Adding Recipe %s", recipe)

    ingredients = [
        db.RecipeIngredient(text=ingredient.text, recipe_id=recipe.id)
        for ingredient in recipe_page.find_all(class_=site.ingredient_class)
    ]
    logging.info("%d ingredients found. Inserting into DB", len(ingredients))
    sess.add_all(ingredients)