"""Scrape a single recipe page from a configured recipe site and store it.

Usage: SITE IDENTIFIER [-v]

Looks up the RecipeSite row named by the ``site`` argument, fetches the
page at ``site.base_url + identifier``, extracts the recipe name and the
ingredient lines using the site's configured CSS classes, and inserts a
Recipe plus its RecipeIngredient rows in a single transaction.
"""
import db
from sqlalchemy import select
from sqlalchemy.orm import sessionmaker
import bs4
from urllib.request import urlopen
import logging
from argparse import ArgumentParser

parser = ArgumentParser(description="Scrape a recipe site for recipes")
parser.add_argument('site', help='Name of site')
parser.add_argument('identifier', help='url of recipe (relative to base url of site)')
parser.add_argument('-v', '--verbose', action='store_true')
args = parser.parse_args()

if args.verbose:
    logging.basicConfig(level=logging.INFO)
    logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)

eng = db.get_engine()
S = sessionmaker(eng)

# One transaction for the whole scrape: commits on clean exit,
# rolls back everything on any exception.
with S.begin() as sess:
    # BUG FIX: the site name was hard-coded to 'AllRecipe', ignoring the
    # 'site' command-line argument entirely. Query by args.site so any
    # configured RecipeSite can be scraped.
    site = sess.query(db.RecipeSite).where(db.RecipeSite.name == args.site).one()
    recipe = db.Recipe(identifier=args.identifier, recipe_site_id=site.id)
    with urlopen(site.base_url + recipe.identifier) as f:
        # NOTE(review): .decode() assumes UTF-8; if a site serves another
        # charset this should read it from the response headers — confirm.
        # Explicit 'html.parser' avoids bs4's GuessedAtParserWarning and
        # keeps parsing consistent across environments.
        recipe_page = bs4.BeautifulSoup(f.read().decode(), 'html.parser')
    # The recipe name is the first element carrying the site's name class.
    # find() + explicit check replaces find_all(...)[0], which raised a
    # bare IndexError with no hint of what went wrong.
    name_div = recipe_page.find(class_=site.name_class)
    if name_div is None:
        raise ValueError(
            f"no element with class {site.name_class!r} on page; "
            "cannot determine recipe name")
    recipe.name = name_div.text
    sess.add(recipe)
    sess.flush()  # assigns recipe.id, needed for the ingredient FKs below
    logging.info("Adding Recipe %s", recipe)
    ingredients = [
        db.RecipeIngredient(text=ingredient.text, recipe_id=recipe.id)
        for ingredient in recipe_page.find_all(class_=site.ingredient_class)
    ]
    logging.info("%d ingredients found. Inserting into DB", len(ingredients))
    sess.add_all(ingredients)