refactoring

This commit is contained in:
Andrei Stoica 2022-10-15 14:40:16 -04:00
parent 294231dd48
commit 9a15f6c031
1 changed files with 56 additions and 56 deletions

View File

@ -1,9 +1,7 @@
from ast import alias import sys
from dis import Instruction from recipe_graph import db
import db
import re import re
from sqlalchemy import select, desc, exists, not_, except_ from sqlalchemy import select, desc, exists, not_, except_
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
import bs4 import bs4
from urllib.request import urlopen from urllib.request import urlopen
@ -129,62 +127,64 @@ def parse_recipe(session, recipe, site):
return recipe return recipe
def main(argv=None):
    """Command-line entry point: scrape recipes from one site into the database.

    The site is looked up by name, a list of recipe identifiers is built from
    the command-line options, and each identifier is wrapped in a ``db.Recipe``
    and handed to ``parse_recipe`` inside its own savepoint, so one failing
    recipe does not discard the ones already processed.

    Identifier selection:
      * ``-id X`` alone          -> the single identifier ``X``.
      * ``-a N`` alone           -> ``N`` consecutive integer identifiers,
                                    starting one past the highest identifier
                                    stored for the site (or at 0 if none).
      * ``-id X -a N`` together  -> ``N`` consecutive identifiers from ``int(X)``.

    Args:
        argv: Argument strings to parse, *without* the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: raised by argparse on invalid arguments or ``--help``.
        ValueError: if ``-a`` (or ``-id`` when combined with ``-a``) is not an
            integer string.
        Exception: whatever ``Query.one()`` raises when the site name does not
            match exactly one ``db.RecipeSite`` row.
    """
    parser = ArgumentParser(description="Scrape a recipe site for recipies")
    parser.add_argument('site',
                        help='Name of site')
    # NOTE(review): the help text advertises a comma separated list, but the
    # value is used below as one identifier and never split -- confirm intent.
    parser.add_argument('-id', '--identifier', dest='id',
                        help='url of recipe(reletive to base url of site) or commma seperated list')
    parser.add_argument('-a', '--auto', action='store', dest='n',
                        help='automaticaly generate identifier(must supply number of recipies to scrape)')
    parser.add_argument('-v', '--verbose', action='store_true')

    # Do not pass sys.argv here: its first element is the program name, which
    # argparse would consume as the positional ``site`` argument.
    args = parser.parse_args(argv)
    if args.verbose:
        logging.basicConfig(level=logging.INFO)
        logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)

    eng = db.get_engine()
    S = sessionmaker(eng)

    # S.begin() commits the outer transaction on a clean exit from the block.
    with S.begin() as sess:
        site = sess.query(db.RecipeSite).where(db.RecipeSite.name == args.site).one()

        recipe_ids = []
        starting_id = 0
        if args.id and not args.n:
            recipe_ids.append(args.id)
            logging.info(f'Retreiving single recipe: {args.id}')
        elif args.n:
            if not args.id:
                # NOTE(review): identifiers are converted with int() below, so
                # the column may be textual; if it is, this descending sort is
                # lexicographic rather than numeric -- confirm against the model.
                last_recipe = sess.query(db.Recipe).\
                    where(db.Recipe.recipe_site_id == site.id).\
                    order_by(desc(db.Recipe.identifier)).\
                    limit(1).\
                    scalar()
                # scalar() yields None when the site has no recipes yet; keep
                # the default starting_id in that case instead of crashing.
                if last_recipe is not None:
                    starting_id = int(last_recipe.identifier) + 1
            else:
                starting_id = int(args.id)
            recipe_ids = range(starting_id, starting_id+int(args.n))
            logging.info(f'Retreving {args.n} recipes from {site.base_url} starting at {starting_id}')
        else:
            logging.warning('No identifier (-id) or count (-a) supplied; nothing to scrape')

        for recipe_id in recipe_ids:
            # Open the savepoint before the try so the handlers below always
            # refer to the savepoint that belongs to this iteration.
            savepoint = sess.begin_nested()
            try:
                recipe = db.Recipe(identifier = recipe_id, recipe_site_id = site.id)
                parse_recipe(sess, recipe, site)
                savepoint.commit()
            except KeyboardInterrupt:
                # Drop the half-finished recipe and stop; recipes committed to
                # earlier savepoints stay in the outer transaction.
                savepoint.rollback()
                break
            except Exception as e:
                # Best effort: log the failure and move on to the next recipe.
                savepoint.rollback()
                logging.error(e)
                continue


if __name__ == "__main__": # pragma: no cover
    main()