refactoring
This commit is contained in:
parent
294231dd48
commit
9a15f6c031
|
|
@ -1,9 +1,7 @@
|
|||
from ast import alias
|
||||
from dis import Instruction
|
||||
import db
|
||||
import sys
|
||||
from recipe_graph import db
|
||||
import re
|
||||
from sqlalchemy import select, desc, exists, not_, except_
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
import bs4
|
||||
from urllib.request import urlopen
|
||||
|
|
@ -129,62 +127,64 @@ def parse_recipe(session, recipe, site):
|
|||
|
||||
return recipe
|
||||
|
||||
def main(argv=None):
    """Scrape one or more recipes from a configured site into the database.

    The site is looked up by name. Either a single recipe identifier is
    scraped (``-id`` alone), or ``-a N`` consecutive numeric identifiers are
    scraped, starting at ``-id`` when given and otherwise one past the
    identifier of the last stored recipe for that site.

    Every recipe is parsed inside its own SAVEPOINT so that a failure rolls
    back only that recipe; recipes scraped earlier in the same run are kept
    and committed when the outer transaction ends.

    Args:
        argv: Optional list of argument strings, without the program name.
            When ``None`` (the default) argparse reads ``sys.argv[1:]``.

    Raises:
        SystemExit: On invalid command-line arguments, or when ``-a`` is
            used without ``-id`` and the site has no stored recipe to
            continue from.
    """
    parser = ArgumentParser(description="Scrape a recipe site for recipies")
    parser.add_argument('site',
                        help='Name of site')
    parser.add_argument('-id', '--identifier', dest='id',
                        help='url of recipe(reletive to base url of site) or commma seperated list')
    parser.add_argument('-a', '--auto', action='store', dest='n',
                        help='automaticaly generate identifier(must supply number of recipies to scrape)')
    parser.add_argument('-v', '--verbose', action='store_true')

    # Never hand the raw sys.argv to parse_args(): its first element is the
    # program name, which would be consumed as the positional `site`.
    args = parser.parse_args(argv)
    if args.verbose:
        logging.basicConfig(level=logging.INFO)
        logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)

    eng = db.get_engine()
    S = sessionmaker(eng)

    with S.begin() as sess:
        site = sess.query(db.RecipeSite).where(db.RecipeSite.name == args.site).one()

        recipe_ids = []
        if args.id and not args.n:
            # NOTE: the identifier is used as given; a comma separated value
            # is not split into several identifiers here.
            recipe_ids.append(args.id)
            logging.info(f'Retreiving single recipe: {args.id}')
        elif args.n:
            if not args.id:
                # NOTE(review): the ordering happens in the database on
                # `identifier`; if that column is textual the "last" recipe
                # is the lexicographic maximum -- confirm against the schema.
                last_recipe = (
                    sess.query(db.Recipe)
                    .where(db.Recipe.recipe_site_id == site.id)
                    .order_by(desc(db.Recipe.identifier))
                    .limit(1)
                    .scalar()
                )
                if last_recipe is None:
                    # scalar() yields None when the site has no recipes yet,
                    # so there is nothing to continue from.
                    parser.error(
                        f'no recipes stored for site {args.site!r}; '
                        'use -id to supply a starting identifier'
                    )
                starting_id = int(last_recipe.identifier) + 1
            else:
                starting_id = int(args.id)
            recipe_ids = range(starting_id, starting_id + int(args.n))
            logging.info(f'Retreving {args.n} recipes from {site.base_url} starting at {starting_id}')
        else:
            logging.warning('Nothing to scrape: supply -id and/or -a')

        for recipe_id in recipe_ids:
            try:
                # The SAVEPOINT context manager commits on success and rolls
                # back on any exception, including KeyboardInterrupt.
                with sess.begin_nested():
                    recipe = db.Recipe(identifier=recipe_id, recipe_site_id=site.id)
                    parse_recipe(sess, recipe, site)
            except KeyboardInterrupt:
                # Stop scraping, but keep the recipes already saved.
                break
            except Exception:
                # Best effort: record the failure with its traceback and
                # move on to the next recipe.
                logging.exception('Failed to scrape recipe %s', recipe_id)
                continue
|
||||
|
||||
|
||||
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":  # pragma: no cover
    main()
|
||||
Loading…
Reference in New Issue