Compare commits

...

1 Commit

Author SHA1 Message Date
Andrei Stoica ba15e3cad3 moved from urllib to requests
continuous-integration/drone/push Build is failing Details
2023-05-18 09:01:37 -04:00
4 changed files with 15 additions and 14 deletions

View File

@ -3,12 +3,13 @@ requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"
[project] [project]
name = "recepie_graph" name = "recipe_graph"
version = "0.0.1" version = "0.0.1"
description = "mapping out recipes relations" description = "mapping out recipes relations"
dependencies = [ dependencies = [
"SQLAlchemy==1.4.39", "SQLAlchemy==1.4.39",
"python-dotenv==0.20.0", "python-dotenv==0.20.0",
"beautifulsoup4==4.11.1", "beautifulsoup4==4.11.1",
"psycopg2-binary==2.9.3" "psycopg2-binary==2.9.3",
"requests~=2.30.0"
] ]

View File

@ -12,6 +12,7 @@ pyparsing==3.0.9
pytest==7.1.3 pytest==7.1.3
pytest-cov==4.0.0 pytest-cov==4.0.0
python-dotenv==0.20.0 python-dotenv==0.20.0
requests~=2.30.0
soupsieve==2.3.2.post1 soupsieve==2.3.2.post1
SQLAlchemy==1.4.39 SQLAlchemy==1.4.39
tomli==2.0.1 tomli==2.0.1

View File

@ -4,7 +4,7 @@ import re
from sqlalchemy import select, desc, exists, not_, except_ from sqlalchemy import select, desc, exists, not_, except_
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
import bs4 import bs4
from urllib.request import urlopen import requests as req
from urllib.parse import urljoin from urllib.parse import urljoin
import logging import logging
from argparse import ArgumentParser from argparse import ArgumentParser
@ -73,23 +73,22 @@ def reparse_ingredients(session):
def load_recipe(recipe_url): def load_page(recipe_url):
try: try:
logging.info(f'Loading Recipe: {recipe_url}') logging.info(f'Loading Page: {recipe_url}')
with urlopen(recipe_url) as f: with req.get(recipe_url) as f:
if f.getcode() == 404: if f.status_code == 404:
raise Exception(f"Recipe Does not exist: {recipe_url}") raise Exception(f"Page does not exist (404): {recipe_url}")
return bs4.BeautifulSoup(f.read().decode(), 'html.parser') return bs4.BeautifulSoup(f.read().decode(), 'html.parser')
except Exception as e: except Exception as e:
logging.warning(f"Could not download or parse recipe: {recipe_url}") logging.warning(f"Could not download or parse recipe: {recipe_url}")
logging.warning(e) logging.warning(e)
return None
def parse_recipe(session, recipe, site): def parse_recipe(session, recipe, site):
recipe_url = urljoin(site.base_url, str(recipe.identifier)) recipe_url = urljoin(site.base_url, str(recipe.identifier))
recipe_page = load_recipe(recipe_url) recipe_page = load_page(recipe_url)
if not recipe_page: if not recipe_page:
return None return None
@ -126,7 +125,7 @@ def parse_recipe(session, recipe, site):
return recipe return recipe
def main(): def main(): # pragma: no cover
parser = ArgumentParser(description="Scrape a recipe site for recipies") parser = ArgumentParser(description="Scrape a recipe site for recipies")
parser.add_argument('site', parser.add_argument('site',
help='Name of site') help='Name of site')

View File

@ -4,9 +4,9 @@ from bs4 import BeautifulSoup
import pytest import pytest
def test_load_recipe(): def test_load_page():
page = scrape.load_recipe("https://hs.andreistoica.ca:4943") page = scrape.load_page("https://hs.andreistoica.ca:4943")
assert type(page) == BeautifulSoup assert type(page) == BeautifulSoup
page = scrape.load_recipe("https://hs.andreistoica.ca:4943/some-nonesense") page = scrape.load_page("https://hs.andreistoica.ca:4943/some-nonesense")
assert page == None assert page == None