moved from urllib to requests
continuous-integration/drone/push Build is failing Details

This commit is contained in:
Andrei Stoica 2023-05-18 08:57:33 -04:00
parent 794dbe7d88
commit f58d5db061
4 changed files with 12 additions and 11 deletions

View File

@@ -10,5 +10,6 @@ dependencies = [
"SQLAlchemy==1.4.39",
"python-dotenv==0.20.0",
"beautifulsoup4==4.11.1",
"psycopg2-binary==2.9.3"
"psycopg2-binary==2.9.3",
"requests~=2.30.0"
]

View File

@@ -12,6 +12,7 @@ pyparsing==3.0.9
pytest==7.1.3
pytest-cov==4.0.0
python-dotenv==0.20.0
requests~=2.30.0
soupsieve==2.3.2.post1
SQLAlchemy==1.4.39
tomli==2.0.1

View File

@@ -4,7 +4,7 @@ import re
from sqlalchemy import select, desc, exists, not_, except_
from sqlalchemy.orm import sessionmaker
import bs4
from urllib.request import urlopen
import requests as req
from urllib.parse import urljoin
import logging
from argparse import ArgumentParser
@@ -73,23 +73,22 @@ def reparse_ingredients(session):
def load_page(recipe_url):
    """Download a page and parse it into a BeautifulSoup tree.

    Args:
        recipe_url: URL of the page to fetch.

    Returns:
        A ``bs4.BeautifulSoup`` document parsed with ``html.parser``, or
        ``None`` when the page could not be downloaded or parsed (the
        failure is logged as a warning instead of being raised).
    """
    try:
        logging.info(f'Loading Page: {recipe_url}')
        with req.get(recipe_url) as f:
            if f.status_code == 404:
                raise Exception(f"Page does not exist (404): {recipe_url}")
            # requests.Response has no read() method (that was the urllib
            # API); the raw body bytes are exposed as .content. Decoding
            # them here keeps the UTF-8 behaviour of the urllib version.
            return bs4.BeautifulSoup(f.content.decode(), 'html.parser')
    except Exception as e:
        # Best-effort loader: callers treat None as "skip this page".
        logging.warning(f"Could not download or parse recipe: {recipe_url}")
        logging.warning(e)
        return None
def parse_recipe(session, recipe, site):
recipe_url = urljoin(site.base_url, str(recipe.identifier))
recipe_page = load_recipe(recipe_url)
recipe_page = load_page(recipe_url)
if not recipe_page:
return None
@@ -126,7 +125,7 @@ def parse_recipe(session, recipe, site):
return recipe
def main():
def main(): # pragma: no cover
parser = ArgumentParser(description="Scrape a recipe site for recipies")
parser.add_argument('site',
help='Name of site')

View File

@@ -4,7 +4,7 @@ from bs4 import BeautifulSoup
import pytest
def test_load_page():
    """load_page returns a parsed BeautifulSoup document for a live page."""
    # NOTE: this fetches a real URL, so the test needs network access.
    # The scraper function was renamed from load_recipe to load_page in
    # this commit; the call must use the new name.
    page = scrape.load_page("https://hs.andreistoica.ca:4943")
    assert isinstance(page, BeautifulSoup)