moved from urllib to requests
continuous-integration/drone/push Build is failing
Details
continuous-integration/drone/push Build is failing
Details
This commit is contained in:
parent
794dbe7d88
commit
f58d5db061
|
|
@ -10,5 +10,6 @@ dependencies = [
|
||||||
"SQLAlchemy==1.4.39",
|
"SQLAlchemy==1.4.39",
|
||||||
"python-dotenv==0.20.0",
|
"python-dotenv==0.20.0",
|
||||||
"beautifulsoup4==4.11.1",
|
"beautifulsoup4==4.11.1",
|
||||||
"psycopg2-binary==2.9.3"
|
"psycopg2-binary==2.9.3",
|
||||||
|
"requests~=2.30.0"
|
||||||
]
|
]
|
||||||
|
|
@ -12,6 +12,7 @@ pyparsing==3.0.9
|
||||||
pytest==7.1.3
|
pytest==7.1.3
|
||||||
pytest-cov==4.0.0
|
pytest-cov==4.0.0
|
||||||
python-dotenv==0.20.0
|
python-dotenv==0.20.0
|
||||||
|
requests~=2.30.0
|
||||||
soupsieve==2.3.2.post1
|
soupsieve==2.3.2.post1
|
||||||
SQLAlchemy==1.4.39
|
SQLAlchemy==1.4.39
|
||||||
tomli==2.0.1
|
tomli==2.0.1
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ import re
|
||||||
from sqlalchemy import select, desc, exists, not_, except_
|
from sqlalchemy import select, desc, exists, not_, except_
|
||||||
from sqlalchemy.orm import sessionmaker
|
from sqlalchemy.orm import sessionmaker
|
||||||
import bs4
|
import bs4
|
||||||
from urllib.request import urlopen
|
import requests as req
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
import logging
|
import logging
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
|
|
@ -73,23 +73,22 @@ def reparse_ingredients(session):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def load_page(recipe_url):
    """Download *recipe_url* and parse it into a BeautifulSoup tree.

    Returns the parsed ``bs4.BeautifulSoup`` document on success, or
    ``None`` if the download or parse fails (the failure is logged as a
    warning rather than raised to the caller).
    """
    try:
        logging.info(f'Loading Page: {recipe_url}')
        # requests.Response supports the context-manager protocol; leaving
        # the block releases the underlying connection.
        with req.get(recipe_url) as f:
            if f.status_code == 404:
                raise Exception(f"Page does not exist (404): {recipe_url}")
            # BUG FIX: requests.Response has no .read() method — that is
            # the urllib file-object API the old code used.  f.read().decode()
            # raised AttributeError on every call (swallowed below, so the
            # function always returned None).  Response.text is the body
            # already decoded using the response's declared encoding.
            return bs4.BeautifulSoup(f.text, 'html.parser')

    except Exception as e:
        # Best-effort loader: log and signal failure with None so callers
        # (e.g. parse_recipe) can skip this page and continue.
        logging.warning(f"Could not download or parse recipe: {recipe_url}")
        logging.warning(e)

        return None
|
|
||||||
def parse_recipe(session, recipe, site):
|
def parse_recipe(session, recipe, site):
|
||||||
recipe_url = urljoin(site.base_url, str(recipe.identifier))
|
recipe_url = urljoin(site.base_url, str(recipe.identifier))
|
||||||
recipe_page = load_recipe(recipe_url)
|
recipe_page = load_page(recipe_url)
|
||||||
if not recipe_page:
|
if not recipe_page:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -126,7 +125,7 @@ def parse_recipe(session, recipe, site):
|
||||||
|
|
||||||
return recipe
|
return recipe
|
||||||
|
|
||||||
def main():
|
def main(): # pragma: no cover
|
||||||
parser = ArgumentParser(description="Scrape a recipe site for recipies")
|
parser = ArgumentParser(description="Scrape a recipe site for recipies")
|
||||||
parser.add_argument('site',
|
parser.add_argument('site',
|
||||||
help='Name of site')
|
help='Name of site')
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ from bs4 import BeautifulSoup
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
def test_load_page():
    """Smoke-test load_page against a live page: it should return a parsed tree."""
    # BUG FIX: the commit renamed load_recipe -> load_page, but this test
    # still called the old name and would fail with AttributeError.
    # NOTE(review): this hits a live URL — it will fail offline; consider
    # mocking the HTTP layer.
    page = scrape.load_page("https://hs.andreistoica.ca:4943")
    assert isinstance(page, BeautifulSoup)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue