Compare commits

...

2 Commits

Author          SHA1        Message                        Date
Andrei Stoica   339195fda6  moved from urllib to requests  2023-05-18 08:57:33 -04:00
                            (continuous-integration/drone/push: Build is failing)
Andrei Stoica   794dbe7d88  updated readme about testing   2023-05-17 22:15:53 -04:00
5 changed files with 14 additions and 13 deletions

View File

@@ -102,8 +102,8 @@ docker-compose -p recipe-test down
```
-Test are written in pytest framework. Currently focused on unittest.
-Integration tests to come.
+Test are written in pytest framework. Currently focused on unittest and code
+coverage. Integration tests to come.
To run test use:
```sh

View File

@@ -10,5 +10,6 @@ dependencies = [
"SQLAlchemy==1.4.39",
"python-dotenv==0.20.0",
"beautifulsoup4==4.11.1",
-"psycopg2-binary==2.9.3"
+"psycopg2-binary==2.9.3",
+"requests~=2.30.0"
]

View File

@@ -12,6 +12,7 @@ pyparsing==3.0.9
pytest==7.1.3
pytest-cov==4.0.0
python-dotenv==0.20.0
+requests~=2.30.0
soupsieve==2.3.2.post1
SQLAlchemy==1.4.39
tomli==2.0.1
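
Both dependency files pin the new library with a PEP 440 compatible-release specifier rather than an exact version. The snippet below is illustrative only (not part of the repository); it uses the `packaging` library to show what `~=2.30.0` admits:

```python
# Illustrative check of the PEP 440 "compatible release" specifier used above.
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=2.30.0")
print("2.30.1" in spec)  # True:  patch releases of 2.30 are allowed
print("2.31.0" in spec)  # False: the next minor release is not
```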

View File

@@ -4,7 +4,7 @@ import re
from sqlalchemy import select, desc, exists, not_, except_
from sqlalchemy.orm import sessionmaker
import bs4
-from urllib.request import urlopen
+import requests as req
from urllib.parse import urljoin
import logging
from argparse import ArgumentParser
@@ -73,23 +73,22 @@ def reparse_ingredients(session):
-def load_recipe(recipe_url):
+def load_page(recipe_url):
    try:
-        logging.info(f'Loading Recipe: {recipe_url}')
-        with urlopen(recipe_url) as f:
-            if f.getcode() == 404:
-                raise Exception(f"Recipe Does not exist: {recipe_url}")
+        logging.info(f'Loading Page: {recipe_url}')
+        with req.get(recipe_url) as f:
+            if f.status_code == 404:
+                raise Exception(f"Page does not exist (404): {recipe_url}")
            return bs4.BeautifulSoup(f.read().decode(), 'html.parser')
    except Exception as e:
        logging.warning(f"Could not download or parse recipe: {recipe_url}")
        logging.warning(e)
        return None

def parse_recipe(session, recipe, site):
    recipe_url = urljoin(site.base_url, str(recipe.identifier))
-    recipe_page = load_recipe(recipe_url)
+    recipe_page = load_page(recipe_url)
    if not recipe_page:
        return None
@@ -126,7 +125,7 @@ def parse_recipe(session, recipe, site):
    return recipe

-def main():
+def main(): # pragma: no cover
    parser = ArgumentParser(description="Scrape a recipe site for recipies")
    parser.add_argument('site',
                        help='Name of site')
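
One detail worth noting in the hunk above: the unchanged context line still builds the soup from `f.read().decode()`, but a `requests.Response` has no `.read()` method; the decoded body is exposed as `Response.text` (raw bytes as `Response.content`). The following is a minimal sketch of the usual requests pattern, not the repository's code; the function name and log messages mirror the diff, and the timeout is an added assumption:

```python
# Minimal sketch (not the repository's code) of a requests-based page loader,
# mirroring the load_page() shown in the diff above.
import logging

import bs4
import requests as req


def load_page(recipe_url):
    try:
        logging.info(f'Loading Page: {recipe_url}')
        # Response supports the context-manager protocol; the timeout is an assumption.
        with req.get(recipe_url, timeout=30) as resp:
            if resp.status_code == 404:
                raise Exception(f"Page does not exist (404): {recipe_url}")
            # requests.Response has no .read(); use .text for the decoded body.
            return bs4.BeautifulSoup(resp.text, 'html.parser')
    except Exception as e:
        logging.warning(f"Could not download or parse recipe: {recipe_url}")
        logging.warning(e)
        return None
```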

View File

@@ -4,7 +4,7 @@ from bs4 import BeautifulSoup
import pytest

-def test_load_recipe():
+def test_load_page():
    page = scrape.load_recipe("https://hs.andreistoica.ca:4943")
    assert type(page) == BeautifulSoup
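
Only the test's name changes in this hunk; the unchanged context line still calls `scrape.load_recipe`, which the rename in the scraper module removes. A test against the renamed function might look like the sketch below (illustrative only; the URL is the one already used in the existing test, and the `scrape` import is assumed to match the module name referenced in the test body):

```python
# Sketch only: the existing assertion pointed at the renamed function.
from bs4 import BeautifulSoup

import scrape


def test_load_page():
    page = scrape.load_page("https://hs.andreistoica.ca:4943")
    assert type(page) == BeautifulSoup
```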