From 339195fda694e5dee7b2f8a827e32b722e54434b Mon Sep 17 00:00:00 2001
From: Andrei Stoica
Date: Thu, 18 May 2023 08:57:33 -0400
Subject: [PATCH] moved from urllib to requests

---
 pyproject.toml             |  3 ++-
 requirements.txt           |  1 +
 src/recipe_graph/scrape.py | 19 +++++++++----------
 test/test_scrape.py        |  4 ++--
 4 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6a037f4..97eb16d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,5 +10,6 @@ dependencies = [
     "SQLAlchemy==1.4.39",
     "python-dotenv==0.20.0",
     "beautifulsoup4==4.11.1",
-    "psycopg2-binary==2.9.3"
+    "psycopg2-binary==2.9.3",
+    "requests~=2.30.0"
 ]
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 9bce4cb..85b77c6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,6 +12,7 @@ pyparsing==3.0.9
 pytest==7.1.3
 pytest-cov==4.0.0
 python-dotenv==0.20.0
+requests~=2.30.0
 soupsieve==2.3.2.post1
 SQLAlchemy==1.4.39
 tomli==2.0.1
diff --git a/src/recipe_graph/scrape.py b/src/recipe_graph/scrape.py
index fd58e82..42100da 100644
--- a/src/recipe_graph/scrape.py
+++ b/src/recipe_graph/scrape.py
@@ -4,7 +4,7 @@ import re
 from sqlalchemy import select, desc, exists, not_, except_
 from sqlalchemy.orm import sessionmaker
 import bs4
-from urllib.request import urlopen
+import requests as req
 from urllib.parse import urljoin
 import logging
 from argparse import ArgumentParser
@@ -73,23 +73,22 @@ def reparse_ingredients(session):
 
 
 
-def load_recipe(recipe_url):
+def load_page(recipe_url):
     try:
-        logging.info(f'Loading Recipe: {recipe_url}')
-        with urlopen(recipe_url) as f:
-            if f.getcode() == 404:
-                raise Exception(f"Recipe Does not exist: {recipe_url}")
-            return bs4.BeautifulSoup(f.read().decode(), 'html.parser')
+        logging.info(f'Loading Page: {recipe_url}')
+        with req.get(recipe_url) as f:
+            if f.status_code == 404:
+                raise Exception(f"Page does not exist (404): {recipe_url}")
+            return bs4.BeautifulSoup(f.content.decode(), 'html.parser')
     except Exception as e:
         logging.warning(f"Could not download or parse recipe: {recipe_url}")
         logging.warning(e)
-    return None
 
 
 
 def parse_recipe(session, recipe, site):
     recipe_url = urljoin(site.base_url, str(recipe.identifier))
-    recipe_page = load_recipe(recipe_url)
+    recipe_page = load_page(recipe_url)
     if not recipe_page:
         return None
 
@@ -126,7 +125,7 @@ def parse_recipe(session, recipe, site):
     return recipe
 
 
-def main():
+def main(): # pragma: no cover
     parser = ArgumentParser(description="Scrape a recipe site for recipies")
 
     parser.add_argument('site', help='Name of site')
diff --git a/test/test_scrape.py b/test/test_scrape.py
index eaa84ac..905b56d 100644
--- a/test/test_scrape.py
+++ b/test/test_scrape.py
@@ -4,7 +4,7 @@ from bs4 import BeautifulSoup
 import pytest
 
 
-def test_load_recipe():
-    page = scrape.load_recipe("https://hs.andreistoica.ca:4943")
+def test_load_page():
+    page = scrape.load_page("https://hs.andreistoica.ca:4943")
     assert type(page) == BeautifulSoup
 