From e207c359ed12f6ef8ac90d943e6ad27d6e8a3109 Mon Sep 17 00:00:00 2001 From: Andrei Stoica Date: Thu, 18 May 2023 19:05:34 -0400 Subject: [PATCH] test parse_recipe_name --- src/recipe_graph/scrape.py | 71 ++++++++++++++++++++++++-------------- test/test_scrape.py | 57 +++++++++++++++++++++++++++++- 2 files changed, 101 insertions(+), 27 deletions(-) diff --git a/src/recipe_graph/scrape.py b/src/recipe_graph/scrape.py index f234408..a3d3dde 100644 --- a/src/recipe_graph/scrape.py +++ b/src/recipe_graph/scrape.py @@ -9,6 +9,7 @@ from urllib.parse import urljoin import logging from argparse import ArgumentParser + def ingredient_regex(units: list[str], instructions: list[str]) -> re.Pattern: number_regex = "((?:[\d\\./\\u00BC-\\u00BE\\u2150-\\u215E]*\s?(?:\(.+\))?)*)" ingredient_regex = "([a-zA-Z '\-]+)" @@ -89,7 +90,7 @@ def reparse_ingredients(session): ) -def load_page(recipe_url): +def load_page(recipe_url: str) -> bs4.BeautifulSoup: try: logging.info(f"Loading Page: {recipe_url}") with req.get(recipe_url) as resp: @@ -102,43 +103,61 @@ def load_page(recipe_url): logging.warning(e) +def parse_recipe_name( + site: db.RecipeSite, + page: bs4.BeautifulSoup, + recipe: db.Recipe, + url: str = None, +) -> db.Recipe: + if not url: + url = {"site": site, "recipe": recipe} + name_candidates = page.find_all(class_=site.name_class) + if len(name_candidates) == 0: + raise Exception(f"Could not extract recipe name: {url}") + name_div = name_candidates[0] + recipe.name = name_div.text + + logging.info(f"Adding Recipe {recipe.name} from {url}") + + return recipe + +def parse_ingredient( + ingredient: db.Ingredient +) -> db.RecipeIngredientParts: + parts = parse_ingredient(ingredient.text) + if parts: + quantity, unit, instruction, ingredient_name, supplement = parts + return db.RecipeIngredientParts( + id=ingredient.id, + quantity=quantity, + unit=unit, + instruction=instruction, + ingredient=ingredient_name, + supplement=supplement, + ) + + def parse_recipe(session, recipe, 
site): recipe_url = urljoin(site.base_url, str(recipe.identifier)) recipe_page = load_page(recipe_url) if not recipe_page: return None - name_candidates = recipe_page.find_all(class_=site.name_class) - if len(name_candidates) == 0: - raise Exception(f"Could not extract recipe name: {recipe_url}") - name_div = name_candidates[0] - recipe.name = name_div.text - - logging.info(f"Adding Recipe {recipe.name} from {recipe_url}") - + recipe = parse_recipe_name(site, recipe_page, recipe, recipe_url) session.add(recipe) session.flush() - ingred_candidates = recipe_page.find_all(class_=site.ingredient_class) - for candidate in ingred_candidates: - ingred = db.RecipeIngredient(text=candidate.text, recipe_id=recipe.id) - session.add(ingred) + candidates = recipe_page.find_all(class_=site.ingredient_class) + for candidate in candidates: + ingredient = db.RecipeIngredient(text=candidate.text, recipe_id=recipe.id) + session.add(ingredient) session.flush() - - parts = parse_ingredient(ingred.text) + + parts = parse_ingredient(ingredient) if parts: - quantity, unit, instruction, ingredient, supplement = parts - ingred_parts = db.RecipeIngredientParts( - id=ingred.id, - quantity=quantity, - unit=unit, - instruction=instruction, - ingredient=ingredient, - supplement=supplement, - ) - session.add(ingred_parts) + session.add(parts) - logging.info(f"{len(ingred_candidates)} ingredients found. Inserting into DB") + logging.info(f"{len(candidates)} ingredients found. 
Inserting into DB") return recipe diff --git a/test/test_scrape.py b/test/test_scrape.py index 4a7da5d..565b451 100644 --- a/test/test_scrape.py +++ b/test/test_scrape.py @@ -1,7 +1,47 @@ from recipe_graph import scrape from bs4 import BeautifulSoup +from recipe_graph.db import RecipeSite, Recipe, RecipeIngredient, RecipeIngredientParts -import pytest +from pytest import fixture + + +@fixture +def mock_site(): + return RecipeSite( + name="mock-site", + ingredient_class="mock-ing", + name_class="mock-name", + base_url="example-site/mock-site", + ) + + +# TODO: should probably load HTML from file +@fixture +def mock_page(): + return BeautifulSoup( + """ +
+        <div>
+            <div class="mock-name">test_recipe</div>
+            <div class="mock-ing">test_ingredient</div>
+        </div>
+        """,
""", "html.parser") + + +@fixture +def mock_recipe(): + return Recipe(name="test_recipe", identifier="mock_1") + + +@fixture +def mock_url(): + return "example-site/mock-site" def test_load_page(): @@ -23,3 +63,18 @@ def test_ingredient_regex(): regex.pattern == "((?:[\\d\\./\\u00BC-\\u00BE\\u2150-\\u215E]*\\s?(?:\\(.+\\))?)*)((?:(?:[cC]up|[oO]unce)e?s?)?)((?:(?:(?:[cC]rushed|[gG]round)(?:ly)?)| )*)([a-zA-Z '\\-]+),?(.*)" ) + + +def test_parse_recipe_name(mock_site, mock_page, mock_recipe, mock_url): + expected_name = mock_recipe.name + mock_recipe.name = None + + mock_recipe = scrape.parse_recipe_name( + mock_site, + mock_page, + mock_recipe, + mock_url, + ) + assert mock_recipe.name == expected_name + # assert False +