third_iteration.py
Toggle Theme
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import logging
import logging.config
import json
from typing import Tuple
LOGGING_CONFIG_FILE = "logging-config.json"
logger = logging.getLogger("")
def main() -> None:
logger.info("--- Cuisine Analysis Script Started ---")
# Load the dataset
try:
cuisines_df: pd.DataFrame = pd.read_csv("./cuisines.csv", index_col=0)
logger.info("Cuisines dataset loaded successfully.")
except Exception as e:
logger.critical(
f"An unexpected error occurred while loading 'cuisines.csv': {e}",
exc_info=True,
)
return
# Get the list of unique cuisines with a additional non-existent cuisine
target_cuisines: Tuple[str, ...] = tuple(cuisines_df["cuisine"].unique()) + (
"nonexistent_cuisine ;)",
)
logger.info(f"Identified target cuisines: {', '.join(target_cuisines)}")
# For each cuisine, process and plot ingredient data
for cuisine in target_cuisines:
logger.info(f"Initiating analysis for cuisine: '{cuisine}'.")
sorted_cuisine_ingredients: pd.DataFrame = get_sorted_cuisine_ingredients(
cuisines_df, cuisine
)
plot_cuisine_ingredients(sorted_cuisine_ingredients, cuisine)
logger.info("--- Cuisine Analysis Script Finished ---")
def get_sorted_cuisine_ingredients(
df: pd.DataFrame,
cuisine_name: str,
) -> pd.DataFrame:
"""
Filters a DataFrame by cuisine and returns a sorted DataFrame of ingredient counts.
"""
logger.info(f"Processing ingredients for cuisine: '{cuisine_name}'.")
# Filter rows matching the given cuisine (case-insensitive)
filtered = df[df["cuisine"].str.lower() == cuisine_name.lower()]
if filtered.empty:
logger.warning(
f"No data found for cuisine: '{cuisine_name}'. Returning empty DataFrame."
)
return pd.DataFrame()
logger.debug(f"Found {len(filtered)} rows for '{cuisine_name}'.")
# Sum the ingredient counts, dropping the 'cuisine' column
ingredient_totals: pd.Series = (
filtered
.T
.drop(["cuisine"])
.sum(axis=1)
)
# Convert to DataFrame for plotting
cuisine_counts: pd.DataFrame = ingredient_totals.to_frame("value")
# Keep only ingredients that are actually used
present_ingredients: pd.DataFrame = cuisine_counts[cuisine_counts["value"] != 0]
logger.debug(
f"Identified {len(present_ingredients)} present ingredients for '{cuisine_name}'."
)
# Sort by frequency
sorted_ingredients: pd.DataFrame = present_ingredients.sort_values(
by="value", ascending=False
)
logger.info(f"Successfully sorted ingredients for '{cuisine_name}'.")
return sorted_ingredients
def plot_cuisine_ingredients(
df_to_plot: pd.DataFrame, cuisine_name: str, top_n: int = 10
) -> None:
"""
Plots the top N most common ingredients for a given cuisine.
"""
logger.info(f"Attempting to plot top {top_n} ingredients for '{cuisine_name}'.")
if df_to_plot.empty:
logger.warning(
f"Skipping plot for '{cuisine_name}': DataFrame to plot is empty."
)
return
# Get plot title
title: str = f"Top {top_n} Most Common {cuisine_name.capitalize()} Ingredients"
# Plot horizontal bar chart
try:
df_to_plot.head(top_n).plot.barh(title=title)
plt.xlabel("Count")
plt.ylabel("Ingredient")
plt.gca().invert_yaxis()
plt.show()
logger.info(f"Plot displayed successfully for '{cuisine_name}'.")
except Exception as e:
logger.error(
f"Failed to plot ingredients for '{cuisine_name}': {e}", exc_info=True
)
if __name__ == "__main__":
# Load logging configuration
try:
with open(LOGGING_CONFIG_FILE, "r") as f:
config_dict = json.load(f)
logging.config.dictConfig(config_dict)
logger.info(f"Logging configured successfully from {LOGGING_CONFIG_FILE}.")
except Exception as e:
print(f"An unexpected error occurred during logging setup: {e}")
exit(1)
main()