engineering-economics-probl.../src/problem_engine.py

import sys
import os
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import random
import math # For direct use if not going through formula_evaluator for simple conversions
from src import data_loader
from src import value_sampler
from src import formula_evaluator
from src import narrative_builder
from src import solution_presenter
from src import date_utils # For potential date-based time calculations

# --- Cached Data ---
# Module-level caches, all starting out empty (None) and filled on demand by
# _load_all_data_cached():
#   FINANCIAL_CONCEPTS  - result of data_loader.get_financial_concepts()
#   TEXT_SNIPPETS_DATA  - result of data_loader.get_text_snippets(); named this
#                         way to avoid conflict with value_sampler's TEXT_SNIPPETS
#   NAMES_DATA_CACHE    - result of data_loader.get_names_data()
#   VALUE_RANGES_CACHE  - result of data_loader.get_value_ranges()
FINANCIAL_CONCEPTS = TEXT_SNIPPETS_DATA = NAMES_DATA_CACHE = VALUE_RANGES_CACHE = None

def _load_all_data_cached():
    """Fill every module-level data cache that is still ``None``.

    Each cache is loaded through its ``data_loader`` getter only while it is
    ``None``; a cache that already holds a value is left as it is, so repeated
    calls do not reload data that was loaded successfully.
    """
    global FINANCIAL_CONCEPTS, TEXT_SNIPPETS_DATA, NAMES_DATA_CACHE, VALUE_RANGES_CACHE

    FINANCIAL_CONCEPTS = (
        FINANCIAL_CONCEPTS
        if FINANCIAL_CONCEPTS is not None
        else data_loader.get_financial_concepts()
    )
    # Text snippets are loaded for solution_presenter and narrative_builder.
    TEXT_SNIPPETS_DATA = (
        TEXT_SNIPPETS_DATA
        if TEXT_SNIPPETS_DATA is not None
        else data_loader.get_text_snippets()
    )
    # Names data is loaded for narrative_builder.
    NAMES_DATA_CACHE = (
        NAMES_DATA_CACHE
        if NAMES_DATA_CACHE is not None
        else data_loader.get_names_data()
    )
    # Value ranges are loaded for value_sampler.
    VALUE_RANGES_CACHE = (
        VALUE_RANGES_CACHE
        if VALUE_RANGES_CACHE is not None
        else data_loader.get_value_ranges()
    )

    # Sub-modules that keep their own caches check their own global cache
    # variables, so nothing has to be pushed into them from here.

# --- Variable Mapping ---
# Maps concept variable names to value_ranges.json keys.
# This might need to be more dynamic or extensive based on concept variations.
# Each concept variable appears exactly once: a repeated key in a dict literal
# silently overrides the earlier entry, which hides mistakes.
CONCEPT_VAR_TO_VALUERANGE_KEY = {
    # Amounts
    "P": "principal",  # Could also be loan_amount, investment_amount
    "F_simple": "future_value",
    "F_compound": "future_value",
    "F_continuous": "future_value",
    "F_maturity": "future_value",  # For Banker's Discount, F is the maturity value
    "F_exact_simple": "future_value",
    "F_ordinary_simple": "future_value",
    "I_simple": "interest_amount",
    "I_exact_simple": "interest_amount",
    "I_ordinary_simple": "interest_amount",
    "Db_discount_amount": "interest_amount",  # Or a specific "discount_amount" range
    "P_proceeds": "principal",  # Proceeds are a present value

    # Simple interest rates and time
    "i_simple_annual": "simple_interest_rate_annual",
    # Used for comparison rates, e.g. CONTINUOUS_COMPOUNDING_EQUIVALENT_SIMPLE_RATE
    "i_simple_equivalent": "simple_interest_rate_annual",
    "n_time_years": "time_years",  # For simple interest
    "n_time_months": "time_months",
    "n_time_days": "time_days",

    # Compound / continuous interest and discounting
    "r_nominal_annual": "compound_interest_rate_nominal",
    "t_years": "time_years",  # For compound/continuous interest time
    # m_compounding_periods_per_year is handled specially
    "ER": "compound_interest_rate_nominal",  # Effective rate is a rate
    "d_discount_rate": "discount_rate_bankers",
}

def generate_problem():
    """
    Generates a complete financial math problem with narrative and solution.

    A concept is picked at random from FINANCIAL_CONCEPTS, values are generated
    for its required knowns, the concept's formulas are evaluated (the target
    unknown last), and the narrative and guided solution are built from the
    collected variable data.

    Returns:
        dict: On success, a dict with the keys "concept_id", "topic",
        "problem_statement", "target_unknown_key", "known_values_data",
        "all_variables_for_solution", "calculated_answer_raw",
        "calculated_answer_formatted" and "solution_steps". On failure, a dict
        with the single key "error" holding a description of what went wrong.
    """
    _load_all_data_cached()
    if not FINANCIAL_CONCEPTS:
        return {"error": "Failed to load financial concepts."}

    selected_concept = random.choice(FINANCIAL_CONCEPTS)
    target_unknown = selected_concept["target_unknown"]

    all_variables_data_formatted = {}  # Full data dicts, keyed by concept var name
    formula_context_vars = {}          # Raw values for formula evaluation, keyed by concept var name

    # 1. Generate Known Values
    # De-duplicate while keeping the order declared by the concept. Iterating a
    # set of strings here would make the order of the random draws depend on
    # per-process hash randomization, so a seeded run would not be repeatable.
    required_knowns = list(dict.fromkeys(selected_concept.get("required_knowns_for_target", [])))

    handled_vars = set()
    if "start_date" in required_knowns and "end_date" in required_knowns:
        handled_vars = _generate_date_period_values(
            selected_concept, all_variables_data_formatted, formula_context_vars
        )

    sampling_error = _sample_required_knowns(
        selected_concept, required_knowns, handled_vars,
        all_variables_data_formatted, formula_context_vars,
    )
    if sampling_error is not None:
        return sampling_error

    _resample_future_value_for_rate_concepts(
        selected_concept, all_variables_data_formatted, formula_context_vars
    )

    # 2. Handle Time Conversions (simple interest may start from months)
    _maybe_restate_time_in_months(
        selected_concept, all_variables_data_formatted, formula_context_vars
    )

    # 3. Calculate the concept's formulas: intermediates first, target last.
    # Formulas are assumed to be simple enough, or ordered correctly in the
    # concept definition, that no dependency graph is needed.
    formulas = selected_concept["formulas"]
    if not isinstance(formulas, dict):  # Should not happen based on current JSON structure
        return {"error": f"Formulas for concept {selected_concept['concept_id']} are not in expected dict format."}

    formulas_to_eval = [
        (var, formula_str) for var, formula_str in formulas.items() if var != target_unknown
    ]
    if target_unknown in formulas:
        formulas_to_eval.append((target_unknown, formulas[target_unknown]))

    calculated_solution_value = None
    calculated_solution_data_formatted = None

    for var_to_calc, formula_str in formulas_to_eval:
        calc_value = formula_evaluator.evaluate_formula(formula_str, formula_context_vars)
        if calc_value is None:
            return {"error": f"Failed to evaluate formula for '{var_to_calc}' in concept '{selected_concept['concept_id']}'. Context: {formula_context_vars}"}

        # Make the result available to the formulas evaluated after this one.
        formula_context_vars[var_to_calc] = calc_value

        formatted_data_for_calc_var = _format_calculated_variable(var_to_calc, calc_value)
        all_variables_data_formatted[var_to_calc] = formatted_data_for_calc_var

        if var_to_calc == target_unknown:
            calculated_solution_value = calc_value
            calculated_solution_data_formatted = formatted_data_for_calc_var

    if calculated_solution_value is None:
        return {"error": f"Target unknown '{target_unknown}' was not calculated for concept '{selected_concept['concept_id']}'."}

    # 4. Build Narrative
    problem_narrative = narrative_builder.build_narrative(selected_concept, all_variables_data_formatted, target_unknown)

    # 5. Generate Guided Solution
    solution_steps = solution_presenter.generate_guided_solution(selected_concept, all_variables_data_formatted, calculated_solution_data_formatted)

    # Show the originally generated knowns, plus a few variables that are
    # always reported when present.
    known_keys = set(required_knowns) | {"m_compounding_periods_per_year", "n_time_months", "start_date", "end_date"}

    return {
        "concept_id": selected_concept["concept_id"],
        "topic": selected_concept["financial_topic"],
        "problem_statement": problem_narrative,
        "target_unknown_key": target_unknown,
        "known_values_data": {k: v for k, v in all_variables_data_formatted.items() if k in known_keys},
        "all_variables_for_solution": all_variables_data_formatted,  # Includes intermediates
        "calculated_answer_raw": calculated_solution_value,
        "calculated_answer_formatted": value_sampler.format_value_for_display(calculated_solution_data_formatted),
        "solution_steps": solution_steps
    }


def _generate_date_period_values(selected_concept, all_variables_data_formatted, formula_context_vars):
    """Generate start/end dates plus the derived day counts; return the set of handled variable names."""
    # Generation parameters come from the "date_period_generation" entry of the
    # value ranges, with defaults. The cache is guarded the same way as in the
    # formula formatting code, so a missing cache falls back to the defaults.
    date_period_config = (VALUE_RANGES_CACHE or {}).get("date_period_generation", {})

    start_date_obj, end_date_obj, num_days_val = date_utils.get_random_date_period(
        min_days=date_period_config.get("min_days", 30),
        max_days=date_period_config.get("max_days", 730),
        base_year_range=(
            date_period_config.get("base_year_start", 1990),
            date_period_config.get("base_year_end", 2030),
        ),
    )

    all_variables_data_formatted["start_date"] = {'key': 'start_date', 'value': start_date_obj, 'unit': 'date', 'display_precision': None}
    all_variables_data_formatted["end_date"] = {'key': 'end_date', 'value': end_date_obj, 'unit': 'date', 'display_precision': None}
    all_variables_data_formatted["n_time_days"] = {'key': 'n_time_days', 'value': num_days_val, 'unit': 'days', 'display_precision': 0}

    formula_context_vars["start_date"] = start_date_obj
    formula_context_vars["end_date"] = end_date_obj
    formula_context_vars["n_time_days"] = num_days_val

    # The day base depends on the financial topic; 0 means "no base applies".
    time_base_days_val = 0
    financial_topic = selected_concept["financial_topic"]

    if financial_topic == "Exact Simple Interest":
        # The base is the length of the year in which the period starts.
        time_base_year_for_exact_val = start_date_obj.year
        time_base_days_val = date_utils.days_in_year(time_base_year_for_exact_val)

        # Derived for solution steps, not typically in 'required_knowns_for_target'.
        all_variables_data_formatted["time_base_year_for_exact"] = {'key': 'time_base_year_for_exact', 'value': time_base_year_for_exact_val, 'unit': 'year', 'display_precision': 0}
        formula_context_vars["time_base_year_for_exact"] = time_base_year_for_exact_val
    elif financial_topic == "Ordinary Simple Interest":
        time_base_days_val = 360

    if time_base_days_val > 0:
        # Derived for formulas/solution steps, not typically in 'required_knowns_for_target'.
        all_variables_data_formatted["time_base_days"] = {'key': 'time_base_days', 'value': time_base_days_val, 'unit': 'days', 'display_precision': 0}
        formula_context_vars["time_base_days"] = time_base_days_val

    return {"start_date", "end_date", "n_time_days"}


def _sample_required_knowns(selected_concept, required_knowns, handled_vars,
                            all_variables_data_formatted, formula_context_vars):
    """Sample a value for every required known not already handled; return an error dict or None."""
    for var_key in required_knowns:
        if var_key in handled_vars:
            continue  # Already produced by the date-specific logic

        if var_key == "m_compounding_periods_per_year":
            comp_freq_data = value_sampler.get_random_compounding_frequency()
            if not comp_freq_data:
                return {"error": f"Failed to generate compounding frequency for {selected_concept['concept_id']}"}
            all_variables_data_formatted[var_key] = comp_freq_data
            formula_context_vars[var_key] = comp_freq_data["m_value"]
            continue

        value_range_key = CONCEPT_VAR_TO_VALUERANGE_KEY.get(var_key)
        if not value_range_key:
            # This might be an intermediate variable that shouldn't be generated
            # directly, or one that doesn't map to a single range.
            print(f"Warning: No value_range_key mapping for required known '{var_key}' in concept '{selected_concept['concept_id']}'. Skipping direct generation.")
            continue

        var_data = value_sampler.get_value_for_variable(value_range_key)
        if not var_data:
            return {"error": f"Failed to generate value for '{var_key}' (mapped from '{value_range_key}') for concept '{selected_concept['concept_id']}'"}
        all_variables_data_formatted[var_key] = var_data
        formula_context_vars[var_key] = var_data["value"]

    return None


def _resample_future_value_for_rate_concepts(selected_concept, all_variables_data_formatted, formula_context_vars):
    """For rate-solving concepts, resample the future value a few times until it exceeds P."""
    rate_solving_concepts = (
        "COMPOUND_INTEREST_SOLVE_FOR_RATE",
        "CONTINUOUS_COMPOUNDING_SOLVE_FOR_RATE",
        "SIMPLE_INTEREST_SOLVE_FOR_RATE_FROM_I",
    )
    if selected_concept["concept_id"] not in rate_solving_concepts:
        return

    # The check needs both P and one future-value variable. A concept that works
    # from I_simple instead of a future value has no F in the context and is
    # left untouched.
    p_key = "P"
    if p_key not in formula_context_vars:
        return
    f_key = next(
        (key for key in ("F_compound", "F_continuous", "F_simple") if key in formula_context_vars),
        None,
    )
    if f_key is None:
        return

    # F > P is wanted so that the solved rate is positive. The number of
    # attempts is bounded so generation always terminates.
    max_resample_attempts = 5
    attempt = 0
    f_value_range_key = CONCEPT_VAR_TO_VALUERANGE_KEY.get(f_key)
    while formula_context_vars[f_key] <= formula_context_vars[p_key] and attempt < max_resample_attempts:
        print(f"Resampling {f_key} because {f_key} ({formula_context_vars[f_key]}) <= {p_key} ({formula_context_vars[p_key]}) for rate calculation.")
        if not f_value_range_key:
            break  # No range key for F, nothing to resample from
        var_data_f = value_sampler.get_value_for_variable(f_value_range_key)
        if not var_data_f:
            break  # Resampling failed; keep the current value
        all_variables_data_formatted[f_key] = var_data_f
        formula_context_vars[f_key] = var_data_f["value"]
        attempt += 1

    if formula_context_vars[f_key] <= formula_context_vars[p_key]:
        print(f"Warning: Could not ensure {f_key} > {p_key} after {max_resample_attempts} attempts for concept {selected_concept['concept_id']}. Proceeding with current values.")


def _maybe_restate_time_in_months(selected_concept, all_variables_data_formatted, formula_context_vars):
    """For simple interest, replace n_time_years with a months-based value 30% of the time."""
    if selected_concept["financial_topic"] != "Simple Interest":
        return
    # The random draw only happens when n_time_years was actually generated.
    if "n_time_years" not in formula_context_vars or random.random() >= 0.3:
        return

    original_months_data = value_sampler.get_value_for_variable("time_months")
    if not original_months_data:
        return

    all_variables_data_formatted["n_time_months"] = original_months_data
    # Override n_time_years with the value converted from months.
    formula_context_vars["n_time_years"] = original_months_data["value"] / 12.0
    # Keep the precision setting of the entry that is being replaced.
    previous_precision = all_variables_data_formatted["n_time_years"].get('display_precision', 4)
    all_variables_data_formatted["n_time_years"] = {
        'key': 'time_years',  # original key from value_ranges
        'value': formula_context_vars["n_time_years"],
        'unit': 'years',
        'display_precision': previous_precision,
    }


def _format_calculated_variable(var_to_calc, calc_value):
    """Build the formatted-data dict for a calculated variable from its value-range config, if any."""
    value_range_key_for_calc_var = CONCEPT_VAR_TO_VALUERANGE_KEY.get(var_to_calc)
    base_config = VALUE_RANGES_CACHE.get(value_range_key_for_calc_var, {}) if VALUE_RANGES_CACHE else {}

    formatted_data_for_calc_var = {
        'key': var_to_calc,  # Use the concept's variable name
        'value': calc_value,
        'currency': base_config.get('currency'),
        'unit': base_config.get('unit'),
        'unit_display': base_config.get('unit_display'),
        'display_precision': base_config.get('display_precision', base_config.get('decimals'))
    }

    # Variables whose name contains "rate" get a unit_display when the range
    # config did not supply a truthy one.
    if "rate" in var_to_calc.lower() and not formatted_data_for_calc_var.get('unit_display'):
        if value_range_key_for_calc_var and VALUE_RANGES_CACHE and value_range_key_for_calc_var in VALUE_RANGES_CACHE:
            formatted_data_for_calc_var['unit_display'] = VALUE_RANGES_CACHE[value_range_key_for_calc_var].get('unit_display', '%')  # Default to %
        else:  # Fallback if no specific range key
            formatted_data_for_calc_var['unit_display'] = '%'

    return formatted_data_for_calc_var


if __name__ == '__main__':
    # Manual smoke test: generate and print one problem for every concept
    # returned by data_loader.get_financial_concepts().
    print("Generating a sample financial problem...\n")
    # Ensure data is loaded for sub-modules if they cache independently on first call
    value_sampler._get_value_ranges_cached()
    narrative_builder._get_names_data_cached()
    narrative_builder._get_text_snippets_cached()
    solution_presenter._get_text_snippets_cached()

    # Load all concepts
    all_concepts = data_loader.get_financial_concepts()
    if not all_concepts:
        print("Failed to load financial concepts for testing. Exiting.")
        sys.exit(1)

    print(f"Found {len(all_concepts)} concepts to test.\n")

    original_financial_concepts_global = FINANCIAL_CONCEPTS  # Save the global state if it was set

    try:
        for i, concept_to_test in enumerate(all_concepts, start=1):
            # generate_problem() picks a random entry of the module-level
            # FINANCIAL_CONCEPTS, so narrowing it to a single concept makes
            # the pick deterministic for this iteration.
            FINANCIAL_CONCEPTS = [concept_to_test]

            print(f"\n--- Testing Concept {i}/{len(all_concepts)}: {concept_to_test['concept_id']} ---")
            problem = generate_problem()

            if "error" in problem:
                print(f"Error generating problem: {problem['error']}")
            else:
                print(f"Topic: {problem['topic']}")
                print("\nProblem Statement:")
                print(problem['problem_statement'])

                # Ensure TEXT_SNIPPETS_DATA is loaded for the question part
                if TEXT_SNIPPETS_DATA is None:  # Should have been loaded by _load_all_data_cached
                    _load_all_data_cached()

                # Fall back to the raw variable key when the snippets are
                # unavailable or carry no description for the target.
                variable_descriptions = (TEXT_SNIPPETS_DATA or {}).get('variable_descriptions') or {}
                target_key = problem['target_unknown_key']
                print(f"\nQuestion: What is the {variable_descriptions.get(target_key, target_key)}?")

                print("\nGuided Solution:")
                for step in problem['solution_steps']:
                    print(step)

                print(f"\nFinal Answer ({problem['target_unknown_key']}): {problem['calculated_answer_formatted']}")
            print("---------------------------------------\n")
    finally:
        # Restore the original global state even if a concept raised.
        FINANCIAL_CONCEPTS = original_financial_concepts_global

    print("Completed testing all concepts.")