aki f02f848ada feat: Implement full problem generation
This commit marks a major milestone in the problem generator project.

Key systems include:
- Fully functional problem generation for all 23 defined concepts (Simple Interest, Compound Interest, Banker's Discount, Effective Rate, Continuous Compounding, Exact/Ordinary Simple Interest).
- Robust date handling for date-specific interest calculations.
- Improved solution presentation with accurate "Substitute Values" step and complete variable descriptions.
- Interactive problem display in `main.py`: problem statement shown first, question and solution revealed on user input.
- Added plausibility checks for rate calculations to avoid unrealistic negative rates.
- Comprehensive `README.md` update:
    - Detailed system architecture (modules, data files, Mermaid diagram).
    - List of all currently covered financial concepts.
    - Instructions for running and extending the generator.
    - Discussion of scope for future enhancements (Equation of Value, Gradients, etc.).
- Refinements to `value_sampler.py` for better formatting and handling of None values.
- Updates to `text_snippets.json` for complete variable descriptions and improved solution step phrasing.
- Updates to `value_ranges.json` for date generation parameters.
- `problem_engine.py` now systematically tests all concepts when run directly.
- Added `uv.lock` to track resolved dependencies.

The system is capable of generating a wide variety of engineering economy problems with detailed, step-by-step solutions and an interactive user experience.
2025-05-09 11:56:16 +08:00

346 lines
19 KiB
Python

import sys
import os
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
import random
import math # For direct use if not going through formula_evaluator for simple conversions
from src import data_loader
from src import value_sampler
from src import formula_evaluator
from src import narrative_builder
from src import solution_presenter
from src import date_utils # For potential date-based time calculations
# --- Cached Data ---
# Module-level caches filled on demand by _load_all_data_cached(). Each one
# starts as None, which that loader treats as "not loaded yet".
# Concept definitions; generate_problem() picks one of these at random.
FINANCIAL_CONCEPTS = None
# Text snippets; named *_DATA to avoid a conflict with value_sampler's TEXT_SNIPPETS.
TEXT_SNIPPETS_DATA = None # To avoid conflict with value_sampler's TEXT_SNIPPETS
# Result of data_loader.get_names_data().
NAMES_DATA_CACHE = None
# Result of data_loader.get_value_ranges(); read with .get() as a mapping.
VALUE_RANGES_CACHE = None
def _load_all_data_cached():
    """Populate every module-level data cache that is still ``None``.

    Caches are filled in a fixed order (concepts, text snippets, names, value
    ranges). A cache that already holds a non-``None`` value is left alone, so
    repeated calls only reload what is still missing.
    """
    # Cache name -> zero-argument loader. The lambdas keep the data_loader
    # attribute lookup lazy, so a loader is only touched when it is needed.
    pending_loaders = {
        "FINANCIAL_CONCEPTS": lambda: data_loader.get_financial_concepts(),
        # Text snippets are loaded for solution_presenter and narrative_builder.
        "TEXT_SNIPPETS_DATA": lambda: data_loader.get_text_snippets(),
        # Names data is loaded for narrative_builder.
        "NAMES_DATA_CACHE": lambda: data_loader.get_names_data(),
        # Value ranges are loaded for value_sampler.
        "VALUE_RANGES_CACHE": lambda: data_loader.get_value_ranges(),
    }
    module_state = globals()
    for cache_name, load in pending_loaders.items():
        if module_state[cache_name] is None:
            module_state[cache_name] = load()
    # The sub-modules keep their own caches and check their own global cache
    # variables, so nothing has to be pushed into them from here.
# --- Variable Mapping ---
# Maps concept variable names to value_ranges.json keys.
# This might need to be more dynamic or extensive based on concept variations.
# Each concept variable must appear exactly once: a repeated key in a dict
# literal silently keeps only the last value.
CONCEPT_VAR_TO_VALUERANGE_KEY = {
    "P": "principal",  # Could also be loan_amount, investment_amount
    "F_simple": "future_value",
    "F_compound": "future_value",
    "F_continuous": "future_value",
    "F_maturity": "future_value",  # For Banker's Discount, F is the maturity value
    "F_exact_simple": "future_value",
    "F_ordinary_simple": "future_value",
    "I_simple": "interest_amount",
    "I_exact_simple": "interest_amount",
    "I_ordinary_simple": "interest_amount",
    "Db_discount_amount": "interest_amount",  # Or a specific "discount_amount" range
    "P_proceeds": "principal",  # Proceeds are a present value
    "i_simple_annual": "simple_interest_rate_annual",
    # Used by CONTINUOUS_COMPOUNDING_EQUIVALENT_SIMPLE_RATE and for comparison rates
    "i_simple_equivalent": "simple_interest_rate_annual",
    "n_time_years": "time_years",  # For simple interest
    "n_time_months": "time_months",
    "n_time_days": "time_days",
    "r_nominal_annual": "compound_interest_rate_nominal",
    "t_years": "time_years",  # For compound/continuous interest time
    # m_compounding_periods_per_year is handled specially
    "ER": "compound_interest_rate_nominal",  # Effective rate is a rate
    "d_discount_rate": "discount_rate_bankers",
}
def generate_problem():
    """
    Generates a complete financial math problem with narrative and solution.

    A concept is chosen at random from the loaded financial concepts, its
    required known values are generated, the concept's formulas are evaluated
    (the target formula last), and the narrative and guided solution are built
    from the resulting variables.

    Returns:
        dict: On success, a dict with the keys ``concept_id``, ``topic``,
        ``problem_statement``, ``target_unknown_key``, ``known_values_data``,
        ``all_variables_for_solution``, ``calculated_answer_raw``,
        ``calculated_answer_formatted`` and ``solution_steps``. On failure, a
        dict with the single key ``error`` holding a message.
    """
    _load_all_data_cached()
    if not FINANCIAL_CONCEPTS:
        return {"error": "Failed to load financial concepts."}

    selected_concept = random.choice(FINANCIAL_CONCEPTS)
    target_unknown = selected_concept["target_unknown"]
    # Full data dicts (value plus formatting info), keyed by concept var name.
    all_variables_data_formatted = {}
    # Raw values used for formula evaluation, keyed by concept var name.
    formula_context_vars = {}

    # 1. Generate known values.
    # Keep the declared order (minus duplicates) instead of a set: set
    # iteration order for strings varies between processes, which would make
    # the sequence of random draws irreproducible even with a seeded `random`.
    required_knowns = list(dict.fromkeys(selected_concept.get("required_knowns_for_target", [])))

    handled_vars = _generate_date_period_knowns(
        selected_concept, required_knowns, all_variables_data_formatted, formula_context_vars
    )
    error = _sample_required_knowns(
        selected_concept, required_knowns, handled_vars,
        all_variables_data_formatted, formula_context_vars
    )
    if error:
        return error

    _resample_future_value_for_rate_problems(
        selected_concept, all_variables_data_formatted, formula_context_vars
    )

    # 2. Handle time conversions.
    _maybe_restate_time_in_months(
        selected_concept, all_variables_data_formatted, formula_context_vars
    )

    # 3. Calculate intermediate variables and the target unknown.
    error, calculated_solution_value, calculated_solution_data_formatted = _evaluate_concept_formulas(
        selected_concept, target_unknown, all_variables_data_formatted, formula_context_vars
    )
    if error:
        return error
    if calculated_solution_value is None:
        return {"error": f"Target unknown '{target_unknown}' was not calculated for concept '{selected_concept['concept_id']}'."}

    # 4. Build Narrative
    problem_narrative = narrative_builder.build_narrative(selected_concept, all_variables_data_formatted, target_unknown)

    # 5. Generate Guided Solution
    solution_steps = solution_presenter.generate_guided_solution(selected_concept, all_variables_data_formatted, calculated_solution_data_formatted)

    # Show the originally generated knowns plus the specially handled inputs.
    shown_keys = set(required_knowns) | {"m_compounding_periods_per_year", "n_time_months", "start_date", "end_date"}
    return {
        "concept_id": selected_concept["concept_id"],
        "topic": selected_concept["financial_topic"],
        "problem_statement": problem_narrative,
        "target_unknown_key": target_unknown,
        "known_values_data": {k: v for k, v in all_variables_data_formatted.items() if k in shown_keys},
        "all_variables_for_solution": all_variables_data_formatted,  # Includes intermediates
        "calculated_answer_raw": calculated_solution_value,
        "calculated_answer_formatted": value_sampler.format_value_for_display(calculated_solution_data_formatted),
        "solution_steps": solution_steps
    }


def _generate_date_period_knowns(selected_concept, required_knowns, all_variables_data_formatted, formula_context_vars):
    """Generate start/end dates and the derived day counts for date-based concepts.

    Does nothing unless both ``start_date`` and ``end_date`` are required.
    Returns the set of variable names it has populated so the generic sampler
    can skip them.
    """
    handled_vars = set()
    if "start_date" not in required_knowns or "end_date" not in required_knowns:
        return handled_vars

    # Date generation parameters come from value_ranges.json, with defaults.
    # Guard against an unloaded cache the same way the formula step does.
    date_period_config = (VALUE_RANGES_CACHE or {}).get("date_period_generation", {})
    start_date_obj, end_date_obj, num_days_val = date_utils.get_random_date_period(
        min_days=date_period_config.get("min_days", 30),
        max_days=date_period_config.get("max_days", 730),
        base_year_range=(
            date_period_config.get("base_year_start", 1990),
            date_period_config.get("base_year_end", 2030),
        ),
    )

    all_variables_data_formatted["start_date"] = {'key': 'start_date', 'value': start_date_obj, 'unit': 'date', 'display_precision': None}
    all_variables_data_formatted["end_date"] = {'key': 'end_date', 'value': end_date_obj, 'unit': 'date', 'display_precision': None}
    all_variables_data_formatted["n_time_days"] = {'key': 'n_time_days', 'value': num_days_val, 'unit': 'days', 'display_precision': 0}
    formula_context_vars["start_date"] = start_date_obj
    formula_context_vars["end_date"] = end_date_obj
    formula_context_vars["n_time_days"] = num_days_val
    handled_vars.update(["start_date", "end_date", "n_time_days"])

    # The day-count base depends on the topic: the actual length of the start
    # date's year for exact interest, a fixed 360 days for ordinary interest.
    # These are derived values, not entries of 'required_knowns_for_target'.
    time_base_days_val = 0
    financial_topic = selected_concept["financial_topic"]
    if financial_topic == "Exact Simple Interest":
        time_base_year_for_exact_val = start_date_obj.year
        time_base_days_val = date_utils.days_in_year(time_base_year_for_exact_val)
        all_variables_data_formatted["time_base_year_for_exact"] = {'key': 'time_base_year_for_exact', 'value': time_base_year_for_exact_val, 'unit': 'year', 'display_precision': 0}
        formula_context_vars["time_base_year_for_exact"] = time_base_year_for_exact_val
    elif financial_topic == "Ordinary Simple Interest":
        time_base_days_val = 360

    if time_base_days_val > 0:
        all_variables_data_formatted["time_base_days"] = {'key': 'time_base_days', 'value': time_base_days_val, 'unit': 'days', 'display_precision': 0}
        formula_context_vars["time_base_days"] = time_base_days_val

    return handled_vars


def _sample_required_knowns(selected_concept, required_knowns, handled_vars, all_variables_data_formatted, formula_context_vars):
    """Sample a value for every required known that is not already handled.

    Returns an ``{"error": ...}`` dict when a value cannot be generated,
    otherwise ``None``.
    """
    for var_key in required_knowns:
        if var_key in handled_vars:
            continue  # Already produced by the date-specific logic

        if var_key == "m_compounding_periods_per_year":
            comp_freq_data = value_sampler.get_random_compounding_frequency()
            if not comp_freq_data:
                return {"error": f"Failed to generate compounding frequency for {selected_concept['concept_id']}"}
            all_variables_data_formatted[var_key] = comp_freq_data
            formula_context_vars[var_key] = comp_freq_data["m_value"]
            continue

        value_range_key = CONCEPT_VAR_TO_VALUERANGE_KEY.get(var_key)
        if not value_range_key:
            # This might be an intermediate variable that shouldn't be
            # generated directly, or one without a single matching range.
            print(f"Warning: No value_range_key mapping for required known '{var_key}' in concept '{selected_concept['concept_id']}'. Skipping direct generation.")
            continue

        var_data = value_sampler.get_value_for_variable(value_range_key)
        if not var_data:
            return {"error": f"Failed to generate value for '{var_key}' (mapped from '{value_range_key}') for concept '{selected_concept['concept_id']}'"}
        all_variables_data_formatted[var_key] = var_data
        formula_context_vars[var_key] = var_data["value"]
    return None


def _resample_future_value_for_rate_problems(selected_concept, all_variables_data_formatted, formula_context_vars):
    """Plausibility check: for rate-solving concepts, try to get F > P.

    A future value at or below the principal gives a zero or negative rate, so
    the future value is resampled a bounded number of times. If that still
    fails, a warning is printed and the current values are kept.
    """
    rate_solving_concepts = (
        "COMPOUND_INTEREST_SOLVE_FOR_RATE",
        "CONTINUOUS_COMPOUNDING_SOLVE_FOR_RATE",
        # Solves from P and I_simple, so no F key is found below and the
        # check is a no-op for it.
        "SIMPLE_INTEREST_SOLVE_FOR_RATE_FROM_I",
    )
    if selected_concept["concept_id"] not in rate_solving_concepts:
        return

    p_key = "P"
    if p_key not in formula_context_vars:
        return
    f_key = next(
        (name for name in ("F_compound", "F_continuous", "F_simple") if name in formula_context_vars),
        None,
    )
    if f_key is None:
        return

    max_resample_attempts = 5
    attempt = 0
    while formula_context_vars[f_key] <= formula_context_vars[p_key] and attempt < max_resample_attempts:
        print(f"Resampling {f_key} because {f_key} ({formula_context_vars[f_key]}) <= {p_key} ({formula_context_vars[p_key]}) for rate calculation.")
        f_value_range_key = CONCEPT_VAR_TO_VALUERANGE_KEY.get(f_key)
        if not f_value_range_key:
            break  # No range key for F, nothing to resample from
        var_data_f = value_sampler.get_value_for_variable(f_value_range_key)
        if not var_data_f:
            break  # Resampling failed; stop rather than spin
        all_variables_data_formatted[f_key] = var_data_f
        formula_context_vars[f_key] = var_data_f["value"]
        attempt += 1

    if formula_context_vars[f_key] <= formula_context_vars[p_key]:
        print(f"Warning: Could not ensure {f_key} > {p_key} after {max_resample_attempts} attempts for concept {selected_concept['concept_id']}. Proceeding with current values.")


def _maybe_restate_time_in_months(selected_concept, all_variables_data_formatted, formula_context_vars):
    """For Simple Interest, sometimes derive ``n_time_years`` from a months value.

    With a 30% chance (and only when ``n_time_years`` was generated), a months
    value is sampled, stored as ``n_time_months``, and ``n_time_years`` is
    overwritten with that value divided by 12.
    """
    if selected_concept["financial_topic"] != "Simple Interest":
        return
    # The random draw only happens when n_time_years is present (short-circuit).
    if "n_time_years" not in formula_context_vars or random.random() >= 0.3:
        return

    original_months_data = value_sampler.get_value_for_variable("time_months")
    if not original_months_data:
        return

    all_variables_data_formatted["n_time_months"] = original_months_data
    formula_context_vars["n_time_years"] = original_months_data["value"] / 12.0
    all_variables_data_formatted["n_time_years"] = {
        'key': 'time_years',  # original key from value_ranges
        'value': formula_context_vars["n_time_years"],
        'unit': 'years',
        # Keep the precision setting of the entry being replaced.
        'display_precision': all_variables_data_formatted["n_time_years"].get('display_precision', 4)
    }


def _evaluate_concept_formulas(selected_concept, target_unknown, all_variables_data_formatted, formula_context_vars):
    """Evaluate the concept's formulas, intermediates first and the target last.

    Every result is added to both the formula context (so later formulas can
    use it) and the formatted-variable dict. Formulas are otherwise evaluated
    in the order the concept lists them; no dependency ordering is attempted.

    Returns:
        tuple: ``(error, value, formatted)`` where ``error`` is an
        ``{"error": ...}`` dict or ``None``, and ``value`` / ``formatted`` are
        the raw and formatted result for the target (both ``None`` when the
        concept has no formula for the target).
    """
    formulas = selected_concept["formulas"]
    if not isinstance(formulas, dict):
        return {"error": f"Formulas for concept {selected_concept['concept_id']} are not in expected dict format."}, None, None

    formulas_to_eval = [(var, formula_str) for var, formula_str in formulas.items() if var != target_unknown]
    if target_unknown in formulas:
        formulas_to_eval.append((target_unknown, formulas[target_unknown]))

    calculated_solution_value = None
    calculated_solution_data_formatted = None
    for var_to_calc, formula_str in formulas_to_eval:
        calc_value = formula_evaluator.evaluate_formula(formula_str, formula_context_vars)
        if calc_value is None:
            return {"error": f"Failed to evaluate formula for '{var_to_calc}' in concept '{selected_concept['concept_id']}'. Context: {formula_context_vars}"}, None, None
        formula_context_vars[var_to_calc] = calc_value

        formatted_data_for_calc_var = _format_calculated_variable(var_to_calc, calc_value)
        all_variables_data_formatted[var_to_calc] = formatted_data_for_calc_var
        if var_to_calc == target_unknown:
            calculated_solution_value = calc_value
            calculated_solution_data_formatted = formatted_data_for_calc_var

    return None, calculated_solution_value, calculated_solution_data_formatted


def _format_calculated_variable(var_to_calc, calc_value):
    """Build the formatted-data dict for a calculated variable.

    Formatting hints (currency, unit, precision) are taken from the value
    range that the variable maps to, when there is one.
    """
    value_ranges = VALUE_RANGES_CACHE or {}
    value_range_key = CONCEPT_VAR_TO_VALUERANGE_KEY.get(var_to_calc)
    base_config = value_ranges.get(value_range_key, {})
    formatted = {
        'key': var_to_calc,  # Use the concept's variable name
        'value': calc_value,
        'currency': base_config.get('currency'),
        'unit': base_config.get('unit'),
        'unit_display': base_config.get('unit_display'),
        'display_precision': base_config.get('display_precision', base_config.get('decimals'))
    }
    # Variables named like a rate get a unit_display when none was configured.
    if "rate" in var_to_calc.lower() and not formatted['unit_display']:
        if value_range_key and value_range_key in value_ranges:
            formatted['unit_display'] = base_config.get('unit_display', '%')
        else:
            formatted['unit_display'] = '%'
    return formatted
if __name__ == '__main__':
    # Self-test: generate and print one problem for every defined concept.
    print("Generating a sample financial problem...\n")
    # Ensure data is loaded for sub-modules if they cache independently on first call
    value_sampler._get_value_ranges_cached()
    narrative_builder._get_names_data_cached()
    narrative_builder._get_text_snippets_cached()
    solution_presenter._get_text_snippets_cached()

    all_concepts = data_loader.get_financial_concepts()
    if not all_concepts:
        print("Failed to load financial concepts for testing. Exiting.")
        sys.exit(1)
    print(f"Found {len(all_concepts)} concepts to test.\n")

    # generate_problem() picks randomly from FINANCIAL_CONCEPTS, so the global
    # is narrowed to a single concept per iteration to force that concept.
    # try/finally guarantees the original value is restored even if a
    # generation step raises.
    original_financial_concepts_global = FINANCIAL_CONCEPTS
    try:
        for concept_number, concept_to_test in enumerate(all_concepts, start=1):
            FINANCIAL_CONCEPTS = [concept_to_test]
            print(f"\n--- Testing Concept {concept_number}/{len(all_concepts)}: {concept_to_test['concept_id']} ---")
            problem = generate_problem()
            if "error" in problem:
                print(f"Error generating problem: {problem['error']}")
            else:
                print(f"Topic: {problem['topic']}")
                print("\nProblem Statement:")
                print(problem['problem_statement'])
                # Normally already loaded by generate_problem(); load defensively.
                if TEXT_SNIPPETS_DATA is None:
                    _load_all_data_cached()
                # Fall back to the raw variable key when the snippets are
                # unavailable or have no description for the target.
                variable_descriptions = (TEXT_SNIPPETS_DATA or {}).get('variable_descriptions', {})
                target_key = problem['target_unknown_key']
                print(f"\nQuestion: What is the {variable_descriptions.get(target_key, target_key)}?")
                print("\nGuided Solution:")
                for step in problem['solution_steps']:
                    print(step)
                print(f"\nFinal Answer ({problem['target_unknown_key']}): {problem['calculated_answer_formatted']}")
            print("---------------------------------------\n")
    finally:
        FINANCIAL_CONCEPTS = original_financial_concepts_global  # Restore original global state
    print("Completed testing all concepts.")