This commit marks a major milestone in the problem generator project.
Key systems include:
- Fully functional problem generation for all 23 defined concepts (Simple Interest, Compound Interest, Banker's Discount, Effective Rate, Continuous Compounding, Exact/Ordinary Simple Interest).
- Robust date handling for date-specific interest calculations.
- Improved solution presentation with accurate "Substitute Values" step and complete variable descriptions.
- Interactive problem display in `main.py`: problem statement shown first, question and solution revealed on user input.
- Added plausibility checks for rate calculations to avoid unrealistic negative rates.
- Comprehensive `README.md` update:
  - Detailed system architecture (modules, data files, Mermaid diagram).
  - List of all currently covered financial concepts.
  - Instructions for running and extending the generator.
  - Discussion of scope for future enhancements (Equation of Value, Gradients, etc.).
- Refinements to `value_sampler.py` for better formatting and handling of None values.
- Updates to `text_snippets.json` for complete variable descriptions and improved solution step phrasing.
- Updates to `value_ranges.json` for date generation parameters.
- `problem_engine.py` now systematically tests all concepts when run directly.
- Added `uv.lock` to track resolved dependencies.
The system is capable of generating a wide variety of engineering economy problems with detailed, step-by-step solutions and an interactive user experience.
346 lines
19 KiB
Python
346 lines
19 KiB
Python
import sys
|
|
import os
|
|
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
|
|
|
import random
|
|
import math # For direct use if not going through formula_evaluator for simple conversions
|
|
from src import data_loader
|
|
from src import value_sampler
|
|
from src import formula_evaluator
|
|
from src import narrative_builder
|
|
from src import solution_presenter
|
|
from src import date_utils # For potential date-based time calculations
|
|
|
|
# --- Cached Data ---
|
|
# Module-level caches, all empty until _load_all_data_cached() fills them.
#   FINANCIAL_CONCEPTS  - concept definitions from data_loader
#   TEXT_SNIPPETS_DATA  - text snippets (named to avoid conflict with
#                         value_sampler's TEXT_SNIPPETS)
#   NAMES_DATA_CACHE    - names data
#   VALUE_RANGES_CACHE  - value ranges
FINANCIAL_CONCEPTS = TEXT_SNIPPETS_DATA = None
NAMES_DATA_CACHE = VALUE_RANGES_CACHE = None
|
|
|
|
def _load_all_data_cached():
    """Fill every module-level cache that is still ``None``.

    Each cache is loaded through its ``data_loader`` getter only when it has
    not been populated yet, so a cache that was set earlier (including one set
    by hand) is left untouched.
    """
    # (cache variable name, loader) pairs, in the order they are populated.
    cache_sources = (
        ("FINANCIAL_CONCEPTS", data_loader.get_financial_concepts),
        # Loaded for solution_presenter and narrative_builder
        ("TEXT_SNIPPETS_DATA", data_loader.get_text_snippets),
        # Loaded for narrative_builder
        ("NAMES_DATA_CACHE", data_loader.get_names_data),
        # Loaded for value_sampler
        ("VALUE_RANGES_CACHE", data_loader.get_value_ranges),
    )
    module_state = globals()
    for cache_name, load in cache_sources:
        if module_state[cache_name] is None:
            module_state[cache_name] = load()
|
|
|
|
# Ensure sub-modules also use cached data if they have their own caches
|
|
# This is already handled by them checking their global cache variables.
|
|
|
|
# --- Variable Mapping ---
|
|
# Maps concept variable names to value_ranges.json keys
|
|
# This might need to be more dynamic or extensive based on concept variations.
|
|
# Maps a concept variable name to the value_ranges.json key used to sample it.
# Each key appears exactly once: the original literal listed
# "i_simple_equivalent" twice, and a repeated key in a dict display silently
# keeps only the last value.
CONCEPT_VAR_TO_VALUERANGE_KEY = {
    # Amounts
    "P": "principal",  # Could also be loan_amount, investment_amount
    "F_simple": "future_value",
    "F_compound": "future_value",
    "F_continuous": "future_value",
    "F_maturity": "future_value",  # For Banker's Discount, F is the maturity value
    "F_exact_simple": "future_value",
    "F_ordinary_simple": "future_value",
    "I_simple": "interest_amount",
    "I_exact_simple": "interest_amount",
    "I_ordinary_simple": "interest_amount",
    "Db_discount_amount": "interest_amount",  # Or a specific "discount_amount" range
    "P_proceeds": "principal",  # Proceeds are a present value

    # Simple-interest rates and times
    "i_simple_annual": "simple_interest_rate_annual",
    # Used by CONTINUOUS_COMPOUNDING_EQUIVALENT_SIMPLE_RATE and comparison rates
    "i_simple_equivalent": "simple_interest_rate_annual",
    "n_time_years": "time_years",  # For simple interest
    "n_time_months": "time_months",
    "n_time_days": "time_days",

    # Compound / continuous / discount
    "r_nominal_annual": "compound_interest_rate_nominal",
    "t_years": "time_years",  # For compound/continuous interest time
    # m_compounding_periods_per_year is handled specially
    "ER": "compound_interest_rate_nominal",  # Effective rate is a rate
    "d_discount_rate": "discount_rate_bankers",
}
|
|
|
|
def generate_problem():
    """
    Generates a complete financial math problem with narrative and solution.

    A concept is picked at random from ``FINANCIAL_CONCEPTS``. Its required
    knowns are sampled (with special handling for date periods and the
    compounding frequency), the concept's formulas are evaluated with the
    target unknown last, and the narrative and guided solution are built from
    the collected variable data.

    Returns:
        dict: On success, a dict with the keys ``concept_id``, ``topic``,
        ``problem_statement``, ``target_unknown_key``, ``known_values_data``,
        ``all_variables_for_solution``, ``calculated_answer_raw``,
        ``calculated_answer_formatted`` and ``solution_steps``. On failure, a
        dict with a single ``error`` key describing what went wrong.
    """
    _load_all_data_cached()
    if not FINANCIAL_CONCEPTS:
        return {"error": "Failed to load financial concepts."}

    # Value ranges may have failed to load; use an empty mapping so every
    # lookup below degrades to its default instead of raising AttributeError.
    value_ranges = VALUE_RANGES_CACHE or {}

    selected_concept = random.choice(FINANCIAL_CONCEPTS)
    target_unknown = selected_concept["target_unknown"]
    concept_id = selected_concept["concept_id"]
    financial_topic = selected_concept["financial_topic"]

    # Full data dicts from value_sampler, keyed by concept variable name.
    all_variables_data_formatted = {}
    # Raw numerical values for formula evaluation, keyed by concept variable name.
    formula_context_vars = {}

    # 1. Generate Known Values
    # Keep the declared order (de-duplicated) for sampling so that a seeded
    # RNG gives reproducible problems; iterating a set of strings would make
    # the draw order depend on hash randomization. The set is kept for
    # membership tests.
    required_known_order = list(dict.fromkeys(
        selected_concept.get("required_knowns_for_target", [])
    ))
    required_knowns = set(required_known_order)
    handled_vars = set()

    # --- BEGIN DATE SPECIFIC LOGIC ---
    if "start_date" in required_knowns and "end_date" in required_knowns:
        # Date generation parameters from value_ranges.json, with defaults.
        date_period_config = value_ranges.get("date_period_generation", {})
        min_days_val = date_period_config.get("min_days", 30)
        max_days_val = date_period_config.get("max_days", 730)
        base_year_start_val = date_period_config.get("base_year_start", 1990)
        base_year_end_val = date_period_config.get("base_year_end", 2030)

        start_date_obj, end_date_obj, num_days_val = date_utils.get_random_date_period(
            min_days=min_days_val,
            max_days=max_days_val,
            base_year_range=(base_year_start_val, base_year_end_val)
        )

        all_variables_data_formatted["start_date"] = {'key': 'start_date', 'value': start_date_obj, 'unit': 'date', 'display_precision': None}
        all_variables_data_formatted["end_date"] = {'key': 'end_date', 'value': end_date_obj, 'unit': 'date', 'display_precision': None}
        all_variables_data_formatted["n_time_days"] = {'key': 'n_time_days', 'value': num_days_val, 'unit': 'days', 'display_precision': 0}

        formula_context_vars["start_date"] = start_date_obj
        formula_context_vars["end_date"] = end_date_obj
        formula_context_vars["n_time_days"] = num_days_val
        # Mark these as handled so the generic sampling loop skips them.
        handled_vars.update(["start_date", "end_date", "n_time_days"])

        # Determine time_base_days (and the reference year for exact
        # interest) from the financial topic. These are derived values for
        # the formulas/solution steps, not sampled knowns.
        time_base_days_val = 0

        if financial_topic == "Exact Simple Interest":
            # The day count of the start date's calendar year is the base.
            time_base_year_for_exact_val = start_date_obj.year
            time_base_days_val = date_utils.days_in_year(time_base_year_for_exact_val)

            all_variables_data_formatted["time_base_year_for_exact"] = {'key': 'time_base_year_for_exact', 'value': time_base_year_for_exact_val, 'unit': 'year', 'display_precision': 0}
            formula_context_vars["time_base_year_for_exact"] = time_base_year_for_exact_val
        elif financial_topic == "Ordinary Simple Interest":
            time_base_days_val = 360

        # Only publish time_base_days when one of the branches above set it.
        if time_base_days_val > 0:
            all_variables_data_formatted["time_base_days"] = {'key': 'time_base_days', 'value': time_base_days_val, 'unit': 'days', 'display_precision': 0}
            formula_context_vars["time_base_days"] = time_base_days_val
    # --- END DATE SPECIFIC LOGIC ---

    # Sample every remaining required known.
    for var_key in required_known_order:
        if var_key in handled_vars:
            continue  # Already produced by the date-specific logic

        if var_key == "m_compounding_periods_per_year":
            comp_freq_data = value_sampler.get_random_compounding_frequency()
            if not comp_freq_data:
                return {"error": f"Failed to generate compounding frequency for {selected_concept['concept_id']}"}
            all_variables_data_formatted[var_key] = comp_freq_data
            formula_context_vars[var_key] = comp_freq_data["m_value"]
            continue

        value_range_key = CONCEPT_VAR_TO_VALUERANGE_KEY.get(var_key)
        if not value_range_key:
            # This might be an intermediate variable that shouldn't be
            # generated directly, or one that doesn't map to a single range.
            print(f"Warning: No value_range_key mapping for required known '{var_key}' in concept '{selected_concept['concept_id']}'. Skipping direct generation.")
            continue

        var_data = value_sampler.get_value_for_variable(value_range_key)
        if not var_data:
            return {"error": f"Failed to generate value for '{var_key}' (mapped from '{value_range_key}') for concept '{selected_concept['concept_id']}'"}
        all_variables_data_formatted[var_key] = var_data
        formula_context_vars[var_key] = var_data["value"]

    # --- BEGIN PLAUSIBILITY CHECKS (e.g., for rate solving) ---
    if concept_id in ("COMPOUND_INTEREST_SOLVE_FOR_RATE", "CONTINUOUS_COMPOUNDING_SOLVE_FOR_RATE", "SIMPLE_INTEREST_SOLVE_FOR_RATE_FROM_I"):
        # Ensure F > P so the solved rate is positive. For
        # SIMPLE_INTEREST_SOLVE_FOR_RATE_FROM_I no F_* variable is sampled
        # (it uses P and I_simple), so f_key stays None and nothing happens.
        p_key = "P" if "P" in formula_context_vars else None
        f_key = None
        for candidate_f_key in ("F_compound", "F_continuous", "F_simple"):
            if candidate_f_key in formula_context_vars:
                f_key = candidate_f_key
                break

        if p_key and f_key:
            max_resample_attempts = 5
            attempt = 0
            while formula_context_vars[f_key] <= formula_context_vars[p_key] and attempt < max_resample_attempts:
                print(f"Resampling {f_key} because {f_key} ({formula_context_vars[f_key]}) <= {p_key} ({formula_context_vars[p_key]}) for rate calculation.")
                f_value_range_key = CONCEPT_VAR_TO_VALUERANGE_KEY.get(f_key)
                if not f_value_range_key:
                    break  # No range key for F, nothing to resample from
                var_data_f = value_sampler.get_value_for_variable(f_value_range_key)
                if not var_data_f:
                    break  # Failed to resample F; stop rather than spin
                all_variables_data_formatted[f_key] = var_data_f
                formula_context_vars[f_key] = var_data_f["value"]
                attempt += 1
            if formula_context_vars[f_key] <= formula_context_vars[p_key]:
                print(f"Warning: Could not ensure {f_key} > {p_key} after {max_resample_attempts} attempts for concept {selected_concept['concept_id']}. Proceeding with current values.")
    # --- END PLAUSIBILITY CHECKS ---

    # 2. Handle Time Conversions
    # For Simple Interest, with a 30% chance, present the time in months and
    # derive n_time_years from it instead of using the sampled year value.
    if financial_topic == "Simple Interest":
        if "n_time_years" in formula_context_vars and random.random() < 0.3:
            original_months_data = value_sampler.get_value_for_variable("time_months")
            if original_months_data:
                all_variables_data_formatted["n_time_months"] = original_months_data
                # Override n_time_years with the converted value.
                formula_context_vars["n_time_years"] = original_months_data["value"] / 12.0
                all_variables_data_formatted["n_time_years"] = {
                    'key': 'time_years',  # original key from value_ranges
                    'value': formula_context_vars["n_time_years"],
                    'unit': 'years',
                    # Keep the precision setting of the entry being replaced.
                    'display_precision': all_variables_data_formatted["n_time_years"].get('display_precision', 4)
                }

    # 3. Evaluate the concept's formulas: intermediates first, target last.
    # Intermediates are evaluated in the order the formulas mapping yields
    # them; no dependency graph is built.
    concept_formulas = selected_concept["formulas"]
    if not isinstance(concept_formulas, dict):
        return {"error": f"Formulas for concept {selected_concept['concept_id']} are not in expected dict format."}

    formulas_to_eval = [
        (var, formula_str)
        for var, formula_str in concept_formulas.items()
        if var != target_unknown
    ]
    if target_unknown in concept_formulas:
        formulas_to_eval.append((target_unknown, concept_formulas[target_unknown]))

    calculated_solution_value = None
    calculated_solution_data_formatted = None

    for var_to_calc, formula_str in formulas_to_eval:
        calc_value = formula_evaluator.evaluate_formula(formula_str, formula_context_vars)
        if calc_value is None:
            return {"error": f"Failed to evaluate formula for '{var_to_calc}' in concept '{selected_concept['concept_id']}'. Context: {formula_context_vars}"}

        # Make the result available to subsequent formulas.
        formula_context_vars[var_to_calc] = calc_value

        # Formatting metadata is borrowed from the value range this variable
        # maps to, when there is one.
        value_range_key_for_calc_var = CONCEPT_VAR_TO_VALUERANGE_KEY.get(var_to_calc)
        base_config = value_ranges.get(value_range_key_for_calc_var, {})

        formatted_data_for_calc_var = {
            'key': var_to_calc,  # Use the concept's variable name
            'value': calc_value,
            'currency': base_config.get('currency'),
            'unit': base_config.get('unit'),
            'unit_display': base_config.get('unit_display'),
            'display_precision': base_config.get('display_precision', base_config.get('decimals'))
        }

        # Variables whose name contains "rate" get a unit_display when the
        # range config did not supply one.
        if "rate" in var_to_calc.lower() and not formatted_data_for_calc_var.get('unit_display'):
            if value_range_key_for_calc_var and value_range_key_for_calc_var in value_ranges:
                formatted_data_for_calc_var['unit_display'] = value_ranges[value_range_key_for_calc_var].get('unit_display', '%')
            else:
                # No range config at all. The original also compared against
                # "n_total_compounding_periods" here, which can never hold
                # for a name containing "rate".
                formatted_data_for_calc_var['unit_display'] = '%'

        all_variables_data_formatted[var_to_calc] = formatted_data_for_calc_var

        if var_to_calc == target_unknown:
            calculated_solution_value = calc_value
            calculated_solution_data_formatted = formatted_data_for_calc_var

    if calculated_solution_value is None:
        return {"error": f"Target unknown '{target_unknown}' was not calculated for concept '{selected_concept['concept_id']}'."}

    # 4. Build Narrative
    problem_narrative = narrative_builder.build_narrative(selected_concept, all_variables_data_formatted, target_unknown)

    # 5. Generate Guided Solution
    solution_steps = solution_presenter.generate_guided_solution(selected_concept, all_variables_data_formatted, calculated_solution_data_formatted)

    # Knowns shown to the user: the required knowns plus a few variables that
    # are produced alongside them.
    always_shown_keys = ("m_compounding_periods_per_year", "n_time_months", "start_date", "end_date")
    known_values_data = {
        k: v
        for k, v in all_variables_data_formatted.items()
        if k in required_knowns or k in always_shown_keys
    }

    return {
        "concept_id": selected_concept["concept_id"],
        "topic": selected_concept["financial_topic"],
        "problem_statement": problem_narrative,
        "target_unknown_key": target_unknown,
        "known_values_data": known_values_data,
        "all_variables_for_solution": all_variables_data_formatted,  # Includes intermediates
        "calculated_answer_raw": calculated_solution_value,
        "calculated_answer_formatted": value_sampler.format_value_for_display(calculated_solution_data_formatted),
        "solution_steps": solution_steps
    }
|
|
|
|
|
|
if __name__ == '__main__':
    # Self-test driver: generate and print one problem for every concept.
    print("Generating a sample financial problem...\n")

    # Warm the sub-modules' own caches before generating anything.
    value_sampler._get_value_ranges_cached()
    narrative_builder._get_names_data_cached()
    narrative_builder._get_text_snippets_cached()
    solution_presenter._get_text_snippets_cached()

    concepts_under_test = data_loader.get_financial_concepts()
    if not concepts_under_test:
        print("Failed to load financial concepts for testing. Exiting.")
        sys.exit(1)

    concept_count = len(concepts_under_test)
    print(f"Found {concept_count} concepts to test.\n")

    # Remember the current global so it can be put back after the run.
    saved_financial_concepts = FINANCIAL_CONCEPTS

    for position, current_concept in enumerate(concepts_under_test, start=1):
        # generate_problem() picks randomly from FINANCIAL_CONCEPTS, so
        # narrowing the global to a single entry forces this concept.
        FINANCIAL_CONCEPTS = [current_concept]

        print(f"\n--- Testing Concept {position}/{concept_count}: {current_concept['concept_id']} ---")
        result = generate_problem()

        if "error" in result:
            print(f"Error generating problem: {result['error']}")
            continue

        print(f"Topic: {result['topic']}")
        print("\nProblem Statement:")
        print(result['problem_statement'])

        # The snippets should already be loaded by generate_problem(); load
        # them here if they are somehow still missing.
        if TEXT_SNIPPETS_DATA is None:
            _load_all_data_cached()

        target_key = result['target_unknown_key']
        target_description = TEXT_SNIPPETS_DATA['variable_descriptions'].get(target_key, target_key)
        print(f"\nQuestion: What is the {target_description}?")

        print("\nGuided Solution:")
        for solution_line in result['solution_steps']:
            print(solution_line)

        print(f"\nFinal Answer ({target_key}): {result['calculated_answer_formatted']}")
        print("---------------------------------------\n")

    FINANCIAL_CONCEPTS = saved_financial_concepts  # Restore original global state
    print("Completed testing all concepts.")
|