import sys
import os

# Put the parent directory of this file's directory on the import path
# *before* the `src` imports below are resolved.
sys.path.insert(
    0,
    os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..')
    ),
)

import random
import math  # For direct use if not going through formula_evaluator for simple conversions

from src import data_loader
from src import value_sampler
from src import formula_evaluator
from src import narrative_builder
from src import solution_presenter
from src import date_utils  # For potential date-based time calculations

# --- Cached Data ---
# Module-level caches; each stays None until _load_all_data_cached() fills it.
FINANCIAL_CONCEPTS = None
TEXT_SNIPPETS_DATA = None  # To avoid conflict with value_sampler's TEXT_SNIPPETS
NAMES_DATA_CACHE = None
VALUE_RANGES_CACHE = None


def _load_all_data_cached():
    """Fill every module-level cache that is still ``None``.

    Each cache is loaded through the matching ``data_loader`` getter. A cache
    that already holds a value (anything other than ``None``) is left as it
    is, so repeated calls do not reload it.
    """
    global FINANCIAL_CONCEPTS, TEXT_SNIPPETS_DATA, NAMES_DATA_CACHE, VALUE_RANGES_CACHE

    FINANCIAL_CONCEPTS = (
        data_loader.get_financial_concepts()
        if FINANCIAL_CONCEPTS is None
        else FINANCIAL_CONCEPTS
    )
    # Text snippets are loaded for solution_presenter and narrative_builder.
    TEXT_SNIPPETS_DATA = (
        data_loader.get_text_snippets()
        if TEXT_SNIPPETS_DATA is None
        else TEXT_SNIPPETS_DATA
    )
    # Names data is loaded for narrative_builder.
    NAMES_DATA_CACHE = (
        data_loader.get_names_data()
        if NAMES_DATA_CACHE is None
        else NAMES_DATA_CACHE
    )
    # Value ranges are loaded for value_sampler.
    VALUE_RANGES_CACHE = (
        data_loader.get_value_ranges()
        if VALUE_RANGES_CACHE is None
        else VALUE_RANGES_CACHE
    )
    # Sub-modules that keep their own caches check their own global cache
    # variables, so nothing further is pushed into them from here.


# --- Variable Mapping ---
# Maps concept variable names to value_ranges.json keys
# This might need to be more dynamic or extensive based on concept variations.
CONCEPT_VAR_TO_VALUERANGE_KEY = {
    "P": "principal",  # Could also be loan_amount, investment_amount
    "F_simple": "future_value",
    "F_compound": "future_value",
    "F_continuous": "future_value",
    "F_maturity": "future_value",  # For Banker's Discount, F is the maturity value
    "F_exact_simple": "future_value",
    "F_ordinary_simple": "future_value",
    "I_simple": "interest_amount",
    "I_exact_simple": "interest_amount",
    "I_ordinary_simple": "interest_amount",
    "Db_discount_amount": "interest_amount",  # Or a specific "discount_amount" range
    "P_proceeds": "principal",  # Proceeds are a present value
    "i_simple_annual": "simple_interest_rate_annual",
    # Used by CONTINUOUS_COMPOUNDING_EQUIVALENT_SIMPLE_RATE and for comparison rates.
    # (This key used to be listed twice in the literal; one entry is enough.)
    "i_simple_equivalent": "simple_interest_rate_annual",
    "n_time_years": "time_years",  # For simple interest
    "n_time_months": "time_months",
    "n_time_days": "time_days",
    "r_nominal_annual": "compound_interest_rate_nominal",
    "t_years": "time_years",  # For compound/continuous interest time
    # m_compounding_periods_per_year is handled specially
    "ER": "compound_interest_rate_nominal",  # Effective rate is a rate
    "d_discount_rate": "discount_rate_bankers",
}


def generate_problem():
    """
    Generates a complete financial math problem with narrative and solution.

    A concept is picked at random from ``FINANCIAL_CONCEPTS``; values for its
    required knowns are sampled, the concept's formulas are evaluated (the
    target unknown last), and the narrative and guided solution are built.

    Returns:
        dict: On success, a dict with the keys ``concept_id``, ``topic``,
        ``problem_statement``, ``target_unknown_key``, ``known_values_data``,
        ``all_variables_for_solution``, ``calculated_answer_raw``,
        ``calculated_answer_formatted`` and ``solution_steps``.
        On failure, a dict with the single key ``error`` holding a message.
    """
    _load_all_data_cached()
    if not FINANCIAL_CONCEPTS:
        return {"error": "Failed to load financial concepts."}

    selected_concept = random.choice(FINANCIAL_CONCEPTS)
    target_unknown = selected_concept["target_unknown"]
    concept_id = selected_concept["concept_id"]

    # Treat a missing/empty value-range cache as "no configuration" everywhere
    # in this function, instead of guarding it in some places only.
    value_ranges = VALUE_RANGES_CACHE or {}

    # Full data dicts from value_sampler, keyed by concept variable name.
    all_variables_data_formatted = {}
    # Raw numerical values for formula evaluation, keyed by concept variable name.
    formula_context_vars = {}

    # 1. Generate Known Values
    # Keep the declared order (de-duplicated) for sampling so that the sequence
    # of random draws does not depend on set iteration order, which varies
    # between processes for string keys. The set is used for membership only.
    required_known_order = list(
        dict.fromkeys(selected_concept.get("required_knowns_for_target", []))
    )
    required_knowns = set(required_known_order)
    handled_vars = set()

    # --- BEGIN DATE SPECIFIC LOGIC ---
    if "start_date" in required_knowns and "end_date" in required_knowns:
        # Date generation parameters come from the value ranges, with defaults.
        date_period_config = value_ranges.get("date_period_generation", {})
        start_date_obj, end_date_obj, num_days_val = date_utils.get_random_date_period(
            min_days=date_period_config.get("min_days", 30),
            max_days=date_period_config.get("max_days", 730),
            base_year_range=(
                date_period_config.get("base_year_start", 1990),
                date_period_config.get("base_year_end", 2030),
            ),
        )

        all_variables_data_formatted["start_date"] = {
            'key': 'start_date', 'value': start_date_obj,
            'unit': 'date', 'display_precision': None,
        }
        all_variables_data_formatted["end_date"] = {
            'key': 'end_date', 'value': end_date_obj,
            'unit': 'date', 'display_precision': None,
        }
        all_variables_data_formatted["n_time_days"] = {
            'key': 'n_time_days', 'value': num_days_val,
            'unit': 'days', 'display_precision': 0,
        }
        formula_context_vars["start_date"] = start_date_obj
        formula_context_vars["end_date"] = end_date_obj
        formula_context_vars["n_time_days"] = num_days_val
        # These must not be sampled again by the generic loop below.
        handled_vars.update(["start_date", "end_date", "n_time_days"])

        # Determine time_base_days (and time_base_year_for_exact) from the topic.
        time_base_days_val = 0
        if selected_concept["financial_topic"] == "Exact Simple Interest":
            time_base_year_for_exact_val = start_date_obj.year
            time_base_days_val = date_utils.days_in_year(time_base_year_for_exact_val)
            # Derived for solution steps, not typically in 'required_knowns_for_target'.
            all_variables_data_formatted["time_base_year_for_exact"] = {
                'key': 'time_base_year_for_exact', 'value': time_base_year_for_exact_val,
                'unit': 'year', 'display_precision': 0,
            }
            formula_context_vars["time_base_year_for_exact"] = time_base_year_for_exact_val
        elif selected_concept["financial_topic"] == "Ordinary Simple Interest":
            time_base_days_val = 360

        # Add time_base_days only if it was determined (Exact or Ordinary).
        if time_base_days_val > 0:
            # Derived for formulas/solution steps, not typically a required known.
            all_variables_data_formatted["time_base_days"] = {
                'key': 'time_base_days', 'value': time_base_days_val,
                'unit': 'days', 'display_precision': 0,
            }
            formula_context_vars["time_base_days"] = time_base_days_val
    # --- END DATE SPECIFIC LOGIC ---

    # Sample every remaining required known.
    for var_key in required_known_order:
        if var_key in handled_vars:
            continue  # Already produced by the date-specific logic.

        if var_key == "m_compounding_periods_per_year":
            comp_freq_data = value_sampler.get_random_compounding_frequency()
            if not comp_freq_data:
                return {"error": f"Failed to generate compounding frequency for {concept_id}"}
            all_variables_data_formatted[var_key] = comp_freq_data
            formula_context_vars[var_key] = comp_freq_data["m_value"]
            continue

        value_range_key = CONCEPT_VAR_TO_VALUERANGE_KEY.get(var_key)
        if not value_range_key:
            # Might be an intermediate variable that shouldn't be generated
            # directly, or one that doesn't map to a single range.
            print(f"Warning: No value_range_key mapping for required known '{var_key}' in concept '{concept_id}'. Skipping direct generation.")
            continue

        var_data = value_sampler.get_value_for_variable(value_range_key)
        if not var_data:
            return {"error": f"Failed to generate value for '{var_key}' (mapped from '{value_range_key}') for concept '{concept_id}'"}
        all_variables_data_formatted[var_key] = var_data
        formula_context_vars[var_key] = var_data["value"]

    # --- BEGIN PLAUSIBILITY CHECKS (e.g., for rate solving) ---
    if concept_id in ["COMPOUND_INTEREST_SOLVE_FOR_RATE", "CONTINUOUS_COMPOUNDING_SOLVE_FOR_RATE", "SIMPLE_INTEREST_SOLVE_FOR_RATE_FROM_I"]:
        # We want F > P so that the solved rate is positive. When only P and an
        # interest amount are known (no F_* key), nothing is resampled here.
        p_key = "P" if "P" in formula_context_vars else None
        f_key = next(
            (k for k in ("F_compound", "F_continuous", "F_simple") if k in formula_context_vars),
            None,
        )

        if p_key and f_key:
            max_resample_attempts = 5
            f_value_range_key = CONCEPT_VAR_TO_VALUERANGE_KEY.get(f_key)
            for _ in range(max_resample_attempts):
                if formula_context_vars[f_key] > formula_context_vars[p_key]:
                    break
                print(f"Resampling {f_key} because {f_key} ({formula_context_vars[f_key]}) <= {p_key} ({formula_context_vars[p_key]}) for rate calculation.")
                if not f_value_range_key:
                    break  # No range key for F; nothing to resample from.
                var_data_f = value_sampler.get_value_for_variable(f_value_range_key)
                if not var_data_f:
                    break  # Resampling failed; stop retrying.
                all_variables_data_formatted[f_key] = var_data_f
                formula_context_vars[f_key] = var_data_f["value"]

            if formula_context_vars[f_key] <= formula_context_vars[p_key]:
                print(f"Warning: Could not ensure {f_key} > {p_key} after {max_resample_attempts} attempts for concept {concept_id}. Proceeding with current values.")
    # --- END PLAUSIBILITY CHECKS ---

    # 2. Handle Time Conversions
    # For Simple Interest concepts with a known n_time_years, 30% of the time
    # start from a sampled number of months and derive the years from it.
    if (
        selected_concept["financial_topic"] == "Simple Interest"
        and "n_time_years" in formula_context_vars
        and random.random() < 0.3
    ):
        original_months_data = value_sampler.get_value_for_variable("time_months")
        if original_months_data:
            all_variables_data_formatted["n_time_months"] = original_months_data
            # Override n_time_years with the converted value.
            formula_context_vars["n_time_years"] = original_months_data["value"] / 12.0
            all_variables_data_formatted["n_time_years"] = {
                'key': 'time_years',  # original key from value_ranges
                'value': formula_context_vars["n_time_years"],
                'unit': 'years',
                # Keep the precision setting of the entry being replaced.
                'display_precision': all_variables_data_formatted["n_time_years"].get('display_precision', 4),
            }
    # Could add similar logic for days -> years conversion here, possibly
    # using date_utils for exact days.

    # 3. Calculate Intermediate Variables defined in the concept's formulas
    # Intermediates are evaluated in the order the concept lists them and the
    # target's formula is evaluated last. No dependency graph is built, so the
    # concept data is assumed to list dependent formulas in a workable order.
    formulas = selected_concept["formulas"]
    if not isinstance(formulas, dict):
        # Should not happen based on current JSON structure.
        return {"error": f"Formulas for concept {concept_id} are not in expected dict format."}

    formulas_to_eval = [
        (var, formula_str) for var, formula_str in formulas.items() if var != target_unknown
    ]
    if target_unknown in formulas:
        formulas_to_eval.append((target_unknown, formulas[target_unknown]))

    calculated_solution_value = None
    calculated_solution_data_formatted = None

    for var_to_calc, formula_str in formulas_to_eval:
        calc_value = formula_evaluator.evaluate_formula(formula_str, formula_context_vars)
        if calc_value is None:
            return {"error": f"Failed to evaluate formula for '{var_to_calc}' in concept '{concept_id}'. Context: {formula_context_vars}"}

        # Make the result available to subsequent formulas.
        formula_context_vars[var_to_calc] = calc_value

        # Formatting metadata is borrowed from the value range the variable
        # maps to (empty when there is no mapping or no loaded range).
        value_range_key_for_calc_var = CONCEPT_VAR_TO_VALUERANGE_KEY.get(var_to_calc)
        base_config = value_ranges.get(value_range_key_for_calc_var, {})

        formatted_data_for_calc_var = {
            'key': var_to_calc,  # Use the concept's variable name
            'value': calc_value,
            'currency': base_config.get('currency'),
            'unit': base_config.get('unit'),
            'unit_display': base_config.get('unit_display'),
            'display_precision': base_config.get('display_precision', base_config.get('decimals')),
        }

        # A variable whose name contains "rate" and has no unit_display falls
        # back to the range's unit_display, or '%' when the range defines none.
        if "rate" in var_to_calc.lower() and not formatted_data_for_calc_var.get('unit_display'):
            formatted_data_for_calc_var['unit_display'] = base_config.get('unit_display', '%')

        all_variables_data_formatted[var_to_calc] = formatted_data_for_calc_var

        if var_to_calc == target_unknown:
            calculated_solution_value = calc_value
            calculated_solution_data_formatted = formatted_data_for_calc_var

    if calculated_solution_value is None:
        return {"error": f"Target unknown '{target_unknown}' was not calculated for concept '{concept_id}'."}

    # 4. Build Narrative
    problem_narrative = narrative_builder.build_narrative(
        selected_concept, all_variables_data_formatted, target_unknown
    )

    # 5. Generate Guided Solution
    solution_steps = solution_presenter.generate_guided_solution(
        selected_concept, all_variables_data_formatted, calculated_solution_data_formatted
    )

    # Show the originally generated knowns, plus a few always-shown extras.
    shown_known_keys = required_knowns | {
        "m_compounding_periods_per_year", "n_time_months", "start_date", "end_date",
    }

    return {
        "concept_id": concept_id,
        "topic": selected_concept["financial_topic"],
        "problem_statement": problem_narrative,
        "target_unknown_key": target_unknown,
        "known_values_data": {
            k: v for k, v in all_variables_data_formatted.items() if k in shown_known_keys
        },
        "all_variables_for_solution": all_variables_data_formatted,  # Includes intermediates
        "calculated_answer_raw": calculated_solution_value,
        "calculated_answer_formatted": value_sampler.format_value_for_display(calculated_solution_data_formatted),
        "solution_steps": solution_steps,
    }


if __name__ == '__main__':
    print("Generating a sample financial problem...\n")

    # Ensure data is loaded for sub-modules if they cache independently on first call
    value_sampler._get_value_ranges_cached()
    narrative_builder._get_names_data_cached()
    narrative_builder._get_text_snippets_cached()
    solution_presenter._get_text_snippets_cached()

    # Load all concepts
    all_concepts = data_loader.get_financial_concepts()
    if not all_concepts:
        print("Failed to load financial concepts for testing. Exiting.")
        sys.exit(1)

    print(f"Found {len(all_concepts)} concepts to test.\n")

    # Save the global state so it can be put back after the run.
    original_financial_concepts_global = FINANCIAL_CONCEPTS

    try:
        for i, concept_to_test in enumerate(all_concepts, start=1):
            # Narrow the global to a single concept so that generate_problem()
            # is forced to pick exactly this one.
            FINANCIAL_CONCEPTS = [concept_to_test]

            print(f"\n--- Testing Concept {i}/{len(all_concepts)}: {concept_to_test['concept_id']} ---")
            problem = generate_problem()

            if "error" in problem:
                print(f"Error generating problem: {problem['error']}")
            else:
                print(f"Topic: {problem['topic']}")
                print("\nProblem Statement:")
                print(problem['problem_statement'])

                # Should already have been loaded by _load_all_data_cached.
                if TEXT_SNIPPETS_DATA is None:
                    _load_all_data_cached()
                # Fall back to the raw variable key when no description exists
                # (or when the snippets could not be loaded at all).
                variable_descriptions = (TEXT_SNIPPETS_DATA or {}).get('variable_descriptions', {})
                target_description = variable_descriptions.get(
                    problem['target_unknown_key'], problem['target_unknown_key']
                )
                print(f"\nQuestion: What is the {target_description}?")

                print("\nGuided Solution:")
                for step in problem['solution_steps']:
                    print(step)

                print(f"\nFinal Answer ({problem['target_unknown_key']}): {problem['calculated_answer_formatted']}")
                print("---------------------------------------\n")
    finally:
        # Restore the original global state even if a concept raised.
        FINANCIAL_CONCEPTS = original_financial_concepts_global

    print("Completed testing all concepts.")