# Source code for negotiation_platform.main

#!/usr/bin/env python3
"""
Negotiation Platform - Main Entry Point
=======================================

This module serves as the primary entry point for the Negotiation Platform,
providing command-line interface, demonstration capabilities, and example
usage patterns for researchers and developers.

Key Features:
    - Command-line interface for quick testing and experimentation
    - Single negotiation runs for focused analysis
    - Comprehensive model comparison across multiple games
    - Interactive mode for guided exploration
    - Configurable logging and output options
    - Integration examples for all platform components

Usage Modes:
    1. Quick Mode: Single negotiation with default settings
    2. Comparison Mode: Full model comparison across games
    3. Interactive Mode: Guided selection of options
    4. Custom Mode: Programmatic usage with specific configurations

Example Command Lines:
    python main.py --quick --models model_a model_b --game company_car
    python main.py --comparison --models model_a model_b model_c
    python main.py --log-level DEBUG

Architecture:
    The main module demonstrates the complete platform initialization
    workflow: ConfigManager -> LLMManager -> GameEngine -> MetricsCalculator
    -> SessionManager -> Results. This pattern should be followed for
    custom integrations and extensions.
"""
#from dotenv import load_dotenv
#load_dotenv()
import argparse
import itertools
import json
import logging
import os
import sys
from datetime import datetime
from pathlib import Path

# When this file is executed directly (rather than imported as part of the
# package), put the directory one level above this file's directory on
# sys.path -- presumably so that the absolute ``negotiation_platform.*``
# imports that follow can be resolved.
if __name__ == "__main__":
    current_dir = Path(__file__).parent  # directory that holds this file
    parent_dir = current_dir.parent  # one level above that directory
    # Only insert the entry if it is not already on the path.
    if str(parent_dir) not in sys.path:
        sys.path.insert(0, str(parent_dir))

from negotiation_platform.core.llm_manager import LLMManager
from negotiation_platform.core.game_engine import GameEngine
from negotiation_platform.core.metrics_calculator import MetricsCalculator
from negotiation_platform.core.session_manager import SessionManager
from negotiation_platform.core.config_manager import ConfigManager


def setup_logging(level="INFO"):
    """Configure root logging to write to a log file and to the console.

    Calls ``logging.basicConfig`` with two handlers: a ``FileHandler`` that
    writes to ``negotiation_platform.log`` in the current working directory
    (UTF-8 encoded) and a ``StreamHandler`` for console output. Both use the
    format ``timestamp - logger name - level - message``.

    Args:
        level (str | int, optional): Logging threshold. Either a level name
            such as "DEBUG", "INFO", "WARNING" or "ERROR" (matched
            case-insensitively) or a numeric ``logging`` level.
            Defaults to "INFO".

    Raises:
        ValueError: If ``level`` is neither an integer nor the name of a
            numeric level constant in the ``logging`` module.

    Example:
        >>> setup_logging("debug")
        >>> logging.getLogger(__name__).info("Platform initialized")

    Note:
        ``logging.basicConfig`` does nothing when the root logger already
        has handlers, so call this once, early during start-up. No log
        rotation is configured.
    """
    if isinstance(level, int) and not isinstance(level, bool):
        numeric_level = level
    else:
        # Normalise the name before the lookup: ``getattr(logging, "debug")``
        # would return the module-level ``logging.debug`` function rather
        # than the DEBUG constant.
        numeric_level = getattr(logging, str(level).strip().upper(), None)
        if not isinstance(numeric_level, int):
            raise ValueError(f"Unknown logging level: {level!r}")

    logging.basicConfig(
        level=numeric_level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            # Explicit encoding so non-ASCII log messages do not depend on
            # the platform's default locale encoding.
            logging.FileHandler('negotiation_platform.log', encoding='utf-8'),
            logging.StreamHandler(),
        ],
    )
def run_single_negotiation(config_manager, models, game_type="company_car"):
    """Run one negotiation session between two models and print the outcome.

    Builds a fresh LLMManager, GameEngine, MetricsCalculator and
    SessionManager, runs a single session of ``game_type`` between the first
    two entries of ``models`` and prints a short summary of the result.

    Args:
        config_manager (ConfigManager): Provides the "model_configs" section
            (handed to LLMManager) and the per-game configuration via
            ``get_game_config(game_type)``.
        models (List[str]): Model identifiers. Only the first two are used
            as players; at least two must be supplied.
        game_type (str, optional): Game identifier passed to
            ``SessionManager.run_negotiation``. Defaults to "company_car".

    Returns:
        The result returned by ``SessionManager.run_negotiation``. This
        function reads the keys 'agreement_reached', 'agreement_round',
        'final_utilities' and 'metrics' from it through ``.get``.

    Raises:
        ValueError: If fewer than two models are supplied. Raised before any
            platform component is created.

    Example:
        >>> config = ConfigManager()
        >>> result = run_single_negotiation(config, ["model_a", "model_b"])
        >>> result.get('agreement_reached')

    Note:
        Models are not loaded or unloaded here; that is left to the
        LLMManager / SessionManager.
    """
    # Fail fast: slicing a too-short list would silently hand the session a
    # single player (or none).
    if len(models) < 2:
        raise ValueError(
            f"run_single_negotiation needs at least two models, "
            f"got {len(models)}: {models}"
        )

    print(f"\n=== Running Single {game_type.replace('_', ' ').title()} Negotiation ==")

    # Initialize components
    llm_manager = LLMManager(config_manager.get_config("model_configs"))
    game_engine = GameEngine()
    metrics_calculator = MetricsCalculator()
    session_manager = SessionManager(llm_manager, game_engine, metrics_calculator)

    # No explicit load_model calls here; loading is left to the manager.
    print(f"🔄 Models will be loaded on-demand: {models}")

    # Run negotiation with the first two models as players
    players = models[:2]
    game_config = config_manager.get_game_config(game_type)
    result = session_manager.run_negotiation(
        game_type=game_type,
        players=players,
        game_config=game_config,
    )

    # Display results. The agreement round only exists when an agreement was
    # reached; utilities and metrics are read with safe defaults, so they
    # are shown for every outcome.
    print(f"Agreement reached: {result.get('agreement_reached', False)}")
    if result.get('agreement_reached'):
        print(f"Agreement round: {result.get('agreement_round', 'N/A')}")
    print(f"Final utilities: {result.get('final_utilities', {})}")
    print(f"Metrics: {result.get('metrics', {})}")

    # No unload_model calls: the models stay with the manager for reuse.
    print("🔄 Keeping models loaded for potential reuse")

    return result
def run_model_comparison(config_manager, models, games=None, runs_per_pair=3):
    """Run every unique model pair through each game and save the results.

    For each game type, every unordered pair of ``models`` is loaded, played
    against each other ``runs_per_pair`` times and unloaded again. After all
    runs finish, the collected results are written to a timestamped JSON
    file and a summary is printed to the console.

    Args:
        config_manager (ConfigManager): Provides the "model_configs" section
            (handed to LLMManager), the per-game configuration via
            ``get_game_config`` and the "platform_config" section, whose
            optional "results_dir" entry selects the output directory
            (default "results").
        models (List[str]): Model identifiers to compare pairwise.
        games (List[str], optional): Game types to run. Defaults to
            ["company_car", "resource_allocation",
            "integrative_negotiations"].
        runs_per_pair (int, optional): Number of sessions per model pair and
            game. Must be at least 1. Defaults to 3.

    Returns:
        Dict[str, Dict[str, list]]: ``game_type -> "<model1>_vs_<model2>" ->
        list of session results`` as returned by
        ``SessionManager.run_negotiation``.

    Raises:
        ValueError: If ``runs_per_pair`` is smaller than 1. Raised before
            any platform component is created.

    Output:
        - ``<results_dir>/model_comparison_<YYYYmmdd_HHMMSS>.json`` holding
          the full result structure (non-JSON values are stringified).
        - Console summary produced by ``_generate_comparison_summary``.

    Note:
        Results are written once, after all games have finished; an error
        during a session aborts the comparison without a results file.
        Models of the current pair are still unloaded in that case.
    """
    if runs_per_pair < 1:
        raise ValueError(f"runs_per_pair must be at least 1, got {runs_per_pair}")

    if games is None:
        games = ["company_car", "resource_allocation", "integrative_negotiations"]

    print("\n=== Running Model Comparison ===")
    print(f"Models: {models}")
    print(f"Games: {games}")

    # Initialize components
    llm_manager = LLMManager(config_manager.get_config("model_configs"))
    game_engine = GameEngine()
    metrics_calculator = MetricsCalculator()
    session_manager = SessionManager(llm_manager, game_engine, metrics_calculator)

    comparison_results = {}

    for game_type in games:
        print(f"\nTesting {game_type}...")
        game_config = config_manager.get_game_config(game_type)
        game_results = {}

        # Every unordered pair exactly once, in list order.
        for model1, model2 in itertools.combinations(models, 2):
            pair_key = f"{model1}_vs_{model2}"
            print(f" {pair_key}...")

            llm_manager.load_model(model1)
            llm_manager.load_model(model2)
            try:
                # Several sessions per pair so results can be aggregated.
                game_results[pair_key] = [
                    session_manager.run_negotiation(
                        game_type=game_type,
                        players=[model1, model2],
                        game_config=game_config,
                    )
                    for _ in range(runs_per_pair)
                ]
            finally:
                # Unload even when a session raised, so a failure does not
                # leave this pair's models loaded.
                llm_manager.unload_model(model1)
                llm_manager.unload_model(model2)

        comparison_results[game_type] = game_results

    # Persist first so that a problem while summarising cannot lose the data.
    results_dir = Path(config_manager.get_config("platform_config").get("results_dir", "results"))
    results_dir.mkdir(parents=True, exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    results_file = results_dir / f"model_comparison_{timestamp}.json"

    with open(results_file, 'w', encoding='utf-8') as f:
        json.dump(comparison_results, f, indent=2, default=str)

    # Generate summary
    _generate_comparison_summary(comparison_results)

    print(f"\nResults saved to: {results_file}")

    return comparison_results
def _generate_comparison_summary(results):
    """Print agreement counts and averaged metrics for every model pair.

    Args:
        results (Dict[str, Dict[str, List[Dict]]]): Structure produced by
            ``run_model_comparison``: ``game_type -> pair key -> list of
            session results``. Each session result is read with
            ``.get('agreement_reached', False)`` and passed on to
            ``_calculate_average_metrics``.

    Output:
        One section per game type (title-cased, underscores replaced by
        spaces). For each pair: ``<agreements>/<runs> agreements (<pct>%)``
        followed by one line per averaged metric. A pair with no recorded
        runs is reported as ``0/0 agreements (n/a)``.

    Example Output:
        === COMPARISON SUMMARY ===

        Company Car:
         model_a_vs_model_b: 2/3 agreements (66.7%)
         utility_surplus: {'model_a': 1250.0, 'model_b': 980.5}
    """
    print("\n=== COMPARISON SUMMARY ===")

    for game_type, game_results in results.items():
        print(f"\n{game_type.replace('_', ' ').title()}:")

        for pair, pair_results in game_results.items():
            total_runs = len(pair_results)
            if total_runs == 0:
                # Nothing was run for this pair: a percentage is undefined
                # and there are no metrics to average.
                print(f" {pair}: 0/0 agreements (n/a)")
                continue

            agreements = sum(
                1 for session in pair_results
                if session.get('agreement_reached', False)
            )
            print(f" {pair}: {agreements}/{total_runs} agreements "
                  f"({agreements / total_runs * 100:.1f}%)")

            # Calculate average metrics
            for metric, values in _calculate_average_metrics(pair_results).items():
                print(f" {metric}: {values}")


def _calculate_average_metrics(results):
    """Average each metric per player across several session results.

    Args:
        results (List[Dict[str, Any]]): Session results. Only the 'metrics'
            entry is read; it is expected to map
            ``metric_name -> {player: numeric value}``.

    Returns:
        Dict[str, Dict[str, float]]: ``metric_name -> player -> mean``. The
        mean of a metric/player combination is taken over the sessions that
        actually report a usable value for it. Returns an empty dict when no
        session carries metrics.

    Handling of incomplete data:
        - Sessions without a (non-empty, dict) 'metrics' entry are skipped.
        - Metric entries that are not dicts are skipped.
        - Values that cannot be added to a number are ignored.
        - A player missing from some sessions is averaged over the sessions
          in which that player appears.

    Example:
        >>> _calculate_average_metrics([
        ...     {'metrics': {'utility_surplus': {'player1': 100, 'player2': 80}}},
        ...     {'metrics': {'utility_surplus': {'player1': 120, 'player2': 90}}},
        ... ])
        {'utility_surplus': {'player1': 110.0, 'player2': 85.0}}
    """
    # metric_name -> player -> (running sum, number of contributing sessions)
    totals = {}

    for result in results:
        session_metrics = result.get('metrics')
        if not session_metrics or not isinstance(session_metrics, dict):
            continue

        for metric_name, metric_values in session_metrics.items():
            if not isinstance(metric_values, dict):
                continue
            per_player = totals.setdefault(metric_name, {})

            for player, value in metric_values.items():
                running_sum, samples = per_player.get(player, (0, 0))
                try:
                    per_player[player] = (running_sum + value, samples + 1)
                except TypeError:
                    # Non-numeric value (e.g. a string or None): ignore it.
                    continue

    # Each stored entry has at least one sample, so the division is safe.
    return {
        metric_name: {
            player: running_sum / samples
            for player, (running_sum, samples) in per_player.items()
        }
        for metric_name, per_player in totals.items()
    }
def _build_arg_parser():
    """Create the argument parser for the command-line interface."""
    parser = argparse.ArgumentParser(description="Negotiation Platform")
    parser.add_argument("--quick", action="store_true",
                        help="Run quick single negotiation test")
    parser.add_argument("--comparison", action="store_true",
                        help="Run full model comparison")
    parser.add_argument("--models", nargs="+",
                        default=["model_a", "model_b", "model_c"],
                        help="Models to use")
    parser.add_argument("--game",
                        choices=["company_car", "company_car_arena",
                                 "resource_allocation", "integrative_negotiations"],
                        default="company_car",
                        help="Game type for single run")
    parser.add_argument("--log-level", default="INFO",
                        choices=["DEBUG", "INFO", "WARNING", "ERROR"])
    return parser


def main(argv=None):
    """Command-line entry point of the Negotiation Platform.

    Parses the command line, configures logging, creates a ConfigManager
    and then runs one of the workflows below.

    Args:
        argv (List[str], optional): Argument list to parse instead of
            ``sys.argv[1:]``. Defaults to None, which makes argparse read
            the process arguments.

    Command-Line Options:
        --quick: Run a single negotiation (``run_single_negotiation``).
        --comparison: Run the pairwise comparison (``run_model_comparison``).
        --models: Models to use (default: model_a model_b model_c).
        --game: Game type for single runs (default: company_car).
        --log-level: DEBUG, INFO, WARNING or ERROR (default: INFO).

    Execution Modes:
        1. Quick mode (--quick); takes precedence if --comparison is also
           given.
        2. Comparison mode (--comparison).
        3. Interactive mode (neither flag): asks on stdin whether to run a
           single negotiation ("1") or a comparison ("2"); any other answer
           prints "Invalid choice" and returns.

    Error Handling:
        - KeyboardInterrupt: prints a notice and returns without the
          success message.
        - Any other exception: logged with its traceback, then re-raised.
        - Invalid command-line arguments: argparse exits via SystemExit.

    Example Usage:
        python main.py --quick --models model_a model_b --game company_car
        python main.py --comparison --models model_a model_b model_c
        python main.py --log-level DEBUG
    """
    args = _build_arg_parser().parse_args(argv)

    # Setup
    setup_logging(args.log_level)
    config_manager = ConfigManager()

    print("=== Negotiation Platform ===")
    print(f"Available models: {list(config_manager.get_config('model_configs').keys())}")
    print(f"Using models: {args.models}")

    try:
        if args.quick:
            # Quick single run
            run_single_negotiation(config_manager, args.models, args.game)
        elif args.comparison:
            # Full comparison
            run_model_comparison(config_manager, args.models)
        else:
            # Interactive mode
            print("\nAvailable options:")
            print("1. Single negotiation")
            print("2. Model comparison")

            choice = input("Choose option (1/2): ").strip()

            if choice == "1":
                run_single_negotiation(config_manager, args.models, args.game)
            elif choice == "2":
                run_model_comparison(config_manager, args.models)
            else:
                print("Invalid choice")
                return
    except KeyboardInterrupt:
        print("\n\nInterrupted by user")
        # An interrupted run did not complete; skip the success message.
        return
    except Exception as exc:
        # logging.exception records the traceback along with the message.
        logging.exception("Error running platform: %s", exc)
        raise

    print("\nPlatform completed successfully!")
# Run the command-line interface only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()