#!/usr/bin/env python3
"""
Negotiation Platform - Main Entry Point
=======================================
This module serves as the primary entry point for the Negotiation Platform,
providing command-line interface, demonstration capabilities, and example
usage patterns for researchers and developers.
Key Features:
- Command-line interface for quick testing and experimentation
- Single negotiation runs for focused analysis
- Comprehensive model comparison across multiple games
- Interactive mode for guided exploration
- Configurable logging and output options
- Integration examples for all platform components
Usage Modes:
1. Quick Mode: Single negotiation with default settings
2. Comparison Mode: Full model comparison across games
3. Interactive Mode: Guided selection of options
4. Custom Mode: Programmatic usage with specific configurations
Example Command Lines:
python main.py --quick --models model_a model_b --game company_car
python main.py --comparison --models model_a model_b model_c
python main.py --log-level DEBUG
Architecture:
The main module demonstrates the complete platform initialization
workflow: ConfigManager -> LLMManager -> GameEngine -> MetricsCalculator
-> SessionManager -> Results. This pattern should be followed for
custom integrations and extensions.
"""
#from dotenv import load_dotenv
#load_dotenv()
import argparse
import json
import logging
import numbers
import os
import sys
from datetime import datetime
from pathlib import Path
# When this file is executed directly as a script (rather than imported),
# put the directory that contains this file's own directory on ``sys.path``
# so that the absolute ``negotiation_platform.*`` imports that follow can be
# resolved. NOTE(review): this presumes the file lives directly inside the
# ``negotiation_platform`` package directory - confirm against the repo layout.
if __name__ == "__main__":
    # Resolve to an absolute path first. Before Python 3.9 the ``__file__`` of
    # the main script could be relative (e.g. ``main.py``); its
    # ``.parent.parent`` is then ``.`` rather than the real parent directory.
    current_dir = Path(__file__).resolve().parent
    parent_dir = current_dir.parent
    if str(parent_dir) not in sys.path:
        sys.path.insert(0, str(parent_dir))
from negotiation_platform.core.llm_manager import LLMManager
from negotiation_platform.core.game_engine import GameEngine
from negotiation_platform.core.metrics_calculator import MetricsCalculator
from negotiation_platform.core.session_manager import SessionManager
from negotiation_platform.core.config_manager import ConfigManager
# Logging configuration
def setup_logging(level="INFO"):
    """Configure root logging to write to a log file and to the console.

    Calls ``logging.basicConfig`` with two handlers: a ``FileHandler`` that
    writes to ``negotiation_platform.log`` (relative to the current working
    directory, UTF-8 encoded) and a ``StreamHandler``. Records are formatted
    as ``timestamp - logger name - level - message``. No log rotation is
    configured.

    Args:
        level (str, optional): Name of a standard logging level, e.g.
            ``"DEBUG"``, ``"INFO"``, ``"WARNING"`` or ``"ERROR"``. The name is
            matched case-insensitively. Defaults to ``"INFO"``.

    Raises:
        ValueError: If ``level`` does not name a logging level constant.

    Example:
        >>> setup_logging("DEBUG")
        >>> logging.getLogger(__name__).info("Platform initialized")

    Note:
        ``logging.basicConfig`` leaves the configuration untouched when the
        root logger already has handlers, so call this once, early during
        start-up, before other components begin logging.
    """
    level_name = str(level).strip().upper()
    numeric_level = getattr(logging, level_name, None)
    # A bare ``getattr(logging, level)`` would return unrelated module
    # attributes (for instance the function ``logging.debug`` for "debug"),
    # so only integer level constants are accepted.
    if not isinstance(numeric_level, int):
        raise ValueError(f"Unknown logging level: {level!r}")

    logging.basicConfig(
        level=numeric_level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            # Explicit encoding so non-ASCII log text is written the same way
            # regardless of the platform's default locale encoding.
            logging.FileHandler('negotiation_platform.log', encoding='utf-8'),
            logging.StreamHandler(),
        ],
    )
# Single-negotiation workflow
def run_single_negotiation(config_manager, models, game_type="company_car"):
    """Run one negotiation session between the first two given models.

    Builds the platform components (``LLMManager``, ``GameEngine``,
    ``MetricsCalculator``, ``SessionManager``), runs a single session through
    ``SessionManager.run_negotiation`` and prints a short outcome summary.
    Models are not pre-loaded or unloaded here; loading is left to the
    session/LLM manager.

    Args:
        config_manager (ConfigManager): Source of the ``"model_configs"``
            section (passed to ``LLMManager``) and of the per-game
            configuration (``get_game_config(game_type)``).
        models (List[str]): Model identifiers. At least two are required;
            only the first two are used as players.
        game_type (str, optional): Game identifier handed to the session
            manager and used to look up the game configuration. Defaults to
            ``"company_car"``.

    Returns:
        The object returned by ``SessionManager.run_negotiation``. This
        function reads the keys ``agreement_reached``, ``agreement_round``,
        ``final_utilities`` and ``metrics`` from it via ``.get``, so it is
        presumably a dict - confirm against ``SessionManager``.

    Raises:
        ValueError: If fewer than two models are supplied.

    Example:
        >>> config = ConfigManager()
        >>> result = run_single_negotiation(config, ["model_a", "model_b"])
        >>> result.get("agreement_reached")
    """
    # A negotiation needs two participants; fail early with a clear message
    # instead of handing a one-player (or empty) list to the session manager.
    if not models or len(models) < 2:
        raise ValueError(
            "run_single_negotiation requires at least two models, "
            f"got {models!r}"
        )

    print(f"\n=== Running Single {game_type.replace('_', ' ').title()} Negotiation ==")

    # Wire up the platform components.
    llm_manager = LLMManager(config_manager.get_config("model_configs"))
    session_manager = SessionManager(llm_manager, GameEngine(), MetricsCalculator())

    # Nothing is loaded up front; this function never calls load_model itself.
    print(f"🔄 Models will be loaded on-demand: {models}")

    result = session_manager.run_negotiation(
        game_type=game_type,
        players=models[:2],  # only the first two models take part
        game_config=config_manager.get_game_config(game_type),
    )

    # Console summary of the outcome.
    agreed = result.get('agreement_reached', False)
    print(f"Agreement reached: {agreed}")
    if agreed:
        print(f"Agreement round: {result.get('agreement_round', 'N/A')}")
    print(f"Final utilities: {result.get('final_utilities', {})}")
    print(f"Metrics: {result.get('metrics', {})}")

    # Deliberately no unload here, so a caller can reuse the loaded models.
    print("🔄 Keeping models loaded for potential reuse")
    return result
# Multi-model comparison workflow
def run_model_comparison(config_manager, models, games=None, runs_per_pair=3):
    """Run every unique model pair through each game and save the results.

    For each game type, each unordered pair of entries in ``models`` is
    loaded, played ``runs_per_pair`` times via
    ``SessionManager.run_negotiation`` and unloaded again. After all games
    have finished, a console summary is printed and the complete result
    structure is written once to a timestamped JSON file.

    Args:
        config_manager (ConfigManager): Provides ``"model_configs"``, the
            per-game configurations and ``"platform_config"`` (whose optional
            ``"results_dir"`` entry selects the output directory, default
            ``"results"``).
        models (List[str]): Model identifiers; pairs are formed by position,
            each earlier entry against each later one.
        games (List[str], optional): Game types to run. Defaults to
            ``["company_car", "resource_allocation",
            "integrative_negotiations"]``.
        runs_per_pair (int, optional): Number of sessions per model pair and
            game. Defaults to 3.

    Returns:
        Dict[str, Dict[str, list]]: ``game_type -> "<m1>_vs_<m2>" -> list`` of
        the objects returned by ``run_negotiation``.

    Raises:
        ValueError: If ``runs_per_pair`` is smaller than 1.

    Note:
        Results are written to disk only after every game has completed;
        an exception part-way through means nothing is saved. Values that
        ``json`` cannot encode are stringified (``default=str``).
    """
    if runs_per_pair < 1:
        raise ValueError(f"runs_per_pair must be at least 1, got {runs_per_pair!r}")
    if games is None:
        games = ["company_car", "resource_allocation", "integrative_negotiations"]

    print("\n=== Running Model Comparison ===")
    print(f"Models: {models}")
    print(f"Games: {games}")

    # Wire up the platform components.
    llm_manager = LLMManager(config_manager.get_config("model_configs"))
    game_engine = GameEngine()
    metrics_calculator = MetricsCalculator()
    session_manager = SessionManager(llm_manager, game_engine, metrics_calculator)

    comparison_results = {}
    for game_type in games:
        print(f"\nTesting {game_type}...")
        game_config = config_manager.get_game_config(game_type)
        game_results = {}

        # Each earlier model against each later one: unique unordered pairs.
        for index, model1 in enumerate(models):
            for model2 in models[index + 1:]:
                pair_key = f"{model1}_vs_{model2}"
                print(f" {pair_key}...")

                llm_manager.load_model(model1)
                llm_manager.load_model(model2)
                try:
                    game_results[pair_key] = [
                        session_manager.run_negotiation(
                            game_type=game_type,
                            players=[model1, model2],
                            game_config=game_config,
                        )
                        for _ in range(runs_per_pair)
                    ]
                finally:
                    # Release the pair even when a session raised, so a failed
                    # run does not leave both models loaded.
                    llm_manager.unload_model(model1)
                    llm_manager.unload_model(model2)

        comparison_results[game_type] = game_results

    _generate_comparison_summary(comparison_results)

    # Persist everything in one timestamped JSON file.
    platform_config = config_manager.get_config("platform_config")
    results_dir = Path(platform_config.get("results_dir", "results"))
    # parents=True: a nested results directory must not abort the save.
    results_dir.mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    results_file = results_dir / f"model_comparison_{timestamp}.json"
    with open(results_file, 'w', encoding='utf-8') as f:
        json.dump(comparison_results, f, indent=2, default=str)

    print(f"\nResults saved to: {results_file}")
    return comparison_results
def _generate_comparison_summary(results):
"""Generate and display comprehensive comparison summary statistics.
Processes raw comparison results to calculate and display meaningful
summary statistics including agreement rates, average metrics, and
performance trends across different model pairs and game types.
Args:
results (Dict[str, Dict[str, List[Dict]]]): Hierarchical results from
run_model_comparison containing game types, model pairs, and
individual session results with metrics and outcomes.
Output Format:
Console display organized by game type showing:
- Model pair identifiers (e.g., "model_a_vs_model_b")
- Agreement rates as fractions and percentages
- Average metric values across all runs for each pair
- Performance comparison indicators
Summary Statistics:
- Agreement Rate: Percentage of sessions reaching successful agreements
- Average Metrics: Mean values across runs for each performance metric
- Comparative Analysis: Relative performance indicators between pairs
Example Output:
=== COMPARISON SUMMARY ===
Company Car:
model_a_vs_model_b: 2/3 agreements (66.7%)
utility_surplus: {'model_a': 1250.0, 'model_b': 980.5}
risk_minimization: {'model_a': 85.2, 'model_b': 72.1}
model_a_vs_model_c: 3/3 agreements (100.0%)
utility_surplus: {'model_a': 1450.2, 'model_c': 1120.8}
Note:
This function provides immediate feedback during comparison runs
and serves as a quick assessment tool before detailed analysis
of the saved JSON results.
"""
print(f"\n=== COMPARISON SUMMARY ===")
for game_type, game_results in results.items():
print(f"\n{game_type.replace('_', ' ').title()}:")
for pair, pair_results in game_results.items():
agreement_rate = sum(1 for r in pair_results if r.get('agreement_reached', False))
total_runs = len(pair_results)
print(f" {pair}: {agreement_rate}/{total_runs} agreements "
f"({agreement_rate / total_runs * 100:.1f}%)")
# Calculate average metrics
if pair_results:
avg_metrics = _calculate_average_metrics(pair_results)
for metric, values in avg_metrics.items():
print(f" {metric}: {values}")
def _calculate_average_metrics(results):
"""Calculate average metric values across multiple negotiation runs.
Processes a collection of negotiation session results to compute mean
metric values for each player across all runs. This provides statistical
aggregation for reliable performance assessment when multiple runs are
conducted for the same model pair.
Args:
results (List[Dict[str, Any]]): List of individual session results,
each containing a 'metrics' dictionary with player-specific
metric values (e.g., utility_surplus, risk_minimization).
Returns:
Dict[str, Dict[str, float]]: Averaged metrics organized as:
metric_name -> player_id -> average_value
Returns empty dict if no valid results with metrics are provided.
Calculation Process:
1. Aggregate metric values across all runs for each player
2. Count valid sessions containing metric data
3. Calculate arithmetic mean for each metric-player combination
4. Handle missing or incomplete metric data gracefully
Example:
>>> session_results = [
... {'metrics': {'utility_surplus': {'player1': 100, 'player2': 80}}},
... {'metrics': {'utility_surplus': {'player1': 120, 'player2': 90}}}
... ]
>>> averages = _calculate_average_metrics(session_results)
>>> print(averages)
{'utility_surplus': {'player1': 110.0, 'player2': 85.0}}
Error Handling:
- Skips sessions without 'metrics' field
- Handles missing players in some sessions
- Returns empty dict if no valid data found
- Gracefully processes incomplete metric sets
Note:
This function assumes all metric values are numeric and suitable
for arithmetic averaging. Non-numeric metrics are ignored to
prevent calculation errors.
"""
metrics_sums = {}
count = 0
for result in results:
if result.get('metrics'):
count += 1
for metric_name, metric_values in result['metrics'].items():
if metric_name not in metrics_sums:
metrics_sums[metric_name] = {}
for player, value in metric_values.items():
if player not in metrics_sums[metric_name]:
metrics_sums[metric_name][player] = 0
metrics_sums[metric_name][player] += value
# Calculate averages
avg_metrics = {}
if count > 0:
for metric_name, player_sums in metrics_sums.items():
avg_metrics[metric_name] = {
player: value / count for player, value in player_sums.items()
}
return avg_metrics
# Command-line entry point
def main():
    """Parse command-line arguments and run the requested workflow.

    Command-line options:
        --quick: Run one negotiation via ``run_single_negotiation``.
        --comparison: Run ``run_model_comparison`` with its default games.
        --models: One or more model identifiers
            (default: ``model_a model_b model_c``).
        --game: Game type for single runs; one of ``company_car``,
            ``company_car_arena``, ``resource_allocation``,
            ``integrative_negotiations`` (default: ``company_car``).
        --log-level: ``DEBUG``, ``INFO``, ``WARNING`` or ``ERROR``
            (default: ``INFO``).

    Without ``--quick`` or ``--comparison`` the user is asked interactively
    to choose between a single negotiation (1) and a model comparison (2);
    any other answer prints "Invalid choice" and returns.

    Error handling:
        - ``KeyboardInterrupt`` prints a notice and returns without the
          completion message.
        - Any other exception is logged with its traceback and re-raised.

    Example:
        python main.py --quick --models model_a model_b --game company_car
        python main.py --comparison --models model_a model_b model_c
        python main.py --log-level DEBUG
    """
    parser = argparse.ArgumentParser(description="Negotiation Platform")
    parser.add_argument("--quick", action="store_true",
                        help="Run quick single negotiation test")
    parser.add_argument("--comparison", action="store_true",
                        help="Run full model comparison")
    parser.add_argument("--models", nargs="+",
                        default=["model_a", "model_b", "model_c"],
                        help="Models to use")
    parser.add_argument("--game",
                        choices=["company_car", "company_car_arena",
                                 "resource_allocation", "integrative_negotiations"],
                        default="company_car", help="Game type for single run")
    parser.add_argument("--log-level", default="INFO",
                        choices=["DEBUG", "INFO", "WARNING", "ERROR"])
    args = parser.parse_args()

    # Logging first, so everything after it is captured.
    setup_logging(args.log_level)
    config_manager = ConfigManager()

    print("=== Negotiation Platform ===")
    print(f"Available models: {list(config_manager.get_config('model_configs').keys())}")
    print(f"Using models: {args.models}")

    try:
        if args.quick:
            run_single_negotiation(config_manager, args.models, args.game)
        elif args.comparison:
            run_model_comparison(config_manager, args.models)
        else:
            # Interactive mode: let the user pick the workflow.
            print("\nAvailable options:")
            print("1. Single negotiation")
            print("2. Model comparison")
            choice = input("Choose option (1/2): ").strip()
            if choice == "1":
                run_single_negotiation(config_manager, args.models, args.game)
            elif choice == "2":
                run_model_comparison(config_manager, args.models)
            else:
                print("Invalid choice")
                return
    except KeyboardInterrupt:
        print("\n\nInterrupted by user")
        # An aborted run must not be reported as a successful completion.
        return
    except Exception as e:
        # logging.exception records the traceback along with the message.
        logging.exception("Error running platform: %s", e)
        raise

    print("\nPlatform completed successfully!")
# Start the command-line interface only when this file is executed as a
# script; importing the module must not trigger a run.
if __name__ == "__main__":
    main()