# Source code for results.metrics_statistics

"""
Negotiation Metrics Analysis and Statistics
===========================================

This module provides comprehensive statistical analysis tools for negotiation
metrics and performance data. It processes negotiation logs and computes
advanced statistical measures for research and evaluation purposes.

Key Features:
    - Log parsing and data extraction from negotiation sessions
    - Agent-based metrics analysis with model performance tracking
    - Advanced statistical testing for research validation
    - Performance comparison across different negotiation scenarios
    - Research-grade data processing and reporting

Analysis Capabilities:
    - Agent Performance Analysis: Tracks individual model metrics over time
    - Statistical Distribution Testing: Validates metric distributions
    - Comparative Analysis: Compares performance across models and games
    - Correlation Analysis: Identifies relationships between metrics
    - Longitudinal Analysis: Tracks performance changes over sessions

Usage:
    This module is designed for researchers analyzing negotiation platform
    output logs. It provides both command-line and programmatic interfaces
    for comprehensive statistical analysis of agent performance metrics.

Example:
    python metrics_statistics.py negotiation_log.out
"""

import re
import sys
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
from scipy.stats import chi2_contingency


# [docs]
def parse_negotiation_log_agent_metrics(file_path):
    """
    Parses a negotiation log file to extract agent-based metrics, producing
    one row per model per game (iteration).

    The log is split on ``=== Iteration N/M ===`` headers. An iteration is
    skipped when its block is blank or when fewer than two
    ``model_x = role`` pairs are found on its ``[ROLE ASSIGNMENT]`` line.

    Args:
        file_path (str): Path to the negotiation log file (read as UTF-8).

    Returns:
        tuple: A tuple containing:
            - pd.DataFrame: One row per model per parsed iteration with the
              columns 'Iteration', 'Model', 'Role', 'Is_First_Mover',
              'First_Mover_Model', 'Game_type', 'Agreement_Reached' and
              'Agreement_Round', plus one column for every metric found in
              the log ('Risk_Minimization', 'Deadline_Sensitivity',
              'Feasibility', 'Utility_Surplus'). When nothing could be
              parsed the frame is empty but still carries the eight base
              columns.
            - str: The detected game type ('integrative_negotiation',
              'company_car', 'resource_allocation' or 'unknown').

    Raises:
        FileNotFoundError: If the specified file does not exist.
        ValueError: If a captured metric value is not a valid number
            (e.g. '1.2.3').

    Example:
        >>> df, game_type = parse_negotiation_log_agent_metrics("log_file.out")
        >>> print(game_type)
        'integrative_negotiation'
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        log_text = f.read()

    game_type = _detect_game_type(log_text)

    # re.split with one capture group yields
    # [preamble, num_1, block_1, num_2, block_2, ...].
    iteration_blocks = re.split(r'===\s*Iteration\s+(\d+)/\d+\s*===', log_text)
    data = []
    for iteration_text, block in zip(iteration_blocks[1::2], iteration_blocks[2::2]):
        if not block.strip():
            continue
        iteration_num = int(iteration_text)

        # Parse role assignments
        model_role_mapping = {}
        role_assignment_match = re.search(r'🎲\s*\[ROLE ASSIGNMENT\]\s*(.*)', block, re.IGNORECASE)
        if role_assignment_match:
            individual_assignments = re.findall(
                r'(model_[abc])\s*=\s*(\w+)', role_assignment_match.group(1), re.IGNORECASE
            )
            for model, role in individual_assignments:
                model_role_mapping[model] = role.upper()
        # A game needs at least two participants to be comparable.
        if len(model_role_mapping) < 2:
            continue

        # Parse first mover (the two log formats phrase the first proposal differently)
        first_proposal = (
            re.search(r'💡\s+Player\s+(model_[abc])\s+made\s+proposal\s+\(#1/4\)', block, re.IGNORECASE)
            or re.search(r'💡\s+Player\s+(model_[abc])\s+made\s+offer.*\(proposal\s+1/4\)', block, re.IGNORECASE)
        )
        first_mover = first_proposal.group(1) if first_proposal else 'unknown'

        # Parse agreement information
        agreement_reached = False
        agreement_round = None
        agreement_reached_match = re.search(r'Agreement\s+reached:\s*(True|False)', block, re.IGNORECASE)
        if agreement_reached_match:
            agreement_reached = agreement_reached_match.group(1).lower() == 'true'
        if agreement_reached:
            agreement_round_match = re.search(r'Agreement\s+round:\s*(\d+)', block, re.IGNORECASE)
            if agreement_round_match:
                agreement_round = int(agreement_round_match.group(1))

        # One record per model taking part in this iteration
        for model, role in model_role_mapping.items():
            data.append({
                'Iteration': iteration_num,
                'Model': model,
                'Role': role,
                'Is_First_Mover': int(first_mover == model),
                'First_Mover_Model': first_mover,
                'Game_type': game_type,
                'Agreement_Reached': agreement_reached,
                'Agreement_Round': agreement_round,
                **_extract_model_metrics(block, model)
            })

    if data:
        df = pd.DataFrame(data)
    else:
        # Keep the schema even when nothing was parsed: a column-less frame
        # would raise KeyError on df['Iteration'] below and in callers.
        df = pd.DataFrame(columns=_AGENT_BASE_COLUMNS)
    print(f"✅ Parsed {len(df)} model performances across {df['Iteration'].nunique()} games")
    return df, game_type


# Columns present on every parsed record, in output order.
_AGENT_BASE_COLUMNS = [
    'Iteration', 'Model', 'Role', 'Is_First_Mover', 'First_Mover_Model',
    'Game_type', 'Agreement_Reached', 'Agreement_Round',
]

# (output column, metric name as written in the log, regex for its value).
# Only utility surplus accepts a minus sign.
_AGENT_METRIC_SPECS = (
    ('Risk_Minimization', 'risk_minimization', r'[\d.]+'),
    ('Deadline_Sensitivity', 'deadline_sensitivity', r'[\d.]+'),
    ('Feasibility', 'feasibility', r'[\d.]+'),
    ('Utility_Surplus', 'utility_surplus', r'[-\d.]+'),
)


def _detect_game_type(log_text):
    """Return the game type inferred from marker phrases in the log text."""
    if re.search(r'IT\s+Team.*Marketing\s+Team', log_text, re.IGNORECASE):
        return 'integrative_negotiation'
    if re.search(r'BUYER.*SELLER|company\s+car', log_text, re.IGNORECASE):
        return 'company_car'
    if re.search(r'Development\s+Team.*Marketing\s+Team|GPU|resource\s+allocation', log_text, re.IGNORECASE):
        return 'resource_allocation'
    return 'unknown'


def _extract_model_metrics(block, model):
    """Return {column: value} for each '✅ Calculated <metric>' line naming ``model``."""
    metrics = {}
    for column, logged_name, number in _AGENT_METRIC_SPECS:
        pattern = rf'✅\s+Calculated\s+{logged_name}:.*{re.escape(model)}[\'"]:\s*({number})'
        match = re.search(pattern, block, re.IGNORECASE)
        if match:
            metrics[column] = float(match.group(1))
    return metrics



# [docs]
def calculate_average_agreement_round(df):
    """
    Calculates the average round in which agreements were reached.

    Games are identified by the 'Iteration' column; only the first row of
    each iteration is used, since every model row of a game carries the same
    agreement information. Games flagged as agreed but without a recorded
    round are counted as agreements but excluded from the round statistics.

    Args:
        df (pd.DataFrame): DataFrame containing parsed agent-based metrics
            with 'Iteration', 'Agreement_Reached' and 'Agreement_Round'
            columns. An empty DataFrame is treated as zero games.

    Returns:
        dict: A dictionary containing:
            - 'total_games' (int): Total number of games analyzed.
            - 'agreements_reached' (int): Number of games where agreement was reached.
            - 'average_agreement_round' (float or None): Average round of
              agreement, or None if no agreement round is available.
            - 'agreement_rate' (float): Percentage of games that reached agreement.
            - 'agreement_rounds' (list): Recorded agreement rounds, one per
              agreed game with a known round.

    Example:
        >>> stats = calculate_average_agreement_round(df)
        >>> print(f"Average agreement round: {stats['average_agreement_round']:.2f}")
        Average agreement round: 3.25
    """
    print(f"\n## 📊 AVERAGE AGREEMENT ROUND ANALYSIS")
    print("=" * 50)

    if df.empty:
        # Nothing parsed (possibly a column-less frame): report zero games
        # instead of failing on the missing 'Iteration' column.
        total_games = 0
        agreements_reached = 0
        agreement_rounds = []
        avg_agreement_round = None
    else:
        # Get unique games (one entry per iteration since both models have same agreement info)
        unique_games = df.drop_duplicates(subset=['Iteration'])
        total_games = len(unique_games)
        agreements = unique_games[unique_games['Agreement_Reached'].eq(True)]
        agreements_reached = len(agreements)
        # Rounds may be missing (None/NaN) when the log lacks the
        # "Agreement round" line; keep only the usable numbers.
        known_rounds = pd.to_numeric(agreements['Agreement_Round'], errors='coerce').dropna()
        agreement_rounds = known_rounds.tolist()
        avg_agreement_round = float(known_rounds.mean()) if agreement_rounds else None

    agreement_rate = (agreements_reached / total_games) * 100 if total_games else 0.0

    print(f"📈 Agreement Statistics:")
    print(f"   Total games analyzed: {total_games}")
    print(f"   Agreements reached: {agreements_reached}")
    if agreements_reached > 0:
        print(f"   Agreement rate: {agreement_rate:.1f}%")
        if avg_agreement_round is not None:
            print(f"   Average agreement round: {avg_agreement_round:.2f}")
            print(f"   Agreement rounds distribution: {sorted(agreement_rounds)}")
        else:
            print(f"   Average agreement round: N/A (agreement round not recorded)")
    else:
        print(f"   Agreement rate: 0.0%")
        print(f"   Average agreement round: N/A (no agreements)")

    return {
        'total_games': total_games,
        'agreements_reached': agreements_reached,
        'average_agreement_round': avg_agreement_round,
        'agreement_rate': agreement_rate,
        'agreement_rounds': agreement_rounds
    }



# [docs]
def bias_corrected_metric_analysis(df, metric_name, metric_column):
    """
    Analyzes and reports bias-corrected model comparison for a given metric.

    Fits ``metric ~ is_model_a + C(Role) + C(Is_First_Mover)`` on the rows
    where the metric is present: a logit model when the metric only takes
    the values 0 and 1, ordinary least squares otherwise. The coefficient of
    ``is_model_a`` is reported as the adjusted difference (on the log-odds
    scale for the logit model).

    Predicted means are computed by predicting every observed row twice,
    once as if it belonged to each model, and averaging. Both models are
    therefore evaluated over the same mix of roles and first-mover
    positions, so the two means differ only through the model effect.

    Args:
        df (pd.DataFrame): DataFrame containing parsed agent-based metrics
            with 'Model', 'Role' and 'Is_First_Mover' columns. It is not
            modified.
        metric_name (str): Name of the metric being analyzed (e.g., 'Risk Minimization').
        metric_column (str): Column name in the DataFrame corresponding to the metric.

    Returns:
        dict or None: None when there is no data for the metric, when the
        data does not contain exactly two models, or when a 0/1 metric takes
        a single value (a logit model cannot be estimated). Otherwise a
        dictionary with:
            - 'model_a' (str): Name of the first model (alphabetical order).
            - 'model_b' (str): Name of the second model.
            - 'adjusted_difference' (float): Bias-adjusted difference between models.
            - 'predicted_mean_a' (float): Predicted mean for model_a.
            - 'predicted_mean_b' (float): Predicted mean for model_b.
            - 'p_value' (float): P-value of the test.
            - 'significant' (bool): Whether the result is statistically significant (p < 0.05).

    Example:
        >>> results = bias_corrected_metric_analysis(df, 'Risk Minimization', 'Risk_Minimization')
        >>> print(results['adjusted_difference'])
        0.15
    """
    print(f"\n## 🎯 {metric_name.upper()} - BIAS-ADJUSTED MODEL COMPARISON")
    print("=" * 70)
    df_clean = df.dropna(subset=[metric_column])
    if len(df_clean) == 0:
        print(f"No {metric_name} data available")
        return None
    models = sorted(df_clean['Model'].unique())
    if len(models) != 2:
        print(f"Expected 2 models, found {len(models)}: {models}")
        return None
    model_a, model_b = models
    # assign() returns a new frame, so the caller's DataFrame is never
    # written to and no chained-assignment warning is triggered.
    df_clean = df_clean.assign(is_model_a=(df_clean['Model'] == model_a).astype(int))

    # Choose regression type: OLS for continuous, Logit for binary
    observed_values = set(df_clean[metric_column].unique())
    is_binary = observed_values <= {0, 1}
    if is_binary and len(observed_values) < 2:
        # A logit model has no finite solution when the outcome never varies.
        print(f"{metric_name} is constant across all records; no model comparison possible")
        return None

    formula = f"{metric_column} ~ is_model_a + C(Role) + C(Is_First_Mover)"
    if is_binary:
        model = smf.logit(formula, data=df_clean).fit(disp=False)
    else:
        model = smf.ols(formula, data=df_clean).fit()
    coeff = model.params.get('is_model_a', 0)
    pval = model.pvalues.get('is_model_a', 1)

    # Counterfactual predictions over the observed covariate mix: only the
    # model indicator changes between the two predictions.
    pred_a = float(model.predict(df_clean.assign(is_model_a=1)).mean())
    pred_b = float(model.predict(df_clean.assign(is_model_a=0)).mean())

    print(model.summary())
    print(f"Bias-adjusted difference (model_a minus model_b): {coeff:.3f} (p = {pval:.4f})")
    print(f"Predicted means: {model_a}: {pred_a:.3f}, {model_b}: {pred_b:.3f}")
    return {
        'model_a': model_a,
        'model_b': model_b,
        'adjusted_difference': coeff,
        'predicted_mean_a': pred_a,
        'predicted_mean_b': pred_b,
        'p_value': pval,
        'significant': bool(pval < 0.05)
    }



# [docs]
def main():
    """
    Main function to perform bias-corrected analysis on agent-based metrics
    from a negotiation log file.

    Parses the log given as the first command-line argument, prints the
    agreement statistics and a bias-adjusted model comparison for each
    available metric, and exports the parsed records to a CSV file. The CSV
    path is the log path with a trailing '.out' replaced by
    '_agent_metrics_bias_corrected.csv'; when the log path does not end in
    '.out' the suffix is appended instead, so the input log is never
    overwritten.

    Usage:
        python compare_agent_metrics_bias_corrected.py <log_file.out>

    Args:
        None (command-line arguments are used).

    Returns:
        None: Outputs results to the console and exports corrected data to a CSV file.
        Exits with status 1 when no log file argument is given; returns
        without exporting when no data could be parsed.

    Example:
        $ python compare_agent_metrics_bias_corrected.py integrative_negotiation_1975553.out
    """
    if len(sys.argv) < 2:
        print("Usage: python compare_agent_metrics_bias_corrected.py <log_file.out>")
        sys.exit(1)
    file_path = sys.argv[1]
    print(f"\n{'='*80}")
    print(f"📂 AGENT METRICS BIAS-CORRECTED ANALYSIS: {file_path}")
    print(f"{'='*80}")
    df, game_type = parse_negotiation_log_agent_metrics(file_path)
    if df.empty:
        print("❌ ERROR: No data could be parsed from the log file.")
        return
    print(f"\n📊 Dataset Summary:")
    print(f"- Game type: {game_type.upper()}")
    print(f"- Total agent records: {len(df)} ({df['Iteration'].nunique()} games)")
    print(f"- Models: {df['Model'].unique()}")
    print(f"- Roles: {df['Role'].unique()}")
    metrics_config = [
        ('Risk Minimization', 'Risk_Minimization'),
        ('Deadline Sensitivity', 'Deadline_Sensitivity'),
        ('Feasibility', 'Feasibility'),
        ('Utility Surplus', 'Utility_Surplus')
    ]
    # Calculate average agreement round statistics
    agreement_stats = calculate_average_agreement_round(df)

    all_results = {}
    for metric_name, metric_column in metrics_config:
        if metric_column in df.columns and not df[metric_column].isna().all():
            result = bias_corrected_metric_analysis(df, metric_name, metric_column)
            all_results[metric_name] = result
        else:
            print(f"\n⚠️  {metric_name} data not available in parsed results")
    # Export
    # Only strip a trailing '.out'. str.replace() would leave a path without
    # '.out' unchanged (so the CSV would overwrite the input log) and would
    # also rewrite '.out' occurring in the middle of the path.
    log_suffix = '.out'
    if file_path.endswith(log_suffix):
        output_base = file_path[:-len(log_suffix)]
    else:
        output_base = file_path
    output_csv = f"{output_base}_agent_metrics_bias_corrected.csv"
    df.to_csv(output_csv, index=False)
    print(f"\n✅ Agent metrics data exported to '{output_csv}'")
    print(f"\nSUMMARY:")
    print(f"📊 Agreement Statistics:")
    print(f"- Agreement rate: {agreement_stats['agreement_rate']:.1f}% ({agreement_stats['agreements_reached']}/{agreement_stats['total_games']} games)")
    if agreement_stats['average_agreement_round'] is not None:
        print(f"- Average agreement round: {agreement_stats['average_agreement_round']:.2f}")
    else:
        print(f"- Average agreement round: N/A (no agreements reached)")

    print(f"\n🔬 Metric Comparisons:")
    for metric_name, result in all_results.items():
        if result and result['significant']:
            print(f"- {metric_name}: {result['model_a']} ({result['predicted_mean_a']:.3f}) vs {result['model_b']} ({result['predicted_mean_b']:.3f}), diff = {result['adjusted_difference']:.3f} (p = {result['p_value']:.4f})")
        elif result:
            print(f"- {metric_name}: No significant difference (p = {result['p_value']:.4f})")
        else:
            print(f"- {metric_name}: Not available")

    
# Run the command-line analysis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()