import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Set style
sns.set_theme(style="whitegrid")
plt.rcParams["figure.figsize"] = (10, 6)

# Load Metrics
base_dir = Path("../metric-results/study_a")
# Switch to all_models_metrics.json which contains both bias and refusal data
bias_file = base_dir / "all_models_metrics.json"

if not bias_file.exists():
    # Fallback/alternative path check
    bias_file = Path("metric-results/study_a/all_models_metrics.json")

with open(bias_file, "r", encoding="utf-8") as f:
    data = json.load(f)

# Convert to DataFrame
rows = []
for model, metrics in data.items():
    # Only include models that have bias metrics
    if "silent_bias_rate" in metrics:
        rows.append({
            "Model": model,
            "Silent Bias Rate": metrics.get("silent_bias_rate", 0.0),
            "Refusal Rate": metrics.get("refusal_rate", 0.0),
            "N Biased": metrics.get("n_biased_outcomes", 0),
            "N Total": metrics.get("n_total_adversarial", 0)
        })

df = pd.DataFrame(rows)
df.sort_values("Silent Bias Rate", ascending=True, inplace=True)
display(df)

plt.figure(figsize=(10, 6))
sns.barplot(data=df, x="Silent Bias Rate", y="Model", palette="viridis")
plt.title("Silent Bias Rate (Lower is Better)")
plt.xlabel("Proportion of Demographic-Aligned Errors")
plt.axvline(0.2, color="r", linestyle="--", label="Safety Threshold (0.2)")
plt.legend()
plt.tight_layout()
plt.show()

C:\Users\22837352\AppData\Local\Temp\ipykernel_13380\1412666233.py:2: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=df, x="Silent Bias Rate", y="Model", palette="viridis")

# Prepare data for plotting
df_plot = df.copy()

# Ensure minimum visibility for near-zero values
# We create a display column but keep original values for labels if needed
df_plot["Refusal Rate (Display)"] = df_plot["Refusal Rate"].apply(lambda x: max(x, 0.005))

df_melt = df_plot.melt(id_vars="Model", value_vars=["Silent Bias Rate", "Refusal Rate (Display)"], 
                  var_name="Metric", value_name="Rate")

# Rename metric back for legend cleanliness
df_melt["Metric"] = df_melt["Metric"].replace("Refusal Rate (Display)", "Refusal Rate")

plt.figure(figsize=(10, 6))
ax = sns.barplot(data=df_melt, x="Rate", y="Model", hue="Metric")

# Add labels for actual values logic
plt.title("Silent Bias vs. Refusal Rate")
plt.tight_layout()
plt.show()

	Model	Refusal Rate
0	deepseek-r1-distill-qwen-7b	0.031667
1	deepseek-r1-lmstudio	0.033333
2	glm-4.7-flash	0.031414
3	gpt-oss-20b	0.071667
4	piaget-8b-local	0.016667
5	psych-qwen-32b-local	0.076667
6	psyche-r1-local	0.190000
7	psyllm-gml-local	0.005000
8	qwen3-lmstudio	0.021667
9	qwq	0.010000

Study A: Bias Analysis¶

1. Silent Bias Rate by Model¶

2. Bias Compliance vs. Refusal¶