{
  "schema_version": 1,
  "snapshot_date": "2026-05-18",
  "commit": "ae4ab51",
  "notes": "Public summary generated from local logs and checkpoints. Raw logs, rollouts, and model files stay gitignored.",
  "runs": [
    {
      "run_id": "bc_baseline_20260512",
      "report": "001_bc_baseline.md",
      "model_path": "models/ppo_sts_bc.pt",
      "training_mode": "behavior_cloning",
      "games": 410,
      "worker_count": 3,
      "hyperparameters": {
        "learning_rate": 0.0005,
        "batch_size": 256,
        "validation_split": 0.1,
        "weight_decay": 1e-05,
        "label_smoothing": 0.02,
        "patience": 12
      },
      "training_metrics": {
        "samples": 86297,
        "validation_accuracy_final_percent": 84.948,
        "best_validation_loss": 0.395827
      },
      "eval_run_id": "bc_25_20260512_112612",
      "evaluation": {
        "games": 25,
        "average_floor": 13.08,
        "average_reward": -0.62,
        "win_rate_percent": 0.0,
        "act2_reach_rate_percent": 12.0,
        "floor20_rate_percent": 12.0,
        "elite_fights": 32,
        "elite_wins": 25,
        "boss_fights": 10,
        "boss_wins": 3
      },
      "notes": "Usable supervised warm start, but still below the source heuristic on the 25-seed evaluation slice."
    },
    {
      "run_id": "parallel_ppo_20260512",
      "report": "002_parallel_ppo.md",
      "model_path": "models/ppo_sts.pt",
      "training_mode": "parallel_ppo",
      "games": 160,
      "worker_count": 3,
      "hyperparameters": {
        "batch_games": 8,
        "learning_rate_initial": 3e-05,
        "learning_rate_latest": 1.875e-05,
        "epochs": 4,
        "clip": 0.15,
        "target_kl": 0.03,
        "entropy_coefficient_initial": 0.001,
        "max_rollout_lag": 4,
        "auto_tune": true
      },
      "training_metrics": {
        "ppo_update_rows": 19,
        "rollouts_consumed": 152,
        "latest_transition_total": 21658,
        "latest_approx_kl": 0.0364123,
        "latest_clip_fraction": 0.189614,
        "latest_explained_variance": 0.177785,
        "stale_rollouts": 0
      },
      "eval_run_id": "ppo_current_25_20260512_112612",
      "evaluation": {
        "games": 25,
        "average_floor": 13.08,
        "average_reward": -0.62,
        "win_rate_percent": 0.0,
        "act2_reach_rate_percent": 12.0,
        "floor20_rate_percent": 12.0,
        "elite_fights": 32,
        "elite_wins": 25,
        "boss_fights": 10,
        "boss_wins": 3
      },
      "notes": "Parallel trainer mechanics worked with zero stale rollouts, but model quality has not yet separated from BC."
    },
    {
      "run_id": "heuristic_fixed_seed_20260512",
      "report": "003_fixed_seed_eval.md",
      "model_path": "heuristic",
      "training_mode": "heuristic_eval",
      "games": 25,
      "worker_count": 1,
      "hyperparameters": {
        "seed_file": "seeds/eval_200.txt"
      },
      "eval_run_id": "heuristic_25_20260512_112612",
      "evaluation": {
        "games": 25,
        "average_floor": 16.6,
        "average_reward": 11.38,
        "win_rate_percent": 0.0,
        "act2_reach_rate_percent": 32.0,
        "floor20_rate_percent": 32.0,
        "elite_fights": 33,
        "elite_wins": 27,
        "boss_fights": 18,
        "boss_wins": 8
      },
      "notes": "Current reference policy for fixed-seed comparison."
    },
    {
      "run_id": "long_ppo_eval_20260514",
      "report": "004_long_ppo_eval.md",
      "model_path": "models/ppo_sts.pt",
      "training_mode": "parallel_ppo_long_run",
      "games": 2496,
      "worker_count": 3,
      "hyperparameters": {
        "batch_games": 8,
        "learning_rate_initial": 3e-05,
        "learning_rate_latest": 2.2888184e-05,
        "epochs": 4,
        "clip": 0.15,
        "target_kl": 0.03,
        "entropy_coefficient_latest": 0.00117128,
        "bc_anchor_coefficient_latest": 0.01,
        "max_rollout_lag": 4,
        "auto_tune": true
      },
      "training_metrics": {
        "ppo_update_rows": 311,
        "latest_transition_batch": 1443,
        "total_update_transitions": 372305,
        "average_final_floor": 13.797,
        "last_500_average_floor": 14.924,
        "last_100_average_floor": 15.0,
        "latest_approx_kl": 0.00797181,
        "latest_clip_fraction": 0.106109,
        "latest_normalized_entropy": 0.256081,
        "latest_explained_variance": 0.677278,
        "stale_rollouts": 0,
        "legacy_rollouts": 0,
        "skipped_rollouts": 0
      },
      "eval_run_id": "ppo_current_25_20260514_094347",
      "evaluation": {
        "games": 25,
        "average_floor": 16.4,
        "best_floor": 36,
        "average_reward": 9.57,
        "win_rate_percent": 0.0,
        "act2_reach_rate_percent": 24.0,
        "floor20_rate_percent": 24.0,
        "elite_fights": 26,
        "elite_wins": 21,
        "boss_fights": 16,
        "boss_wins": 7
      },
      "comparison": {
        "heuristic_eval_run_id": "heuristic_25_20260514_094347",
        "heuristic_average_floor": 16.84,
        "bc_eval_run_id": "bc_25_20260514_094347",
        "bc_average_floor": 13.08,
        "ppo_minus_bc_average_floor": 3.32,
        "ppo_minus_heuristic_average_floor": -0.44
      },
      "notes": "After the long PPO run, the checkpoint improved materially over BC on the 25-seed slice and nearly matched the heuristic average floor, though it still recorded no full victories."
    },
    {
      "run_id": "ppo_4136_eval_20260516",
      "report": "005_ppo_4136_eval.md",
      "model_path": "models/ppo_sts.pt",
      "training_mode": "parallel_ppo_extended_run",
      "games": 4136,
      "worker_count": 5,
      "hyperparameters": {
        "batch_games": 8,
        "learning_rate_initial": 3e-05,
        "learning_rate_latest": 1.7462298e-05,
        "epochs": 4,
        "clip": 0.15,
        "target_kl": 0.03,
        "entropy_coefficient_latest": 0.00074944,
        "bc_anchor_coefficient_latest": 0.01,
        "max_rollout_lag": 4,
        "auto_tune": true
      },
      "training_metrics": {
        "ppo_update_rows": 515,
        "latest_transition_batch": 1415,
        "total_update_transitions": 644393,
        "average_final_floor": 14.188,
        "last_1500_average_floor": 14.767,
        "last_500_average_floor": 14.562,
        "last_100_average_floor": 14.31,
        "latest_approx_kl": 0.00362303,
        "latest_clip_fraction": 0.076596,
        "latest_normalized_entropy": 0.241497,
        "latest_explained_variance": 0.600711,
        "latest_auto_tune_action": "middle:bc_slow_down",
        "stale_rollouts_latest": 6,
        "stale_rollouts_total": 6,
        "legacy_rollouts_total": 0,
        "skipped_rollouts_total": 0,
        "early_stop_rows": 5
      },
      "eval_run_id": "ppo_current_150_20260515_172740",
      "evaluation": {
        "games": 150,
        "average_floor": 14.7,
        "best_floor": 33,
        "average_reward": 2.37,
        "win_rate_percent": 0.0,
        "act2_reach_rate_percent": 18.7,
        "floor20_rate_percent": 18.0,
        "elite_fights": 155,
        "elite_wins": 116,
        "boss_fights": 92,
        "boss_wins": 28
      },
      "comparison": {
        "heuristic_eval_run_id": "heuristic_150_20260515_115841",
        "heuristic_average_floor": 15.78,
        "bc_eval_run_id": "bc_150_20260515_172740",
        "bc_average_floor": 12.81,
        "ppo_minus_bc_average_floor": 1.89,
        "ppo_minus_heuristic_average_floor": -1.08,
        "common_seed_count": 148,
        "common_seed_ppo_average_floor": 14.62,
        "common_seed_bc_average_floor": 12.83,
        "common_seed_heuristic_average_floor": 15.78
      },
      "seed_audit": {
        "heuristic_150_20260515_115841": {
          "games": 150,
          "matches_first_150_seed_file": false,
          "unique_seeds": 149
        },
        "bc_150_20260515_172740": {
          "games": 150,
          "matches_first_150_seed_file": false,
          "unique_seeds": 149
        },
        "ppo_current_150_20260515_172740": {
          "games": 150,
          "matches_first_150_seed_file": true,
          "unique_seeds": 150
        }
      },
      "notes": "After 4,136 PPO rollout games, the PPO checkpoint remains clearly above BC on the 150-game evaluation but is still behind the heuristic baseline. Entropy is inside the healthy auto-tune band; the next changes should focus on clean fixed-seed evaluation, controlled update-strength testing, and Act 1 boss/elite data."
    },
    {
      "run_id": "ppo_5146_eval_20260518",
      "report": "006_ppo_5146_eval.md",
      "model_path": "models/ppo_sts.pt",
      "training_mode": "parallel_ppo_extended_run",
      "games": 5146,
      "worker_count": 5,
      "hyperparameters": {
        "batch_games": 8,
        "learning_rate_initial": 3e-05,
        "learning_rate_latest": 1.6e-05,
        "epochs": 4,
        "clip": 0.15,
        "target_kl": 0.03,
        "entropy_coefficient_latest": 0.00065,
        "bc_anchor_coefficient_latest": 0.01,
        "max_rollout_lag": 4,
        "auto_tune": true
      },
      "training_metrics": {
        "ppo_update_rows": 641,
        "total_update_transitions": 760000,
        "last_500_average_floor": 13.21,
        "latest_approx_kl": 0.00734499,
        "latest_clip_fraction": 0.108192,
        "latest_normalized_entropy": 0.264956,
        "stale_rollouts_total": 6,
        "early_stop_rows": 7
      },
      "eval_run_id": "ppo_current_200_20260517_141834",
      "evaluation": {
        "games": 200,
        "average_floor": 15.44,
        "best_floor": 42,
        "average_reward": 4.03,
        "win_rate_percent": 0.0,
        "act2_reach_rate_percent": 20.0,
        "floor20_rate_percent": 18.5,
        "elite_fights": 219,
        "elite_wins": 175,
        "boss_fights": 132,
        "boss_wins": 41
      },
      "comparison": {
        "heuristic_eval_run_id": "heuristic_150_20260515_115841",
        "heuristic_average_floor": 15.78,
        "bc_eval_run_id": "bc_150_20260515_172740",
        "bc_average_floor": 12.81,
        "ppo_minus_bc_average_floor": 2.63,
        "ppo_minus_heuristic_average_floor": -0.34
      },
      "notes": "After 5,146 PPO rollout games with 200-game fixed-seed eval, the PPO checkpoint is within 0.34 floors of the heuristic (15.44 vs 15.78). Elite win rate reached 79.9%, approaching heuristic's 81.9%. Best single run hit floor 42 (Act 3). The Act 1 boss remains the primary bottleneck with ~31% conversion rate vs heuristic's 39%."
    }
  ]
}