{
  "meta": {
    "project_title": "NumPy versus PyArrow with Lahman Baseball Data",
    "project_slug": "numpy-versus-pyarrow",
    "test_slug": "many-file-baseball-workload",
    "test_title": "Many-file baseball analysis workload",
    "generated_at": "2026-06-16T17:42:38.109522+00:00",
    "data_dir": "C:\\code\\projects\\data-reports\\numpy-vs-pyarrow\\data",
    "output_path": "C:\\code\\projects\\data-reports\\numpy-vs-pyarrow\\output\\test3\\data.json",
    "runs_per_reader": 100,
    "warmups_per_reader": 2,
    "environment": {
      "python_version": "3.14.6 (tags/v3.14.6:c63aec6, Jun 10 2026, 10:26:10) [MSC v.1944 64 bit (AMD64)]",
      "python_executable": "C:\\code\\projects\\data-reports\\numpy-vs-pyarrow\\venv\\Scripts\\python.exe",
      "pandas_version": "3.0.3",
      "pyarrow_version": "24.0.0",
      "platform": "Windows-11-10.0.26200-SP0",
      "machine": "AMD64",
      "processor": "Intel64 Family 6 Model 186 Stepping 2, GenuineIntel",
      "system": "Windows",
      "release": "11"
    }
  },
  "summary": {
    "workloads_tested": 6,
    "pyarrow_faster_count": 6,
    "pandas_default_faster_count": 0,
    "no_result_count": 0,
    "workloads_with_missing_files": [],
    "largest_pyarrow_win": {
      "key": "regular_season_player_stats",
      "label": "Regular season player stats",
      "file_count": 4,
      "total_size_mb": 29.712436,
      "pandas_default_avg_ms": 802.566,
      "pyarrow_avg_ms": 136.0006,
      "speedup_default_over_pyarrow": 5.9012,
      "winner": "pyarrow"
    }
  },
  "methodology_notes": [
    "Each workload loads a group of Lahman CSV files that might be used together in a practical baseball analysis.",
    "This test measures many-file loading only. It does not include joins, groupbys, modeling, or visualization.",
    "Keeping joins out of this test keeps the focus on CSV loading rather than downstream pandas operations."
  ],
  "workloads": [
    {
      "key": "core_people_and_teams",
      "label": "Core people and teams",
      "description": "Loads People, Teams, TeamsFranchises, Appearances, and HomeGames. This is a small-to-medium metadata/context workload.",
      "files_requested": [
        "People.csv",
        "Teams.csv",
        "TeamsFranchises.csv",
        "Appearances.csv",
        "HomeGames.csv"
      ],
      "files_loaded": [
        "People.csv",
        "Teams.csv",
        "TeamsFranchises.csv",
        "Appearances.csv",
        "HomeGames.csv"
      ],
      "missing_files": [],
      "file_count": 5,
      "total_size_bytes": 11941578,
      "total_size_mb": 11.941578,
      "pandas_default": {
        "label": "pandas_default_many_file_load",
        "ok": true,
        "error": null,
        "warmup_errors": [],
        "run_errors": [],
        "runs_requested": 100,
        "runs_completed": 100,
        "avg_ms": 246.0655,
        "median_ms": 238.7048,
        "min_ms": 226.2398,
        "max_ms": 348.6233,
        "stdev_ms": 22.2544
      },
      "pyarrow": {
        "label": "pyarrow_many_file_load",
        "ok": true,
        "error": null,
        "warmup_errors": [],
        "run_errors": [],
        "runs_requested": 100,
        "runs_completed": 100,
        "avg_ms": 47.7431,
        "median_ms": 46.4053,
        "min_ms": 41.2097,
        "max_ms": 64.7735,
        "stdev_ms": 5.2981
      },
      "speedup_default_over_pyarrow": 5.1539,
      "winner": "pyarrow"
    },
    {
      "key": "regular_season_player_stats",
      "label": "Regular season player stats",
      "description": "Loads the main regular-season player statistic tables: Batting, Pitching, Fielding, and Appearances.",
      "files_requested": [
        "Batting.csv",
        "Pitching.csv",
        "Fielding.csv",
        "Appearances.csv"
      ],
      "files_loaded": [
        "Batting.csv",
        "Pitching.csv",
        "Fielding.csv",
        "Appearances.csv"
      ],
      "missing_files": [],
      "file_count": 4,
      "total_size_bytes": 29712436,
      "total_size_mb": 29.712436,
      "pandas_default": {
        "label": "pandas_default_many_file_load",
        "ok": true,
        "error": null,
        "warmup_errors": [],
        "run_errors": [],
        "runs_requested": 100,
        "runs_completed": 100,
        "avg_ms": 802.566,
        "median_ms": 752.8484,
        "min_ms": 565.7207,
        "max_ms": 1347.1418,
        "stdev_ms": 145.9619
      },
      "pyarrow": {
        "label": "pyarrow_many_file_load",
        "ok": true,
        "error": null,
        "warmup_errors": [],
        "run_errors": [],
        "runs_requested": 100,
        "runs_completed": 100,
        "avg_ms": 136.0006,
        "median_ms": 113.0161,
        "min_ms": 91.7474,
        "max_ms": 295.9512,
        "stdev_ms": 43.3626
      },
      "speedup_default_over_pyarrow": 5.9012,
      "winner": "pyarrow"
    },
    {
      "key": "postseason_stats",
      "label": "Postseason stats",
      "description": "Loads postseason batting, pitching, fielding, and series data.",
      "files_requested": [
        "BattingPost.csv",
        "PitchingPost.csv",
        "FieldingPost.csv",
        "SeriesPost.csv"
      ],
      "files_loaded": [
        "BattingPost.csv",
        "PitchingPost.csv",
        "FieldingPost.csv",
        "SeriesPost.csv"
      ],
      "missing_files": [],
      "file_count": 4,
      "total_size_bytes": 2699369,
      "total_size_mb": 2.699369,
      "pandas_default": {
        "label": "pandas_default_many_file_load",
        "ok": true,
        "error": null,
        "warmup_errors": [],
        "run_errors": [],
        "runs_requested": 100,
        "runs_completed": 100,
        "avg_ms": 90.3498,
        "median_ms": 82.6989,
        "min_ms": 62.8876,
        "max_ms": 301.4825,
        "stdev_ms": 41.1783
      },
      "pyarrow": {
        "label": "pyarrow_many_file_load",
        "ok": true,
        "error": null,
        "warmup_errors": [],
        "run_errors": [],
        "runs_requested": 100,
        "runs_completed": 100,
        "avg_ms": 54.1513,
        "median_ms": 52.7598,
        "min_ms": 42.9377,
        "max_ms": 88.8786,
        "stdev_ms": 8.3517
      },
      "speedup_default_over_pyarrow": 1.6685,
      "winner": "pyarrow"
    },
    {
      "key": "awards_and_hall_of_fame",
      "label": "Awards and Hall of Fame",
      "description": "Loads player awards, manager awards, award shares, and Hall of Fame voting data.",
      "files_requested": [
        "AwardsPlayers.csv",
        "AwardsManagers.csv",
        "AwardsSharePlayers.csv",
        "AwardsShareManagers.csv",
        "HallOfFame.csv"
      ],
      "files_loaded": [
        "AwardsPlayers.csv",
        "AwardsManagers.csv",
        "AwardsSharePlayers.csv",
        "AwardsShareManagers.csv",
        "HallOfFame.csv"
      ],
      "missing_files": [],
      "file_count": 5,
      "total_size_bytes": 1306713,
      "total_size_mb": 1.306713,
      "pandas_default": {
        "label": "pandas_default_many_file_load",
        "ok": true,
        "error": null,
        "warmup_errors": [],
        "run_errors": [],
        "runs_requested": 100,
        "runs_completed": 100,
        "avg_ms": 53.6012,
        "median_ms": 49.4168,
        "min_ms": 40.5848,
        "max_ms": 152.4782,
        "stdev_ms": 14.9341
      },
      "pyarrow": {
        "label": "pyarrow_many_file_load",
        "ok": true,
        "error": null,
        "warmup_errors": [],
        "run_errors": [],
        "runs_requested": 100,
        "runs_completed": 100,
        "avg_ms": 36.0874,
        "median_ms": 35.7443,
        "min_ms": 27.376,
        "max_ms": 70.5386,
        "stdev_ms": 6.1657
      },
      "speedup_default_over_pyarrow": 1.4853,
      "winner": "pyarrow"
    },
    {
      "key": "salary_context",
      "label": "Salary context",
      "description": "Loads Salaries with People, Teams, and Appearances for a practical money-and-player-context workload.",
      "files_requested": [
        "Salaries.csv",
        "People.csv",
        "Teams.csv",
        "Appearances.csv"
      ],
      "files_loaded": [
        "Salaries.csv",
        "People.csv",
        "Teams.csv",
        "Appearances.csv"
      ],
      "missing_files": [],
      "file_count": 4,
      "total_size_bytes": 12558578,
      "total_size_mb": 12.558578,
      "pandas_default": {
        "label": "pandas_default_many_file_load",
        "ok": true,
        "error": null,
        "warmup_errors": [],
        "run_errors": [],
        "runs_requested": 100,
        "runs_completed": 100,
        "avg_ms": 264.9372,
        "median_ms": 250.6335,
        "min_ms": 234.8798,
        "max_ms": 450.1297,
        "stdev_ms": 42.8351
      },
      "pyarrow": {
        "label": "pyarrow_many_file_load",
        "ok": true,
        "error": null,
        "warmup_errors": [],
        "run_errors": [],
        "runs_requested": 100,
        "runs_completed": 100,
        "avg_ms": 63.6056,
        "median_ms": 70.2948,
        "min_ms": 40.9492,
        "max_ms": 87.2142,
        "stdev_ms": 12.3623
      },
      "speedup_default_over_pyarrow": 4.1653,
      "winner": "pyarrow"
    },
    {
      "key": "all_csv_tables",
      "label": "All Lahman CSV tables",
      "description": "Loads every CSV file found in the data directory. This is the broadest raw loading workload.",
      "files_requested": [],
      "files_loaded": [
        "AllstarFull.csv",
        "Appearances.csv",
        "AwardsManagers.csv",
        "AwardsPlayers.csv",
        "AwardsShareManagers.csv",
        "AwardsSharePlayers.csv",
        "Batting.csv",
        "BattingPost.csv",
        "CollegePlaying.csv",
        "Fielding.csv",
        "FieldingOF.csv",
        "FieldingOFsplit.csv",
        "FieldingPost.csv",
        "HallOfFame.csv",
        "HomeGames.csv",
        "Managers.csv",
        "ManagersHalf.csv",
        "Parks.csv",
        "People.csv",
        "Pitching.csv",
        "PitchingPost.csv",
        "Salaries.csv",
        "Schools.csv",
        "SeriesPost.csv",
        "Teams.csv",
        "TeamsFranchises.csv",
        "TeamsHalf.csv"
      ],
      "missing_files": [],
      "file_count": 27,
      "total_size_bytes": 42147792,
      "total_size_mb": 42.147792,
      "pandas_default": {
        "label": "pandas_default_many_file_load",
        "ok": true,
        "error": null,
        "warmup_errors": [],
        "run_errors": [],
        "runs_requested": 100,
        "runs_completed": 100,
        "avg_ms": 892.148,
        "median_ms": 817.4462,
        "min_ms": 763.4936,
        "max_ms": 1230.585,
        "stdev_ms": 143.7348
      },
      "pyarrow": {
        "label": "pyarrow_many_file_load",
        "ok": true,
        "error": null,
        "warmup_errors": [],
        "run_errors": [],
        "runs_requested": 100,
        "runs_completed": 100,
        "avg_ms": 167.1007,
        "median_ms": 144.6263,
        "min_ms": 128.8058,
        "max_ms": 264.8139,
        "stdev_ms": 43.1218
      },
      "speedup_default_over_pyarrow": 5.339,
      "winner": "pyarrow"
    }
  ],
  "quality_checks": {
    "csv_file_count": 27,
    "workload_count": 6,
    "workloads_with_missing_files": []
  }
}