{
  "generated_at": "2026-06-30T21:30:18.790986+00:00",
  "round": "round2_multi_file_csv_workloads",
  "description": "Deliberate multi-file CSV workloads comparing pandas default CSV parsing against pandas with the PyArrow CSV engine.",
  "environment": {
    "python_version": "3.14.6 (tags/v3.14.6:c63aec6, Jun 10 2026, 10:26:10) [MSC v.1944 64 bit (AMD64)]",
    "platform": "Windows-11-10.0.26200-SP0",
    "pandas_version": "3.0.3",
    "psutil_version": "7.2.2"
  },
  "configuration": {
    "workload_manifest": "C:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\data\\manifest\\tlc_workload_manifest.csv",
    "derivation_manifest": "C:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\data\\manifest\\tlc_csv_derivation_manifest.csv",
    "output_dir": "C:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\output\\round2-multi-file",
    "repetitions": 3,
    "force_gc": false,
    "timing_notes": [
      "Each reader and workload runs in an isolated subprocess.",
      "Reader order alternates between workloads.",
      "The read phase includes loading and retaining every source file before concatenation.",
      "The concat phase measures pd.concat(..., ignore_index=True, copy=False).",
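      "Peak RSS is sampled every 50 milliseconds and is approximate; the sampled rss_peak_bytes can be lower than rss_after_concat_bytes in the same record (as in several reader_results entries).",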
      "Operating system file cache may affect local read timings."
    ]
  },
  "summary": {
    "workloads_requested": 7,
    "workloads_comparable": 7,
    "pyarrow_wins": 7,
    "default_wins": 0,
    "ties_within_1_percent": 0,
    "reader_failures": 0,
    "median_speedup_default_over_pyarrow": 5.874171491263248,
    "average_speedup_default_over_pyarrow": 6.054802128704172
  },
  "comparisons": [
    {
      "workload_id": "cross_year_january",
      "workload_name": "Cross-year January",
      "workload_type": "sequential_concat",
      "description": "Same calendar month across four years, allowing cross-year comparison without seasonal mismatch.",
      "file_count": 4,
      "input_ids": "yellow_2019_01|yellow_2020_01|yellow_2021_01|yellow_2022_01",
      "relative_paths": "data/derived-csv/yellow/2019/yellow_tripdata_2019-01.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-01.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-01.csv|data/derived-csv/yellow/2022/yellow_tripdata_2022-01.csv",
      "total_parquet_bytes": 263828508,
      "total_csv_bytes": 1922566805,
      "expected_rows": 17935325,
      "expected_columns": 19,
      "default_status": "ok",
      "pyarrow_status": "ok",
      "default_total_median_ms": 30334.972299999998,
      "pyarrow_total_median_ms": 5164.1278,
      "speedup_default_over_pyarrow": 5.874171491263248,
      "winner": "pandas_pyarrow",
      "default_error": null,
      "pyarrow_error": null
    },
    {
      "workload_id": "high_volume_q1_2019",
      "workload_name": "High-volume Q1 2019",
      "workload_type": "sequential_concat",
      "description": "Three large pre-pandemic files representing a heavy quarterly export.",
      "file_count": 3,
      "input_ids": "yellow_2019_01|yellow_2019_02|yellow_2019_03",
      "relative_paths": "data/derived-csv/yellow/2019/yellow_tripdata_2019-01.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-02.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-03.csv",
      "total_parquet_bytes": 329813031,
      "total_csv_bytes": 2426744010,
      "expected_rows": 22612607,
      "expected_columns": 19,
      "default_status": "ok",
      "pyarrow_status": "ok",
      "default_total_median_ms": 27654.2271,
      "pyarrow_total_median_ms": 7616.5635999999995,
      "speedup_default_over_pyarrow": 3.6308010478636326,
      "winner": "pandas_pyarrow",
      "default_error": null,
      "pyarrow_error": null
    },
    {
      "workload_id": "high_volume_year_2019",
      "workload_name": "High-volume year 2019",
      "workload_type": "sequential_concat",
      "description": "Complete pre-pandemic year. Primary high-volume stress-test workload.",
      "file_count": 12,
      "input_ids": "yellow_2019_01|yellow_2019_02|yellow_2019_03|yellow_2019_04|yellow_2019_05|yellow_2019_06|yellow_2019_07|yellow_2019_08|yellow_2019_09|yellow_2019_10|yellow_2019_11|yellow_2019_12",
      "relative_paths": "data/derived-csv/yellow/2019/yellow_tripdata_2019-01.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-02.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-03.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-04.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-05.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-06.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-07.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-08.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-09.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-10.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-11.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-12.csv",
      "total_parquet_bytes": 1243532931,
      "total_csv_bytes": 9146557593,
      "expected_rows": 84598444,
      "expected_columns": 19,
      "default_status": "ok",
      "pyarrow_status": "ok",
      "default_total_median_ms": 221708.58610000001,
      "pyarrow_total_median_ms": 87965.5839,
      "speedup_default_over_pyarrow": 2.5204014600987605,
      "winner": "pandas_pyarrow",
      "default_error": null,
      "pyarrow_error": null
    },
    {
      "workload_id": "mixed_shape_workload",
      "workload_name": "Mixed-shape workload",
      "workload_type": "sequential_concat",
      "description": "Deliberately uneven analyst-folder workload: tiny, small, medium, large, and giant CSV files.",
      "file_count": 5,
      "input_ids": "yellow_2020_04|yellow_2020_08|yellow_2021_06|yellow_2022_10|yellow_2019_03",
      "relative_paths": "data/derived-csv/yellow/2020/yellow_tripdata_2020-04.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-08.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-06.csv|data/derived-csv/yellow/2022/yellow_tripdata_2022-10.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-03.csv",
      "total_parquet_bytes": 238194985,
      "total_csv_bytes": 1700589745,
      "expected_rows": 15621654,
      "expected_columns": 19,
      "default_status": "ok",
      "pyarrow_status": "ok",
      "default_total_median_ms": 30476.7687,
      "pyarrow_total_median_ms": 4743.1474,
      "speedup_default_over_pyarrow": 6.425431497237468,
      "winner": "pandas_pyarrow",
      "default_error": null,
      "pyarrow_error": null
    },
    {
      "workload_id": "recovery_h2_2020",
      "workload_name": "Recovery H2 2020",
      "workload_type": "sequential_concat",
      "description": "Six sequential files showing gradual recovery after the initial pandemic collapse.",
      "file_count": 6,
      "input_ids": "yellow_2020_07|yellow_2020_08|yellow_2020_09|yellow_2020_10|yellow_2020_11|yellow_2020_12",
      "relative_paths": "data/derived-csv/yellow/2020/yellow_tripdata_2020-07.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-08.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-09.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-10.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-11.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-12.csv",
      "total_parquet_bytes": 124281459,
      "total_csv_bytes": 839598650,
      "expected_rows": 7800745,
      "expected_columns": 19,
      "default_status": "ok",
      "pyarrow_status": "ok",
      "default_total_median_ms": 15175.2099,
      "pyarrow_total_median_ms": 1714.8481,
      "speedup_default_over_pyarrow": 8.849302687509173,
      "winner": "pandas_pyarrow",
      "default_error": null,
      "pyarrow_error": null
    },
    {
      "workload_id": "recovery_year_2021",
      "workload_name": "Recovery year 2021",
      "workload_type": "sequential_concat",
      "description": "A complete recovery-era year representing a full operational reporting period.",
      "file_count": 12,
      "input_ids": "yellow_2021_01|yellow_2021_02|yellow_2021_03|yellow_2021_04|yellow_2021_05|yellow_2021_06|yellow_2021_07|yellow_2021_08|yellow_2021_09|yellow_2021_10|yellow_2021_11|yellow_2021_12",
      "relative_paths": "data/derived-csv/yellow/2021/yellow_tripdata_2021-01.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-02.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-03.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-04.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-05.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-06.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-07.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-08.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-09.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-10.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-11.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-12.csv",
      "total_parquet_bytes": 479580729,
      "total_csv_bytes": 3375693147,
      "expected_rows": 30904308,
      "expected_columns": 19,
      "default_status": "ok",
      "pyarrow_status": "ok",
      "default_total_median_ms": 63704.9266,
      "pyarrow_total_median_ms": 14122.5849,
      "speedup_default_over_pyarrow": 4.5108545674241265,
      "winner": "pandas_pyarrow",
      "default_error": null,
      "pyarrow_error": null
    },
    {
      "workload_id": "tiny_pandemic_q2_2020",
      "workload_name": "Tiny pandemic Q2 2020",
      "workload_type": "sequential_concat",
      "description": "Small three-file operational import. Includes the April 2020 low-volume file and early recovery months.",
      "file_count": 3,
      "input_ids": "yellow_2020_04|yellow_2020_05|yellow_2020_06",
      "relative_paths": "data/derived-csv/yellow/2020/yellow_tripdata_2020-04.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-05.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-06.csv",
      "total_parquet_bytes": 20177842,
      "total_csv_bytes": 121164180,
      "expected_rows": 1136285,
      "expected_columns": 19,
      "default_status": "ok",
      "pyarrow_status": "ok",
      "default_total_median_ms": 2377.5685,
      "pyarrow_total_median_ms": 224.8791,
      "speedup_default_over_pyarrow": 10.572652149532793,
      "winner": "pandas_pyarrow",
      "default_error": null,
      "pyarrow_error": null
    }
  ],
  "reader_results": [
    {
      "workload_id": "cross_year_january",
      "workload_name": "Cross-year January",
      "workload_type": "sequential_concat",
      "description": "Same calendar month across four years, allowing cross-year comparison without seasonal mismatch.",
      "reader": "pandas_default",
      "reader_order": 1,
      "status": "ok",
      "file_count": 4,
      "input_ids": "yellow_2019_01|yellow_2020_01|yellow_2021_01|yellow_2022_01",
      "relative_paths": "data/derived-csv/yellow/2019/yellow_tripdata_2019-01.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-01.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-01.csv|data/derived-csv/yellow/2022/yellow_tripdata_2022-01.csv",
      "total_parquet_bytes": 263828508,
      "total_csv_bytes": 1922566805,
      "expected_rows": 17935325,
      "expected_columns": 19,
      "observed_rows": 17935325,
      "observed_columns": 19,
      "first_read_phase_ms": 31632.607,
      "first_concat_phase_ms": 1001.0708,
      "first_total_ms": 32633.6778,
      "total_median_ms": 30334.972299999998,
      "total_avg_ms": 30951.6835,
      "total_min_ms": 29886.4004,
      "total_max_ms": 32633.6778,
      "total_stdev_ms": 1473.8157301005358,
      "final_dataframe_memory_usage_deep_bytes": 3678719707,
      "rss_before_bytes": 92602368,
      "rss_after_read_bytes": 3975553024,
      "rss_after_concat_bytes": 6271348736,
      "rss_peak_bytes": 6476337152,
      "rss_peak_delta_bytes": 6383734784,
      "error": null
    },
    {
      "workload_id": "cross_year_january",
      "workload_name": "Cross-year January",
      "workload_type": "sequential_concat",
      "description": "Same calendar month across four years, allowing cross-year comparison without seasonal mismatch.",
      "reader": "pandas_pyarrow",
      "reader_order": 2,
      "status": "ok",
      "file_count": 4,
      "input_ids": "yellow_2019_01|yellow_2020_01|yellow_2021_01|yellow_2022_01",
      "relative_paths": "data/derived-csv/yellow/2019/yellow_tripdata_2019-01.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-01.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-01.csv|data/derived-csv/yellow/2022/yellow_tripdata_2022-01.csv",
      "total_parquet_bytes": 263828508,
      "total_csv_bytes": 1922566805,
      "expected_rows": 17935325,
      "expected_columns": 19,
      "observed_rows": 17935325,
      "observed_columns": 19,
      "first_read_phase_ms": 3834.4563,
      "first_concat_phase_ms": 755.9674,
      "first_total_ms": 4590.423699999999,
      "total_median_ms": 5164.1278,
      "total_avg_ms": 5289.2187,
      "total_min_ms": 4590.423699999999,
      "total_max_ms": 6113.104600000001,
      "total_stdev_ms": 769.0091551817253,
      "final_dataframe_memory_usage_deep_bytes": 2743876950,
      "rss_before_bytes": 92385280,
      "rss_after_read_bytes": 3530039296,
      "rss_after_concat_bytes": 6113062912,
      "rss_peak_bytes": 7428263936,
      "rss_peak_delta_bytes": 7335878656,
      "error": null
    },
    {
      "workload_id": "high_volume_q1_2019",
      "workload_name": "High-volume Q1 2019",
      "workload_type": "sequential_concat",
      "description": "Three large pre-pandemic files representing a heavy quarterly export.",
      "reader": "pandas_pyarrow",
      "reader_order": 1,
      "status": "ok",
      "file_count": 3,
      "input_ids": "yellow_2019_01|yellow_2019_02|yellow_2019_03",
      "relative_paths": "data/derived-csv/yellow/2019/yellow_tripdata_2019-01.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-02.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-03.csv",
      "total_parquet_bytes": 329813031,
      "total_csv_bytes": 2426744010,
      "expected_rows": 22612607,
      "expected_columns": 19,
      "observed_rows": 22612607,
      "observed_columns": 19,
      "first_read_phase_ms": 4901.2086,
      "first_concat_phase_ms": 2715.355,
      "first_total_ms": 7616.5635999999995,
      "total_median_ms": 7616.5635999999995,
      "total_avg_ms": 8053.4149333333335,
      "total_min_ms": 7407.7276,
      "total_max_ms": 9135.9536,
      "total_stdev_ms": 943.303022546485,
      "final_dataframe_memory_usage_deep_bytes": 3459650239,
      "rss_before_bytes": 92246016,
      "rss_after_read_bytes": 6812979200,
      "rss_after_concat_bytes": 6950203392,
      "rss_peak_bytes": 9785323520,
      "rss_peak_delta_bytes": 9693077504,
      "error": null
    },
    {
      "workload_id": "high_volume_q1_2019",
      "workload_name": "High-volume Q1 2019",
      "workload_type": "sequential_concat",
      "description": "Three large pre-pandemic files representing a heavy quarterly export.",
      "reader": "pandas_default",
      "reader_order": 2,
      "status": "ok",
      "file_count": 3,
      "input_ids": "yellow_2019_01|yellow_2019_02|yellow_2019_03",
      "relative_paths": "data/derived-csv/yellow/2019/yellow_tripdata_2019-01.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-02.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-03.csv",
      "total_parquet_bytes": 329813031,
      "total_csv_bytes": 2426744010,
      "expected_rows": 22612607,
      "expected_columns": 19,
      "observed_rows": 22612607,
      "observed_columns": 19,
      "first_read_phase_ms": 30551.7552,
      "first_concat_phase_ms": 908.8683,
      "first_total_ms": 31460.623499999998,
      "total_median_ms": 27654.2271,
      "total_avg_ms": 28868.61303333333,
      "total_min_ms": 27490.988500000003,
      "total_max_ms": 31460.623499999998,
      "total_stdev_ms": 2246.230265269935,
      "final_dataframe_memory_usage_deep_bytes": 4638319336,
      "rss_before_bytes": 92434432,
      "rss_after_read_bytes": 5010333696,
      "rss_after_concat_bytes": 7904874496,
      "rss_peak_bytes": 8527609856,
      "rss_peak_delta_bytes": 8435175424,
      "error": null
    },
    {
      "workload_id": "high_volume_year_2019",
      "workload_name": "High-volume year 2019",
      "workload_type": "sequential_concat",
      "description": "Complete pre-pandemic year. Primary high-volume stress-test workload.",
      "reader": "pandas_default",
      "reader_order": 1,
      "status": "ok",
      "file_count": 12,
      "input_ids": "yellow_2019_01|yellow_2019_02|yellow_2019_03|yellow_2019_04|yellow_2019_05|yellow_2019_06|yellow_2019_07|yellow_2019_08|yellow_2019_09|yellow_2019_10|yellow_2019_11|yellow_2019_12",
      "relative_paths": "data/derived-csv/yellow/2019/yellow_tripdata_2019-01.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-02.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-03.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-04.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-05.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-06.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-07.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-08.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-09.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-10.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-11.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-12.csv",
      "total_parquet_bytes": 1243532931,
      "total_csv_bytes": 9146557593,
      "expected_rows": 84598444,
      "expected_columns": 19,
      "observed_rows": 84598444,
      "observed_columns": 19,
      "first_read_phase_ms": 105791.0183,
      "first_concat_phase_ms": 47074.4871,
      "first_total_ms": 152865.5054,
      "total_median_ms": 221708.58610000001,
      "total_avg_ms": 204139.04933333336,
      "total_min_ms": 152865.5054,
      "total_max_ms": 237843.0565,
      "total_stdev_ms": 45131.0593093572,
      "final_dataframe_memory_usage_deep_bytes": 17352811580,
      "rss_before_bytes": 92573696,
      "rss_after_read_bytes": 6947905536,
      "rss_after_concat_bytes": 4809728000,
      "rss_peak_bytes": 9690816512,
      "rss_peak_delta_bytes": 9598242816,
      "error": null
    },
    {
      "workload_id": "high_volume_year_2019",
      "workload_name": "High-volume year 2019",
      "workload_type": "sequential_concat",
      "description": "Complete pre-pandemic year. Primary high-volume stress-test workload.",
      "reader": "pandas_pyarrow",
      "reader_order": 2,
      "status": "ok",
      "file_count": 12,
      "input_ids": "yellow_2019_01|yellow_2019_02|yellow_2019_03|yellow_2019_04|yellow_2019_05|yellow_2019_06|yellow_2019_07|yellow_2019_08|yellow_2019_09|yellow_2019_10|yellow_2019_11|yellow_2019_12",
      "relative_paths": "data/derived-csv/yellow/2019/yellow_tripdata_2019-01.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-02.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-03.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-04.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-05.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-06.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-07.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-08.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-09.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-10.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-11.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-12.csv",
      "total_parquet_bytes": 1243532931,
      "total_csv_bytes": 9146557593,
      "expected_rows": 84598444,
      "expected_columns": 19,
      "observed_rows": 84598444,
      "observed_columns": 19,
      "first_read_phase_ms": 36935.0092,
      "first_concat_phase_ms": 51030.5747,
      "first_total_ms": 87965.5839,
      "total_median_ms": 87965.5839,
      "total_avg_ms": 90932.17683333333,
      "total_min_ms": 86806.003,
      "total_max_ms": 98024.9436,
      "total_stdev_ms": 6169.818657842348,
      "final_dataframe_memory_usage_deep_bytes": 12943181566,
      "rss_before_bytes": 94347264,
      "rss_after_read_bytes": 2398117888,
      "rss_after_concat_bytes": 6725177344,
      "rss_peak_bytes": 9906438144,
      "rss_peak_delta_bytes": 9812090880,
      "error": null
    },
    {
      "workload_id": "mixed_shape_workload",
      "workload_name": "Mixed-shape workload",
      "workload_type": "sequential_concat",
      "description": "Deliberately uneven analyst-folder workload: tiny, small, medium, large, and giant CSV files.",
      "reader": "pandas_pyarrow",
      "reader_order": 1,
      "status": "ok",
      "file_count": 5,
      "input_ids": "yellow_2020_04|yellow_2020_08|yellow_2021_06|yellow_2022_10|yellow_2019_03",
      "relative_paths": "data/derived-csv/yellow/2020/yellow_tripdata_2020-04.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-08.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-06.csv|data/derived-csv/yellow/2022/yellow_tripdata_2022-10.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-03.csv",
      "total_parquet_bytes": 238194985,
      "total_csv_bytes": 1700589745,
      "expected_rows": 15621654,
      "expected_columns": 19,
      "observed_rows": 15621654,
      "observed_columns": 19,
      "first_read_phase_ms": 3899.8477,
      "first_concat_phase_ms": 1209.1883,
      "first_total_ms": 5109.036,
      "total_median_ms": 4743.1474,
      "total_avg_ms": 4822.7712,
      "total_min_ms": 4616.1302,
      "total_max_ms": 5109.036,
      "total_stdev_ms": 255.917944004011,
      "final_dataframe_memory_usage_deep_bytes": 2389782879,
      "rss_before_bytes": 94650368,
      "rss_after_read_bytes": 4706050048,
      "rss_after_concat_bytes": 6956261376,
      "rss_peak_bytes": 9206603776,
      "rss_peak_delta_bytes": 9111953408,
      "error": null
    },
    {
      "workload_id": "mixed_shape_workload",
      "workload_name": "Mixed-shape workload",
      "workload_type": "sequential_concat",
      "description": "Deliberately uneven analyst-folder workload: tiny, small, medium, large, and giant CSV files.",
      "reader": "pandas_default",
      "reader_order": 2,
      "status": "ok",
      "file_count": 5,
      "input_ids": "yellow_2020_04|yellow_2020_08|yellow_2021_06|yellow_2022_10|yellow_2019_03",
      "relative_paths": "data/derived-csv/yellow/2020/yellow_tripdata_2020-04.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-08.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-06.csv|data/derived-csv/yellow/2022/yellow_tripdata_2022-10.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-03.csv",
      "total_parquet_bytes": 238194985,
      "total_csv_bytes": 1700589745,
      "expected_rows": 15621654,
      "expected_columns": 19,
      "observed_rows": 15621654,
      "observed_columns": 19,
      "first_read_phase_ms": 29640.4264,
      "first_concat_phase_ms": 836.3423,
      "first_total_ms": 30476.7687,
      "total_median_ms": 30476.7687,
      "total_avg_ms": 30218.003566666666,
      "total_min_ms": 29695.774500000003,
      "total_max_ms": 30481.4675,
      "total_stdev_ms": 452.26974056862514,
      "final_dataframe_memory_usage_deep_bytes": 3204011059,
      "rss_before_bytes": 94441472,
      "rss_after_read_bytes": 3473612800,
      "rss_after_concat_bytes": 5473329152,
      "rss_peak_bytes": 5450629120,
      "rss_peak_delta_bytes": 5356187648,
      "error": null
    },
    {
      "workload_id": "recovery_h2_2020",
      "workload_name": "Recovery H2 2020",
      "workload_type": "sequential_concat",
      "description": "Six sequential files showing gradual recovery after the initial pandemic collapse.",
      "reader": "pandas_default",
      "reader_order": 1,
      "status": "ok",
      "file_count": 6,
      "input_ids": "yellow_2020_07|yellow_2020_08|yellow_2020_09|yellow_2020_10|yellow_2020_11|yellow_2020_12",
      "relative_paths": "data/derived-csv/yellow/2020/yellow_tripdata_2020-07.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-08.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-09.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-10.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-11.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-12.csv",
      "total_parquet_bytes": 124281459,
      "total_csv_bytes": 839598650,
      "expected_rows": 7800745,
      "expected_columns": 19,
      "observed_rows": 7800745,
      "observed_columns": 19,
      "first_read_phase_ms": 13579.0373,
      "first_concat_phase_ms": 308.5527,
      "first_total_ms": 13887.59,
      "total_median_ms": 15175.2099,
      "total_avg_ms": 14801.035666666668,
      "total_min_ms": 13887.59,
      "total_max_ms": 15340.3071,
      "total_stdev_ms": 795.3625027784772,
      "final_dataframe_memory_usage_deep_bytes": 1599599268,
      "rss_before_bytes": 94490624,
      "rss_after_read_bytes": 1800155136,
      "rss_after_concat_bytes": 2798731264,
      "rss_peak_bytes": 2784063488,
      "rss_peak_delta_bytes": 2689572864,
      "error": null
    },
    {
      "workload_id": "recovery_h2_2020",
      "workload_name": "Recovery H2 2020",
      "workload_type": "sequential_concat",
      "description": "Six sequential files showing gradual recovery after the initial pandemic collapse.",
      "reader": "pandas_pyarrow",
      "reader_order": 2,
      "status": "ok",
      "file_count": 6,
      "input_ids": "yellow_2020_07|yellow_2020_08|yellow_2020_09|yellow_2020_10|yellow_2020_11|yellow_2020_12",
      "relative_paths": "data/derived-csv/yellow/2020/yellow_tripdata_2020-07.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-08.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-09.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-10.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-11.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-12.csv",
      "total_parquet_bytes": 124281459,
      "total_csv_bytes": 839598650,
      "expected_rows": 7800745,
      "expected_columns": 19,
      "observed_rows": 7800745,
      "observed_columns": 19,
      "first_read_phase_ms": 1429.6344,
      "first_concat_phase_ms": 246.0286,
      "first_total_ms": 1675.663,
      "total_median_ms": 1714.8481,
      "total_avg_ms": 1717.4277,
      "total_min_ms": 1675.663,
      "total_max_ms": 1761.772,
      "total_stdev_ms": 43.112419583804346,
      "final_dataframe_memory_usage_deep_bytes": 1193054830,
      "rss_before_bytes": 94527488,
      "rss_after_read_bytes": 2047303680,
      "rss_after_concat_bytes": 3170922496,
      "rss_peak_bytes": 3106471936,
      "rss_peak_delta_bytes": 3011944448,
      "error": null
    },
    {
      "workload_id": "recovery_year_2021",
      "workload_name": "Recovery year 2021",
      "workload_type": "sequential_concat",
      "description": "A complete recovery-era year representing a full operational reporting period.",
      "reader": "pandas_pyarrow",
      "reader_order": 1,
      "status": "ok",
      "file_count": 12,
      "input_ids": "yellow_2021_01|yellow_2021_02|yellow_2021_03|yellow_2021_04|yellow_2021_05|yellow_2021_06|yellow_2021_07|yellow_2021_08|yellow_2021_09|yellow_2021_10|yellow_2021_11|yellow_2021_12",
      "relative_paths": "data/derived-csv/yellow/2021/yellow_tripdata_2021-01.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-02.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-03.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-04.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-05.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-06.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-07.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-08.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-09.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-10.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-11.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-12.csv",
      "total_parquet_bytes": 479580729,
      "total_csv_bytes": 3375693147,
      "expected_rows": 30904308,
      "expected_columns": 19,
      "observed_rows": 30904308,
      "observed_columns": 19,
      "first_read_phase_ms": 6740.8957,
      "first_concat_phase_ms": 8411.614,
      "first_total_ms": 15152.509699999999,
      "total_median_ms": 14122.5849,
      "total_avg_ms": 13519.2102,
      "total_min_ms": 11282.536,
      "total_max_ms": 15152.509699999999,
      "total_stdev_ms": 2004.3003470345925,
      "final_dataframe_memory_usage_deep_bytes": 4727072845,
      "rss_before_bytes": 94412800,
      "rss_after_read_bytes": 6289702912,
      "rss_after_concat_bytes": 6734311424,
      "rss_peak_bytes": 9488764928,
      "rss_peak_delta_bytes": 9394352128,
      "error": null
    },
    {
      "workload_id": "recovery_year_2021",
      "workload_name": "Recovery year 2021",
      "workload_type": "sequential_concat",
      "description": "A complete recovery-era year representing a full operational reporting period.",
      "reader": "pandas_default",
      "reader_order": 2,
      "status": "ok",
      "file_count": 12,
      "input_ids": "yellow_2021_01|yellow_2021_02|yellow_2021_03|yellow_2021_04|yellow_2021_05|yellow_2021_06|yellow_2021_07|yellow_2021_08|yellow_2021_09|yellow_2021_10|yellow_2021_11|yellow_2021_12",
      "relative_paths": "data/derived-csv/yellow/2021/yellow_tripdata_2021-01.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-02.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-03.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-04.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-05.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-06.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-07.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-08.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-09.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-10.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-11.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-12.csv",
      "total_parquet_bytes": 479580729,
      "total_csv_bytes": 3375693147,
      "expected_rows": 30904308,
      "expected_columns": 19,
      "observed_rows": 30904308,
      "observed_columns": 19,
      "first_read_phase_ms": 59591.9161,
      "first_concat_phase_ms": 3997.0814,
      "first_total_ms": 63588.997500000005,
      "total_median_ms": 63704.9266,
      "total_avg_ms": 63954.88796666667,
      "total_min_ms": 63588.997500000005,
      "total_max_ms": 64570.7398,
      "total_stdev_ms": 536.4839228921252,
      "final_dataframe_memory_usage_deep_bytes": 6337767621,
      "rss_before_bytes": 94420992,
      "rss_after_read_bytes": 6906486784,
      "rss_after_concat_bytes": 8262721536,
      "rss_peak_bytes": 9744281600,
      "rss_peak_delta_bytes": 9649860608,
      "error": null
    },
    {
      "workload_id": "tiny_pandemic_q2_2020",
      "workload_name": "Tiny pandemic Q2 2020",
      "workload_type": "sequential_concat",
      "description": "Small three-file operational import. Includes the April 2020 low-volume file and early recovery months.",
      "reader": "pandas_default",
      "reader_order": 1,
      "status": "ok",
      "file_count": 3,
      "input_ids": "yellow_2020_04|yellow_2020_05|yellow_2020_06",
      "relative_paths": "data/derived-csv/yellow/2020/yellow_tripdata_2020-04.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-05.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-06.csv",
      "total_parquet_bytes": 20177842,
      "total_csv_bytes": 121164180,
      "expected_rows": 1136285,
      "expected_columns": 19,
      "observed_rows": 1136285,
      "observed_columns": 19,
      "first_read_phase_ms": 1699.3755,
      "first_concat_phase_ms": 30.2904,
      "first_total_ms": 1729.6659000000002,
      "total_median_ms": 2377.5685,
      "total_avg_ms": 2168.7528,
      "total_min_ms": 1729.6659000000002,
      "total_max_ms": 2399.0240000000003,
      "total_stdev_ms": 380.41170320295083,
      "final_dataframe_memory_usage_deep_bytes": 232951395,
      "rss_before_bytes": 94535680,
      "rss_after_read_bytes": 363241472,
      "rss_after_concat_bytes": 508805120,
      "rss_peak_bytes": 481234944,
      "rss_peak_delta_bytes": 386699264,
      "error": null
    },
    {
      "workload_id": "tiny_pandemic_q2_2020",
      "workload_name": "Tiny pandemic Q2 2020",
      "workload_type": "sequential_concat",
      "description": "Small three-file operational import. Includes the April 2020 low-volume file and early recovery months.",
      "reader": "pandas_pyarrow",
      "reader_order": 2,
      "status": "ok",
      "file_count": 3,
      "input_ids": "yellow_2020_04|yellow_2020_05|yellow_2020_06",
      "relative_paths": "data/derived-csv/yellow/2020/yellow_tripdata_2020-04.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-05.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-06.csv",
      "total_parquet_bytes": 20177842,
      "total_csv_bytes": 121164180,
      "expected_rows": 1136285,
      "expected_columns": 19,
      "observed_rows": 1136285,
      "observed_columns": 19,
      "first_read_phase_ms": 222.4533,
      "first_concat_phase_ms": 37.7226,
      "first_total_ms": 260.1759,
      "total_median_ms": 224.8791,
      "total_avg_ms": 236.50699999999998,
      "total_min_ms": 224.466,
      "total_max_ms": 260.1759,
      "total_stdev_ms": 20.49890932001018,
      "final_dataframe_memory_usage_deep_bytes": 173739859,
      "rss_before_bytes": 94388224,
      "rss_after_read_bytes": 516165632,
      "rss_after_concat_bytes": 680108032,
      "rss_peak_bytes": 805863424,
      "rss_peak_delta_bytes": 711475200,
      "error": null
    }
  ],
  "detailed_results": [
    {
      "workload": {
        "workload_id": "cross_year_january",
        "workload_name": "Cross-year January",
        "workload_type": "sequential_concat",
        "description": "Same calendar month across four years, allowing cross-year comparison without seasonal mismatch.",
        "members": [
          {
            "file_order": 1,
            "year": "2019",
            "month": "01",
            "taxi_type": "yellow",
            "shape_label": "giant",
            "input_id": "yellow_2019_01",
            "relative_path": "data/derived-csv/yellow/2019/yellow_tripdata_2019-01.csv",
            "expected_rows": 7696617,
            "expected_columns": 19,
            "parquet_size_bytes": 110439634,
            "csv_size_bytes": 810646679
          },
          {
            "file_order": 2,
            "year": "2020",
            "month": "01",
            "taxi_type": "yellow",
            "shape_label": "giant",
            "input_id": "yellow_2020_01",
            "relative_path": "data/derived-csv/yellow/2020/yellow_tripdata_2020-01.csv",
            "expected_rows": 6405008,
            "expected_columns": 19,
            "parquet_size_bytes": 93562858,
            "csv_size_bytes": 694483010
          },
          {
            "file_order": 3,
            "year": "2021",
            "month": "01",
            "taxi_type": "yellow",
            "shape_label": "medium",
            "input_id": "yellow_2021_01",
            "relative_path": "data/derived-csv/yellow/2021/yellow_tripdata_2021-01.csv",
            "expected_rows": 1369769,
            "expected_columns": 19,
            "parquet_size_bytes": 21686067,
            "csv_size_bytes": 147424437
          },
          {
            "file_order": 4,
            "year": "2022",
            "month": "01",
            "taxi_type": "yellow",
            "shape_label": "medium",
            "input_id": "yellow_2022_01",
            "relative_path": "data/derived-csv/yellow/2022/yellow_tripdata_2022-01.csv",
            "expected_rows": 2463931,
            "expected_columns": 19,
            "parquet_size_bytes": 38139949,
            "csv_size_bytes": 270012679
          }
        ],
        "file_count": 4,
        "input_ids": [
          "yellow_2019_01",
          "yellow_2020_01",
          "yellow_2021_01",
          "yellow_2022_01"
        ],
        "relative_paths": [
          "data/derived-csv/yellow/2019/yellow_tripdata_2019-01.csv",
          "data/derived-csv/yellow/2020/yellow_tripdata_2020-01.csv",
          "data/derived-csv/yellow/2021/yellow_tripdata_2021-01.csv",
          "data/derived-csv/yellow/2022/yellow_tripdata_2022-01.csv"
        ],
        "total_parquet_bytes": 263828508,
        "total_csv_bytes": 1922566805,
        "expected_rows": 17935325,
        "expected_columns": 19
      },
      "reader_results": {
        "pandas_default": {
          "reader": "pandas_default",
          "status": "ok",
          "first_run": {
            "read_phase_ms": 31632.607,
            "concat_phase_ms": 1001.0708,
            "total_ms": 32633.6778,
            "observed_rows": 17935325,
            "observed_columns": 19,
            "final_dataframe_memory_usage_deep_bytes": 3678719707,
            "rss_before_bytes": 92602368,
            "rss_after_read_bytes": 3975553024,
            "rss_after_concat_bytes": 6271348736,
            "run_index": 1
          },
          "runs": [
            {
              "read_phase_ms": 31632.607,
              "concat_phase_ms": 1001.0708,
              "total_ms": 32633.6778,
              "observed_rows": 17935325,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 3678719707,
              "rss_before_bytes": 92602368,
              "rss_after_read_bytes": 3975553024,
              "rss_after_concat_bytes": 6271348736,
              "run_index": 1
            },
            {
              "read_phase_ms": 29328.7136,
              "concat_phase_ms": 1006.2587,
              "total_ms": 30334.972299999998,
              "observed_rows": 17935325,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 3678719707,
              "rss_before_bytes": 1672466432,
              "rss_after_read_bytes": 3981496320,
              "rss_after_concat_bytes": 5596995584,
              "run_index": 2
            },
            {
              "read_phase_ms": 29043.2978,
              "concat_phase_ms": 843.1026,
              "total_ms": 29886.4004,
              "observed_rows": 17935325,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 3678719707,
              "rss_before_bytes": 996237312,
              "rss_after_read_bytes": 4280614912,
              "rss_after_concat_bytes": 6573948928,
              "run_index": 3
            }
          ],
          "error": null,
          "traceback": null,
          "total_median_ms": 30334.972299999998,
          "total_avg_ms": 30951.6835,
          "total_min_ms": 29886.4004,
          "total_max_ms": 32633.6778,
          "total_stdev_ms": 1473.8157301005358,
          "rss_peak_bytes": 6476337152,
          "rss_peak_delta_bytes": 6383734784,
          "observed_rows": 17935325,
          "observed_columns": 19,
          "stderr": "C:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. 
Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat("
        },
        "pandas_pyarrow": {
          "reader": "pandas_pyarrow",
          "status": "ok",
          "first_run": {
            "read_phase_ms": 3834.4563,
            "concat_phase_ms": 755.9674,
            "total_ms": 4590.423699999999,
            "observed_rows": 17935325,
            "observed_columns": 19,
            "final_dataframe_memory_usage_deep_bytes": 2743876950,
            "rss_before_bytes": 92385280,
            "rss_after_read_bytes": 3530039296,
            "rss_after_concat_bytes": 6113062912,
            "run_index": 1
          },
          "runs": [
            {
              "read_phase_ms": 3834.4563,
              "concat_phase_ms": 755.9674,
              "total_ms": 4590.423699999999,
              "observed_rows": 17935325,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 2743876950,
              "rss_before_bytes": 92385280,
              "rss_after_read_bytes": 3530039296,
              "rss_after_concat_bytes": 6113062912,
              "run_index": 1
            },
            {
              "read_phase_ms": 3441.2417,
              "concat_phase_ms": 1722.8861,
              "total_ms": 5164.1278,
              "observed_rows": 17935325,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 2743876950,
              "rss_before_bytes": 3523739648,
              "rss_after_read_bytes": 6175346688,
              "rss_after_concat_bytes": 5431517184,
              "run_index": 2
            },
            {
              "read_phase_ms": 4248.38,
              "concat_phase_ms": 1864.7246,
              "total_ms": 6113.104600000001,
              "observed_rows": 17935325,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 2743876950,
              "rss_before_bytes": 2781720576,
              "rss_after_read_bytes": 4871114752,
              "rss_after_concat_bytes": 7454982144,
              "run_index": 3
            }
          ],
          "error": null,
          "traceback": null,
          "total_median_ms": 5164.1278,
          "total_avg_ms": 5289.2187,
          "total_min_ms": 4590.423699999999,
          "total_max_ms": 6113.104600000001,
          "total_stdev_ms": 769.0091551817253,
          "rss_peak_bytes": 7428263936,
          "rss_peak_delta_bytes": 7335878656,
          "observed_rows": 17935325,
          "observed_columns": 19,
          "stderr": "C:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat("
        }
      },
      "comparison": {
        "workload_id": "cross_year_january",
        "workload_name": "Cross-year January",
        "workload_type": "sequential_concat",
        "description": "Same calendar month across four years, allowing cross-year comparison without seasonal mismatch.",
        "file_count": 4,
        "input_ids": "yellow_2019_01|yellow_2020_01|yellow_2021_01|yellow_2022_01",
        "relative_paths": "data/derived-csv/yellow/2019/yellow_tripdata_2019-01.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-01.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-01.csv|data/derived-csv/yellow/2022/yellow_tripdata_2022-01.csv",
        "total_parquet_bytes": 263828508,
        "total_csv_bytes": 1922566805,
        "expected_rows": 17935325,
        "expected_columns": 19,
        "default_status": "ok",
        "pyarrow_status": "ok",
        "default_total_median_ms": 30334.972299999998,
        "pyarrow_total_median_ms": 5164.1278,
        "speedup_default_over_pyarrow": 5.874171491263248,
        "winner": "pandas_pyarrow",
        "default_error": null,
        "pyarrow_error": null
      }
    },
    {
      "workload": {
        "workload_id": "high_volume_q1_2019",
        "workload_name": "High-volume Q1 2019",
        "workload_type": "sequential_concat",
        "description": "Three large pre-pandemic files representing a heavy quarterly export.",
        "members": [
          {
            "file_order": 1,
            "year": "2019",
            "month": "01",
            "taxi_type": "yellow",
            "shape_label": "giant",
            "input_id": "yellow_2019_01",
            "relative_path": "data/derived-csv/yellow/2019/yellow_tripdata_2019-01.csv",
            "expected_rows": 7696617,
            "expected_columns": 19,
            "parquet_size_bytes": 110439634,
            "csv_size_bytes": 810646679
          },
          {
            "file_order": 2,
            "year": "2019",
            "month": "02",
            "taxi_type": "yellow",
            "shape_label": "giant",
            "input_id": "yellow_2019_02",
            "relative_path": "data/derived-csv/yellow/2019/yellow_tripdata_2019-02.csv",
            "expected_rows": 7049370,
            "expected_columns": 19,
            "parquet_size_bytes": 103356025,
            "csv_size_bytes": 763317886
          },
          {
            "file_order": 3,
            "year": "2019",
            "month": "03",
            "taxi_type": "yellow",
            "shape_label": "giant",
            "input_id": "yellow_2019_03",
            "relative_path": "data/derived-csv/yellow/2019/yellow_tripdata_2019-03.csv",
            "expected_rows": 7866620,
            "expected_columns": 19,
            "parquet_size_bytes": 116017372,
            "csv_size_bytes": 852779445
          }
        ],
        "file_count": 3,
        "input_ids": [
          "yellow_2019_01",
          "yellow_2019_02",
          "yellow_2019_03"
        ],
        "relative_paths": [
          "data/derived-csv/yellow/2019/yellow_tripdata_2019-01.csv",
          "data/derived-csv/yellow/2019/yellow_tripdata_2019-02.csv",
          "data/derived-csv/yellow/2019/yellow_tripdata_2019-03.csv"
        ],
        "total_parquet_bytes": 329813031,
        "total_csv_bytes": 2426744010,
        "expected_rows": 22612607,
        "expected_columns": 19
      },
      "reader_results": {
        "pandas_pyarrow": {
          "reader": "pandas_pyarrow",
          "status": "ok",
          "first_run": {
            "read_phase_ms": 4901.2086,
            "concat_phase_ms": 2715.355,
            "total_ms": 7616.5635999999995,
            "observed_rows": 22612607,
            "observed_columns": 19,
            "final_dataframe_memory_usage_deep_bytes": 3459650239,
            "rss_before_bytes": 92246016,
            "rss_after_read_bytes": 6812979200,
            "rss_after_concat_bytes": 6950203392,
            "run_index": 1
          },
          "runs": [
            {
              "read_phase_ms": 4901.2086,
              "concat_phase_ms": 2715.355,
              "total_ms": 7616.5635999999995,
              "observed_rows": 22612607,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 3459650239,
              "rss_before_bytes": 92246016,
              "rss_after_read_bytes": 6812979200,
              "rss_after_concat_bytes": 6950203392,
              "run_index": 1
            },
            {
              "read_phase_ms": 4656.7007,
              "concat_phase_ms": 4479.2529,
              "total_ms": 9135.9536,
              "observed_rows": 22612607,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 3459650239,
              "rss_before_bytes": 3728052224,
              "rss_after_read_bytes": 8068947968,
              "rss_after_concat_bytes": 3294191616,
              "run_index": 2
            },
            {
              "read_phase_ms": 4831.4925,
              "concat_phase_ms": 2576.2351,
              "total_ms": 7407.7276,
              "observed_rows": 22612607,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 3459650239,
              "rss_before_bytes": 1346064384,
              "rss_after_read_bytes": 7221387264,
              "rss_after_concat_bytes": 5014224896,
              "run_index": 3
            }
          ],
          "error": null,
          "traceback": null,
          "total_median_ms": 7616.5635999999995,
          "total_avg_ms": 8053.4149333333335,
          "total_min_ms": 7407.7276,
          "total_max_ms": 9135.9536,
          "total_stdev_ms": 943.303022546485,
          "rss_peak_bytes": 9785323520,
          "rss_peak_delta_bytes": 9693077504,
          "observed_rows": 22612607,
          "observed_columns": 19,
          "stderr": "C:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat("
        },
        "pandas_default": {
          "reader": "pandas_default",
          "status": "ok",
          "first_run": {
            "read_phase_ms": 30551.7552,
            "concat_phase_ms": 908.8683,
            "total_ms": 31460.623499999998,
            "observed_rows": 22612607,
            "observed_columns": 19,
            "final_dataframe_memory_usage_deep_bytes": 4638319336,
            "rss_before_bytes": 92434432,
            "rss_after_read_bytes": 5010333696,
            "rss_after_concat_bytes": 7904874496,
            "run_index": 1
          },
          "runs": [
            {
              "read_phase_ms": 30551.7552,
              "concat_phase_ms": 908.8683,
              "total_ms": 31460.623499999998,
              "observed_rows": 22612607,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 4638319336,
              "rss_before_bytes": 92434432,
              "rss_after_read_bytes": 5010333696,
              "rss_after_concat_bytes": 7904874496,
              "run_index": 1
            },
            {
              "read_phase_ms": 26566.6945,
              "concat_phase_ms": 924.294,
              "total_ms": 27490.988500000003,
              "observed_rows": 22612607,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 4638319336,
              "rss_before_bytes": 2106470400,
              "rss_after_read_bytes": 5185171456,
              "rss_after_concat_bytes": 8079634432,
              "run_index": 2
            },
            {
              "read_phase_ms": 26623.6617,
              "concat_phase_ms": 1030.5654,
              "total_ms": 27654.2271,
              "observed_rows": 22612607,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 4638319336,
              "rss_before_bytes": 2256269312,
              "rss_after_read_bytes": 5714444288,
              "rss_after_concat_bytes": 8580526080,
              "run_index": 3
            }
          ],
          "error": null,
          "traceback": null,
          "total_median_ms": 27654.2271,
          "total_avg_ms": 28868.61303333333,
          "total_min_ms": 27490.988500000003,
          "total_max_ms": 31460.623499999998,
          "total_stdev_ms": 2246.230265269935,
          "rss_peak_bytes": 8527609856,
          "rss_peak_delta_bytes": 8435175424,
          "observed_rows": 22612607,
          "observed_columns": 19,
          "stderr": "C:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. 
Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat("
        }
      },
      "comparison": {
        "workload_id": "high_volume_q1_2019",
        "workload_name": "High-volume Q1 2019",
        "workload_type": "sequential_concat",
        "description": "Three large pre-pandemic files representing a heavy quarterly export.",
        "file_count": 3,
        "input_ids": "yellow_2019_01|yellow_2019_02|yellow_2019_03",
        "relative_paths": "data/derived-csv/yellow/2019/yellow_tripdata_2019-01.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-02.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-03.csv",
        "total_parquet_bytes": 329813031,
        "total_csv_bytes": 2426744010,
        "expected_rows": 22612607,
        "expected_columns": 19,
        "default_status": "ok",
        "pyarrow_status": "ok",
        "default_total_median_ms": 27654.2271,
        "pyarrow_total_median_ms": 7616.5635999999995,
        "speedup_default_over_pyarrow": 3.6308010478636326,
        "winner": "pandas_pyarrow",
        "default_error": null,
        "pyarrow_error": null
      }
    },
    {
      "workload": {
        "workload_id": "high_volume_year_2019",
        "workload_name": "High-volume year 2019",
        "workload_type": "sequential_concat",
        "description": "Complete pre-pandemic year. Primary high-volume stress-test workload.",
        "members": [
          {
            "file_order": 1,
            "year": "2019",
            "month": "01",
            "taxi_type": "yellow",
            "shape_label": "giant",
            "input_id": "yellow_2019_01",
            "relative_path": "data/derived-csv/yellow/2019/yellow_tripdata_2019-01.csv",
            "expected_rows": 7696617,
            "expected_columns": 19,
            "parquet_size_bytes": 110439634,
            "csv_size_bytes": 810646679
          },
          {
            "file_order": 2,
            "year": "2019",
            "month": "02",
            "taxi_type": "yellow",
            "shape_label": "giant",
            "input_id": "yellow_2019_02",
            "relative_path": "data/derived-csv/yellow/2019/yellow_tripdata_2019-02.csv",
            "expected_rows": 7049370,
            "expected_columns": 19,
            "parquet_size_bytes": 103356025,
            "csv_size_bytes": 763317886
          },
          {
            "file_order": 3,
            "year": "2019",
            "month": "03",
            "taxi_type": "yellow",
            "shape_label": "giant",
            "input_id": "yellow_2019_03",
            "relative_path": "data/derived-csv/yellow/2019/yellow_tripdata_2019-03.csv",
            "expected_rows": 7866620,
            "expected_columns": 19,
            "parquet_size_bytes": 116017372,
            "csv_size_bytes": 852779445
          },
          {
            "file_order": 4,
            "year": "2019",
            "month": "04",
            "taxi_type": "yellow",
            "shape_label": "giant",
            "input_id": "yellow_2019_04",
            "relative_path": "data/derived-csv/yellow/2019/yellow_tripdata_2019-04.csv",
            "expected_rows": 7475949,
            "expected_columns": 19,
            "parquet_size_bytes": 110139137,
            "csv_size_bytes": 810387251
          },
          {
            "file_order": 5,
            "year": "2019",
            "month": "05",
            "taxi_type": "yellow",
            "shape_label": "giant",
            "input_id": "yellow_2019_05",
            "relative_path": "data/derived-csv/yellow/2019/yellow_tripdata_2019-05.csv",
            "expected_rows": 7598445,
            "expected_columns": 19,
            "parquet_size_bytes": 111478943,
            "csv_size_bytes": 823824137
          },
          {
            "file_order": 6,
            "year": "2019",
            "month": "06",
            "taxi_type": "yellow",
            "shape_label": "giant",
            "input_id": "yellow_2019_06",
            "relative_path": "data/derived-csv/yellow/2019/yellow_tripdata_2019-06.csv",
            "expected_rows": 6971560,
            "expected_columns": 19,
            "parquet_size_bytes": 102903344,
            "csv_size_bytes": 755671945
          },
          {
            "file_order": 7,
            "year": "2019",
            "month": "07",
            "taxi_type": "yellow",
            "shape_label": "giant",
            "input_id": "yellow_2019_07",
            "relative_path": "data/derived-csv/yellow/2019/yellow_tripdata_2019-07.csv",
            "expected_rows": 6310419,
            "expected_columns": 19,
            "parquet_size_bytes": 93877343,
            "csv_size_bytes": 683547392
          },
          {
            "file_order": 8,
            "year": "2019",
            "month": "08",
            "taxi_type": "yellow",
            "shape_label": "giant",
            "input_id": "yellow_2019_08",
            "relative_path": "data/derived-csv/yellow/2019/yellow_tripdata_2019-08.csv",
            "expected_rows": 6073357,
            "expected_columns": 19,
            "parquet_size_bytes": 89999675,
            "csv_size_bytes": 657771137
          },
          {
            "file_order": 9,
            "year": "2019",
            "month": "09",
            "taxi_type": "yellow",
            "shape_label": "giant",
            "input_id": "yellow_2019_09",
            "relative_path": "data/derived-csv/yellow/2019/yellow_tripdata_2019-09.csv",
            "expected_rows": 6567788,
            "expected_columns": 19,
            "parquet_size_bytes": 97110325,
            "csv_size_bytes": 712192048
          },
          {
            "file_order": 10,
            "year": "2019",
            "month": "10",
            "taxi_type": "yellow",
            "shape_label": "giant",
            "input_id": "yellow_2019_10",
            "relative_path": "data/derived-csv/yellow/2019/yellow_tripdata_2019-10.csv",
            "expected_rows": 7213891,
            "expected_columns": 19,
            "parquet_size_bytes": 106293373,
            "csv_size_bytes": 782626216
          },
          {
            "file_order": 11,
            "year": "2019",
            "month": "11",
            "taxi_type": "yellow",
            "shape_label": "giant",
            "input_id": "yellow_2019_11",
            "relative_path": "data/derived-csv/yellow/2019/yellow_tripdata_2019-11.csv",
            "expected_rows": 6878111,
            "expected_columns": 19,
            "parquet_size_bytes": 100872983,
            "csv_size_bytes": 746063129
          },
          {
            "file_order": 12,
            "year": "2019",
            "month": "12",
            "taxi_type": "yellow",
            "shape_label": "giant",
            "input_id": "yellow_2019_12",
            "relative_path": "data/derived-csv/yellow/2019/yellow_tripdata_2019-12.csv",
            "expected_rows": 6896317,
            "expected_columns": 19,
            "parquet_size_bytes": 101044777,
            "csv_size_bytes": 747730328
          }
        ],
        "file_count": 12,
        "input_ids": [
          "yellow_2019_01",
          "yellow_2019_02",
          "yellow_2019_03",
          "yellow_2019_04",
          "yellow_2019_05",
          "yellow_2019_06",
          "yellow_2019_07",
          "yellow_2019_08",
          "yellow_2019_09",
          "yellow_2019_10",
          "yellow_2019_11",
          "yellow_2019_12"
        ],
        "relative_paths": [
          "data/derived-csv/yellow/2019/yellow_tripdata_2019-01.csv",
          "data/derived-csv/yellow/2019/yellow_tripdata_2019-02.csv",
          "data/derived-csv/yellow/2019/yellow_tripdata_2019-03.csv",
          "data/derived-csv/yellow/2019/yellow_tripdata_2019-04.csv",
          "data/derived-csv/yellow/2019/yellow_tripdata_2019-05.csv",
          "data/derived-csv/yellow/2019/yellow_tripdata_2019-06.csv",
          "data/derived-csv/yellow/2019/yellow_tripdata_2019-07.csv",
          "data/derived-csv/yellow/2019/yellow_tripdata_2019-08.csv",
          "data/derived-csv/yellow/2019/yellow_tripdata_2019-09.csv",
          "data/derived-csv/yellow/2019/yellow_tripdata_2019-10.csv",
          "data/derived-csv/yellow/2019/yellow_tripdata_2019-11.csv",
          "data/derived-csv/yellow/2019/yellow_tripdata_2019-12.csv"
        ],
        "total_parquet_bytes": 1243532931,
        "total_csv_bytes": 9146557593,
        "expected_rows": 84598444,
        "expected_columns": 19
      },
      "reader_results": {
        "pandas_default": {
          "reader": "pandas_default",
          "status": "ok",
          "first_run": {
            "read_phase_ms": 105791.0183,
            "concat_phase_ms": 47074.4871,
            "total_ms": 152865.5054,
            "observed_rows": 84598444,
            "observed_columns": 19,
            "final_dataframe_memory_usage_deep_bytes": 17352811580,
            "rss_before_bytes": 92573696,
            "rss_after_read_bytes": 6947905536,
            "rss_after_concat_bytes": 4809728000,
            "run_index": 1
          },
          "runs": [
            {
              "read_phase_ms": 105791.0183,
              "concat_phase_ms": 47074.4871,
              "total_ms": 152865.5054,
              "observed_rows": 84598444,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 17352811580,
              "rss_before_bytes": 92573696,
              "rss_after_read_bytes": 6947905536,
              "rss_after_concat_bytes": 4809728000,
              "run_index": 1
            },
            {
              "read_phase_ms": 196079.0531,
              "concat_phase_ms": 41764.0034,
              "total_ms": 237843.0565,
              "observed_rows": 84598444,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 17352811580,
              "rss_before_bytes": 20385792,
              "rss_after_read_bytes": 5534613504,
              "rss_after_concat_bytes": 3353780224,
              "run_index": 2
            },
            {
              "read_phase_ms": 180636.9001,
              "concat_phase_ms": 41071.686,
              "total_ms": 221708.58610000001,
              "observed_rows": 84598444,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 17352811580,
              "rss_before_bytes": 15753216,
              "rss_after_read_bytes": 6763778048,
              "rss_after_concat_bytes": 6911700992,
              "run_index": 3
            }
          ],
          "error": null,
          "traceback": null,
          "total_median_ms": 221708.58610000001,
          "total_avg_ms": 204139.04933333336,
          "total_min_ms": 152865.5054,
          "total_max_ms": 237843.0565,
          "total_stdev_ms": 45131.0593093572,
          "rss_peak_bytes": 9690816512,
          "rss_peak_delta_bytes": 9598242816,
          "observed_rows": 84598444,
          "observed_columns": 19,
          "stderr": "C:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. 
Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. 
Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. 
Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. 
Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat("
        },
        "pandas_pyarrow": {
          "reader": "pandas_pyarrow",
          "status": "ok",
          "first_run": {
            "read_phase_ms": 36935.0092,
            "concat_phase_ms": 51030.5747,
            "total_ms": 87965.5839,
            "observed_rows": 84598444,
            "observed_columns": 19,
            "final_dataframe_memory_usage_deep_bytes": 12943181566,
            "rss_before_bytes": 94347264,
            "rss_after_read_bytes": 2398117888,
            "rss_after_concat_bytes": 6725177344,
            "run_index": 1
          },
          "runs": [
            {
              "read_phase_ms": 36935.0092,
              "concat_phase_ms": 51030.5747,
              "total_ms": 87965.5839,
              "observed_rows": 84598444,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 12943181566,
              "rss_before_bytes": 94347264,
              "rss_after_read_bytes": 2398117888,
              "rss_after_concat_bytes": 6725177344,
              "run_index": 1
            },
            {
              "read_phase_ms": 34678.9414,
              "concat_phase_ms": 63346.0022,
              "total_ms": 98024.9436,
              "observed_rows": 84598444,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 12943181566,
              "rss_before_bytes": 3263102976,
              "rss_after_read_bytes": 4824895488,
              "rss_after_concat_bytes": 6548316160,
              "run_index": 2
            },
            {
              "read_phase_ms": 39774.4331,
              "concat_phase_ms": 47031.5699,
              "total_ms": 86806.003,
              "observed_rows": 84598444,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 12943181566,
              "rss_before_bytes": 3276140544,
              "rss_after_read_bytes": 9249550336,
              "rss_after_concat_bytes": 7684558848,
              "run_index": 3
            }
          ],
          "error": null,
          "traceback": null,
          "total_median_ms": 87965.5839,
          "total_avg_ms": 90932.17683333333,
          "total_min_ms": 86806.003,
          "total_max_ms": 98024.9436,
          "total_stdev_ms": 6169.818657842348,
          "rss_peak_bytes": 9906438144,
          "rss_peak_delta_bytes": 9812090880,
          "observed_rows": 84598444,
          "observed_columns": 19,
          "stderr": "Exception in thread Thread-2 (_monitor):\nTraceback (most recent call last):\n  File \"C:\\Python314\\Lib\\threading.py\", line 1082, in _bootstrap_inner\n    self._context.run(self.run)\n    ~~~~~~~~~~~~~~~~~^^^^^^^^^^\n  File \"C:\\Python314\\Lib\\threading.py\", line 1024, in run\n    self._target(*self._args, **self._kwargs)\n    ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"C:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py\", line 436, in _monitor\n    time.sleep(self.interval_seconds)\n    ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^\nOSError: [WinError 1450] Insufficient system resources exist to complete the requested service\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat("
        }
      },
      "comparison": {
        "workload_id": "high_volume_year_2019",
        "workload_name": "High-volume year 2019",
        "workload_type": "sequential_concat",
        "description": "Complete pre-pandemic year. Primary high-volume stress-test workload.",
        "file_count": 12,
        "input_ids": "yellow_2019_01|yellow_2019_02|yellow_2019_03|yellow_2019_04|yellow_2019_05|yellow_2019_06|yellow_2019_07|yellow_2019_08|yellow_2019_09|yellow_2019_10|yellow_2019_11|yellow_2019_12",
        "relative_paths": "data/derived-csv/yellow/2019/yellow_tripdata_2019-01.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-02.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-03.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-04.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-05.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-06.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-07.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-08.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-09.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-10.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-11.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-12.csv",
        "total_parquet_bytes": 1243532931,
        "total_csv_bytes": 9146557593,
        "expected_rows": 84598444,
        "expected_columns": 19,
        "default_status": "ok",
        "pyarrow_status": "ok",
        "default_total_median_ms": 221708.58610000001,
        "pyarrow_total_median_ms": 87965.5839,
        "speedup_default_over_pyarrow": 2.5204014600987605,
        "winner": "pandas_pyarrow",
        "default_error": null,
        "pyarrow_error": null
      }
    },
    {
      "workload": {
        "workload_id": "mixed_shape_workload",
        "workload_name": "Mixed-shape workload",
        "workload_type": "sequential_concat",
        "description": "Deliberately uneven analyst-folder workload: tiny, small, medium, large, and giant CSV files.",
        "members": [
          {
            "file_order": 1,
            "year": "2020",
            "month": "04",
            "taxi_type": "yellow",
            "shape_label": "tiny",
            "input_id": "yellow_2020_04",
            "relative_path": "data/derived-csv/yellow/2020/yellow_tripdata_2020-04.csv",
            "expected_rows": 238073,
            "expected_columns": 19,
            "parquet_size_bytes": 4442620,
            "csv_size_bytes": 25361579
          },
          {
            "file_order": 2,
            "year": "2020",
            "month": "08",
            "taxi_type": "yellow",
            "shape_label": "small",
            "input_id": "yellow_2020_08",
            "relative_path": "data/derived-csv/yellow/2020/yellow_tripdata_2020-08.csv",
            "expected_rows": 1007286,
            "expected_columns": 19,
            "parquet_size_bytes": 16601463,
            "csv_size_bytes": 108148348
          },
          {
            "file_order": 3,
            "year": "2021",
            "month": "06",
            "taxi_type": "yellow",
            "shape_label": "medium",
            "input_id": "yellow_2021_06",
            "relative_path": "data/derived-csv/yellow/2021/yellow_tripdata_2021-06.csv",
            "expected_rows": 2834264,
            "expected_columns": 19,
            "parquet_size_bytes": 44071592,
            "csv_size_bytes": 310122373
          },
          {
            "file_order": 4,
            "year": "2022",
            "month": "10",
            "taxi_type": "yellow",
            "shape_label": "large",
            "input_id": "yellow_2022_10",
            "relative_path": "data/derived-csv/yellow/2022/yellow_tripdata_2022-10.csv",
            "expected_rows": 3675411,
            "expected_columns": 19,
            "parquet_size_bytes": 57061938,
            "csv_size_bytes": 404178000
          },
          {
            "file_order": 5,
            "year": "2019",
            "month": "03",
            "taxi_type": "yellow",
            "shape_label": "giant",
            "input_id": "yellow_2019_03",
            "relative_path": "data/derived-csv/yellow/2019/yellow_tripdata_2019-03.csv",
            "expected_rows": 7866620,
            "expected_columns": 19,
            "parquet_size_bytes": 116017372,
            "csv_size_bytes": 852779445
          }
        ],
        "file_count": 5,
        "input_ids": [
          "yellow_2020_04",
          "yellow_2020_08",
          "yellow_2021_06",
          "yellow_2022_10",
          "yellow_2019_03"
        ],
        "relative_paths": [
          "data/derived-csv/yellow/2020/yellow_tripdata_2020-04.csv",
          "data/derived-csv/yellow/2020/yellow_tripdata_2020-08.csv",
          "data/derived-csv/yellow/2021/yellow_tripdata_2021-06.csv",
          "data/derived-csv/yellow/2022/yellow_tripdata_2022-10.csv",
          "data/derived-csv/yellow/2019/yellow_tripdata_2019-03.csv"
        ],
        "total_parquet_bytes": 238194985,
        "total_csv_bytes": 1700589745,
        "expected_rows": 15621654,
        "expected_columns": 19
      },
      "reader_results": {
        "pandas_pyarrow": {
          "reader": "pandas_pyarrow",
          "status": "ok",
          "first_run": {
            "read_phase_ms": 3899.8477,
            "concat_phase_ms": 1209.1883,
            "total_ms": 5109.036,
            "observed_rows": 15621654,
            "observed_columns": 19,
            "final_dataframe_memory_usage_deep_bytes": 2389782879,
            "rss_before_bytes": 94650368,
            "rss_after_read_bytes": 4706050048,
            "rss_after_concat_bytes": 6956261376,
            "run_index": 1
          },
          "runs": [
            {
              "read_phase_ms": 3899.8477,
              "concat_phase_ms": 1209.1883,
              "total_ms": 5109.036,
              "observed_rows": 15621654,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 2389782879,
              "rss_before_bytes": 94650368,
              "rss_after_read_bytes": 4706050048,
              "rss_after_concat_bytes": 6956261376,
              "run_index": 1
            },
            {
              "read_phase_ms": 3508.4674,
              "concat_phase_ms": 1107.6628,
              "total_ms": 4616.1302,
              "observed_rows": 15621654,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 2389782879,
              "rss_before_bytes": 4702576640,
              "rss_after_read_bytes": 5909200896,
              "rss_after_concat_bytes": 8158838784,
              "run_index": 2
            },
            {
              "read_phase_ms": 3374.4641,
              "concat_phase_ms": 1368.6833,
              "total_ms": 4743.1474,
              "observed_rows": 15621654,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 2389782879,
              "rss_before_bytes": 5902426112,
              "rss_after_read_bytes": 7048155136,
              "rss_after_concat_bytes": 8973651968,
              "run_index": 3
            }
          ],
          "error": null,
          "traceback": null,
          "total_median_ms": 4743.1474,
          "total_avg_ms": 4822.7712,
          "total_min_ms": 4616.1302,
          "total_max_ms": 5109.036,
          "total_stdev_ms": 255.917944004011,
          "rss_peak_bytes": 9206603776,
          "rss_peak_delta_bytes": 9111953408,
          "observed_rows": 15621654,
          "observed_columns": 19,
          "stderr": "C:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat("
        },
        "pandas_default": {
          "reader": "pandas_default",
          "status": "ok",
          "first_run": {
            "read_phase_ms": 29640.4264,
            "concat_phase_ms": 836.3423,
            "total_ms": 30476.7687,
            "observed_rows": 15621654,
            "observed_columns": 19,
            "final_dataframe_memory_usage_deep_bytes": 3204011059,
            "rss_before_bytes": 94441472,
            "rss_after_read_bytes": 3473612800,
            "rss_after_concat_bytes": 5473329152,
            "run_index": 1
          },
          "runs": [
            {
              "read_phase_ms": 29640.4264,
              "concat_phase_ms": 836.3423,
              "total_ms": 30476.7687,
              "observed_rows": 15621654,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 3204011059,
              "rss_before_bytes": 94441472,
              "rss_after_read_bytes": 3473612800,
              "rss_after_concat_bytes": 5473329152,
              "run_index": 1
            },
            {
              "read_phase_ms": 28917.7485,
              "concat_phase_ms": 778.026,
              "total_ms": 29695.774500000003,
              "observed_rows": 15621654,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 3204011059,
              "rss_before_bytes": 1461444608,
              "rss_after_read_bytes": 3489468416,
              "rss_after_concat_bytes": 5489102848,
              "run_index": 2
            },
            {
              "read_phase_ms": 29708.802,
              "concat_phase_ms": 772.6655,
              "total_ms": 30481.4675,
              "observed_rows": 15621654,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 3204011059,
              "rss_before_bytes": 1470836736,
              "rss_after_read_bytes": 3491516416,
              "rss_after_concat_bytes": 5491150848,
              "run_index": 3
            }
          ],
          "error": null,
          "traceback": null,
          "total_median_ms": 30476.7687,
          "total_avg_ms": 30218.003566666666,
          "total_min_ms": 29695.774500000003,
          "total_max_ms": 30481.4675,
          "total_stdev_ms": 452.26974056862514,
          "rss_peak_bytes": 5450629120,
          "rss_peak_delta_bytes": 5356187648,
          "observed_rows": 15621654,
          "observed_columns": 19,
          "stderr": "C:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. 
Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. 
Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat("
        }
      },
      "comparison": {
        "workload_id": "mixed_shape_workload",
        "workload_name": "Mixed-shape workload",
        "workload_type": "sequential_concat",
        "description": "Deliberately uneven analyst-folder workload: tiny, small, medium, large, and giant CSV files.",
        "file_count": 5,
        "input_ids": "yellow_2020_04|yellow_2020_08|yellow_2021_06|yellow_2022_10|yellow_2019_03",
        "relative_paths": "data/derived-csv/yellow/2020/yellow_tripdata_2020-04.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-08.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-06.csv|data/derived-csv/yellow/2022/yellow_tripdata_2022-10.csv|data/derived-csv/yellow/2019/yellow_tripdata_2019-03.csv",
        "total_parquet_bytes": 238194985,
        "total_csv_bytes": 1700589745,
        "expected_rows": 15621654,
        "expected_columns": 19,
        "default_status": "ok",
        "pyarrow_status": "ok",
        "default_total_median_ms": 30476.7687,
        "pyarrow_total_median_ms": 4743.1474,
        "speedup_default_over_pyarrow": 6.425431497237468,
        "winner": "pandas_pyarrow",
        "default_error": null,
        "pyarrow_error": null
      }
    },
    {
      "workload": {
        "workload_id": "recovery_h2_2020",
        "workload_name": "Recovery H2 2020",
        "workload_type": "sequential_concat",
        "description": "Six sequential files showing gradual recovery after the initial pandemic collapse.",
        "members": [
          {
            "file_order": 1,
            "year": "2020",
            "month": "07",
            "taxi_type": "yellow",
            "shape_label": "small",
            "input_id": "yellow_2020_07",
            "relative_path": "data/derived-csv/yellow/2020/yellow_tripdata_2020-07.csv",
            "expected_rows": 800412,
            "expected_columns": 19,
            "parquet_size_bytes": 13387778,
            "csv_size_bytes": 85783914
          },
          {
            "file_order": 2,
            "year": "2020",
            "month": "08",
            "taxi_type": "yellow",
            "shape_label": "small",
            "input_id": "yellow_2020_08",
            "relative_path": "data/derived-csv/yellow/2020/yellow_tripdata_2020-08.csv",
            "expected_rows": 1007286,
            "expected_columns": 19,
            "parquet_size_bytes": 16601463,
            "csv_size_bytes": 108148348
          },
          {
            "file_order": 3,
            "year": "2020",
            "month": "09",
            "taxi_type": "yellow",
            "shape_label": "medium",
            "input_id": "yellow_2020_09",
            "relative_path": "data/derived-csv/yellow/2020/yellow_tripdata_2020-09.csv",
            "expected_rows": 1341017,
            "expected_columns": 19,
            "parquet_size_bytes": 21381938,
            "csv_size_bytes": 144379540
          },
          {
            "file_order": 4,
            "year": "2020",
            "month": "10",
            "taxi_type": "yellow",
            "shape_label": "medium",
            "input_id": "yellow_2020_10",
            "relative_path": "data/derived-csv/yellow/2020/yellow_tripdata_2020-10.csv",
            "expected_rows": 1681132,
            "expected_columns": 19,
            "parquet_size_bytes": 26306876,
            "csv_size_bytes": 181252408
          },
          {
            "file_order": 5,
            "year": "2020",
            "month": "11",
            "taxi_type": "yellow",
            "shape_label": "medium",
            "input_id": "yellow_2020_11",
            "relative_path": "data/derived-csv/yellow/2020/yellow_tripdata_2020-11.csv",
            "expected_rows": 1509000,
            "expected_columns": 19,
            "parquet_size_bytes": 23583368,
            "csv_size_bytes": 162630572
          },
          {
            "file_order": 6,
            "year": "2020",
            "month": "12",
            "taxi_type": "yellow",
            "shape_label": "medium",
            "input_id": "yellow_2020_12",
            "relative_path": "data/derived-csv/yellow/2020/yellow_tripdata_2020-12.csv",
            "expected_rows": 1461898,
            "expected_columns": 19,
            "parquet_size_bytes": 23020036,
            "csv_size_bytes": 157403868
          }
        ],
        "file_count": 6,
        "input_ids": [
          "yellow_2020_07",
          "yellow_2020_08",
          "yellow_2020_09",
          "yellow_2020_10",
          "yellow_2020_11",
          "yellow_2020_12"
        ],
        "relative_paths": [
          "data/derived-csv/yellow/2020/yellow_tripdata_2020-07.csv",
          "data/derived-csv/yellow/2020/yellow_tripdata_2020-08.csv",
          "data/derived-csv/yellow/2020/yellow_tripdata_2020-09.csv",
          "data/derived-csv/yellow/2020/yellow_tripdata_2020-10.csv",
          "data/derived-csv/yellow/2020/yellow_tripdata_2020-11.csv",
          "data/derived-csv/yellow/2020/yellow_tripdata_2020-12.csv"
        ],
        "total_parquet_bytes": 124281459,
        "total_csv_bytes": 839598650,
        "expected_rows": 7800745,
        "expected_columns": 19
      },
      "reader_results": {
        "pandas_default": {
          "reader": "pandas_default",
          "status": "ok",
          "first_run": {
            "read_phase_ms": 13579.0373,
            "concat_phase_ms": 308.5527,
            "total_ms": 13887.59,
            "observed_rows": 7800745,
            "observed_columns": 19,
            "final_dataframe_memory_usage_deep_bytes": 1599599268,
            "rss_before_bytes": 94490624,
            "rss_after_read_bytes": 1800155136,
            "rss_after_concat_bytes": 2798731264,
            "run_index": 1
          },
          "runs": [
            {
              "read_phase_ms": 13579.0373,
              "concat_phase_ms": 308.5527,
              "total_ms": 13887.59,
              "observed_rows": 7800745,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 1599599268,
              "rss_before_bytes": 94490624,
              "rss_after_read_bytes": 1800155136,
              "rss_after_concat_bytes": 2798731264,
              "run_index": 1
            },
            {
              "read_phase_ms": 14880.9912,
              "concat_phase_ms": 294.2187,
              "total_ms": 15175.2099,
              "observed_rows": 7800745,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 1599599268,
              "rss_before_bytes": 791646208,
              "rss_after_read_bytes": 1817616384,
              "rss_after_concat_bytes": 2815598592,
              "run_index": 2
            },
            {
              "read_phase_ms": 15020.4926,
              "concat_phase_ms": 319.8145,
              "total_ms": 15340.3071,
              "observed_rows": 7800745,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 1599599268,
              "rss_before_bytes": 802570240,
              "rss_after_read_bytes": 1818910720,
              "rss_after_concat_bytes": 2817417216,
              "run_index": 3
            }
          ],
          "error": null,
          "traceback": null,
          "total_median_ms": 15175.2099,
          "total_avg_ms": 14801.035666666668,
          "total_min_ms": 13887.59,
          "total_max_ms": 15340.3071,
          "total_stdev_ms": 795.3625027784772,
          "rss_peak_bytes": 2784063488,
          "rss_peak_delta_bytes": 2689572864,
          "observed_rows": 7800745,
          "observed_columns": 19,
          "stderr": "C:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. 
Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. 
Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat("
        },
        "pandas_pyarrow": {
          "reader": "pandas_pyarrow",
          "status": "ok",
          "first_run": {
            "read_phase_ms": 1429.6344,
            "concat_phase_ms": 246.0286,
            "total_ms": 1675.663,
            "observed_rows": 7800745,
            "observed_columns": 19,
            "final_dataframe_memory_usage_deep_bytes": 1193054830,
            "rss_before_bytes": 94527488,
            "rss_after_read_bytes": 2047303680,
            "rss_after_concat_bytes": 3170922496,
            "run_index": 1
          },
          "runs": [
            {
              "read_phase_ms": 1429.6344,
              "concat_phase_ms": 246.0286,
              "total_ms": 1675.663,
              "observed_rows": 7800745,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 1193054830,
              "rss_before_bytes": 94527488,
              "rss_after_read_bytes": 2047303680,
              "rss_after_concat_bytes": 3170922496,
              "run_index": 1
            },
            {
              "read_phase_ms": 1427.2719,
              "concat_phase_ms": 287.5762,
              "total_ms": 1714.8481,
              "observed_rows": 7800745,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 1193054830,
              "rss_before_bytes": 2046676992,
              "rss_after_read_bytes": 2126315520,
              "rss_after_concat_bytes": 3249659904,
              "run_index": 2
            },
            {
              "read_phase_ms": 1471.3939,
              "concat_phase_ms": 290.3781,
              "total_ms": 1761.772,
              "observed_rows": 7800745,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 1193054830,
              "rss_before_bytes": 2125144064,
              "rss_after_read_bytes": 2105823232,
              "rss_after_concat_bytes": 3229159424,
              "run_index": 3
            }
          ],
          "error": null,
          "traceback": null,
          "total_median_ms": 1714.8481,
          "total_avg_ms": 1717.4277,
          "total_min_ms": 1675.663,
          "total_max_ms": 1761.772,
          "total_stdev_ms": 43.112419583804346,
          "rss_peak_bytes": 3106471936,
          "rss_peak_delta_bytes": 3011944448,
          "observed_rows": 7800745,
          "observed_columns": 19,
          "stderr": "C:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat("
        }
      },
      "comparison": {
        "workload_id": "recovery_h2_2020",
        "workload_name": "Recovery H2 2020",
        "workload_type": "sequential_concat",
        "description": "Six sequential files showing gradual recovery after the initial pandemic collapse.",
        "file_count": 6,
        "input_ids": "yellow_2020_07|yellow_2020_08|yellow_2020_09|yellow_2020_10|yellow_2020_11|yellow_2020_12",
        "relative_paths": "data/derived-csv/yellow/2020/yellow_tripdata_2020-07.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-08.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-09.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-10.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-11.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-12.csv",
        "total_parquet_bytes": 124281459,
        "total_csv_bytes": 839598650,
        "expected_rows": 7800745,
        "expected_columns": 19,
        "default_status": "ok",
        "pyarrow_status": "ok",
        "default_total_median_ms": 15175.2099,
        "pyarrow_total_median_ms": 1714.8481,
        "speedup_default_over_pyarrow": 8.849302687509173,
        "winner": "pandas_pyarrow",
        "default_error": null,
        "pyarrow_error": null
      }
    },
    {
      "workload": {
        "workload_id": "recovery_year_2021",
        "workload_name": "Recovery year 2021",
        "workload_type": "sequential_concat",
        "description": "A complete recovery-era year representing a full operational reporting period.",
        "members": [
          {
            "file_order": 1,
            "year": "2021",
            "month": "01",
            "taxi_type": "yellow",
            "shape_label": "medium",
            "input_id": "yellow_2021_01",
            "relative_path": "data/derived-csv/yellow/2021/yellow_tripdata_2021-01.csv",
            "expected_rows": 1369769,
            "expected_columns": 19,
            "parquet_size_bytes": 21686067,
            "csv_size_bytes": 147424437
          },
          {
            "file_order": 2,
            "year": "2021",
            "month": "02",
            "taxi_type": "yellow",
            "shape_label": "medium",
            "input_id": "yellow_2021_02",
            "relative_path": "data/derived-csv/yellow/2021/yellow_tripdata_2021-02.csv",
            "expected_rows": 1371709,
            "expected_columns": 19,
            "parquet_size_bytes": 21777258,
            "csv_size_bytes": 147690594
          },
          {
            "file_order": 3,
            "year": "2021",
            "month": "03",
            "taxi_type": "yellow",
            "shape_label": "medium",
            "input_id": "yellow_2021_03",
            "relative_path": "data/derived-csv/yellow/2021/yellow_tripdata_2021-03.csv",
            "expected_rows": 1925152,
            "expected_columns": 19,
            "parquet_size_bytes": 30007852,
            "csv_size_bytes": 207707455
          },
          {
            "file_order": 4,
            "year": "2021",
            "month": "04",
            "taxi_type": "yellow",
            "shape_label": "medium",
            "input_id": "yellow_2021_04",
            "relative_path": "data/derived-csv/yellow/2021/yellow_tripdata_2021-04.csv",
            "expected_rows": 2171187,
            "expected_columns": 19,
            "parquet_size_bytes": 34018560,
            "csv_size_bytes": 236675669
          },
          {
            "file_order": 5,
            "year": "2021",
            "month": "05",
            "taxi_type": "yellow",
            "shape_label": "medium",
            "input_id": "yellow_2021_05",
            "relative_path": "data/derived-csv/yellow/2021/yellow_tripdata_2021-05.csv",
            "expected_rows": 2507109,
            "expected_columns": 19,
            "parquet_size_bytes": 38743682,
            "csv_size_bytes": 273903047
          },
          {
            "file_order": 6,
            "year": "2021",
            "month": "06",
            "taxi_type": "yellow",
            "shape_label": "medium",
            "input_id": "yellow_2021_06",
            "relative_path": "data/derived-csv/yellow/2021/yellow_tripdata_2021-06.csv",
            "expected_rows": 2834264,
            "expected_columns": 19,
            "parquet_size_bytes": 44071592,
            "csv_size_bytes": 310122373
          },
          {
            "file_order": 7,
            "year": "2021",
            "month": "07",
            "taxi_type": "yellow",
            "shape_label": "medium",
            "input_id": "yellow_2021_07",
            "relative_path": "data/derived-csv/yellow/2021/yellow_tripdata_2021-07.csv",
            "expected_rows": 2821746,
            "expected_columns": 19,
            "parquet_size_bytes": 43697690,
            "csv_size_bytes": 308727112
          },
          {
            "file_order": 8,
            "year": "2021",
            "month": "08",
            "taxi_type": "yellow",
            "shape_label": "medium",
            "input_id": "yellow_2021_08",
            "relative_path": "data/derived-csv/yellow/2021/yellow_tripdata_2021-08.csv",
            "expected_rows": 2788757,
            "expected_columns": 19,
            "parquet_size_bytes": 43425907,
            "csv_size_bytes": 305166477
          },
          {
            "file_order": 9,
            "year": "2021",
            "month": "09",
            "taxi_type": "yellow",
            "shape_label": "medium",
            "input_id": "yellow_2021_09",
            "relative_path": "data/derived-csv/yellow/2021/yellow_tripdata_2021-09.csv",
            "expected_rows": 2963793,
            "expected_columns": 19,
            "parquet_size_bytes": 46125883,
            "csv_size_bytes": 324577596
          },
          {
            "file_order": 10,
            "year": "2021",
            "month": "10",
            "taxi_type": "yellow",
            "shape_label": "large",
            "input_id": "yellow_2021_10",
            "relative_path": "data/derived-csv/yellow/2021/yellow_tripdata_2021-10.csv",
            "expected_rows": 3463504,
            "expected_columns": 19,
            "parquet_size_bytes": 53286464,
            "csv_size_bytes": 379854989
          },
          {
            "file_order": 11,
            "year": "2021",
            "month": "11",
            "taxi_type": "yellow",
            "shape_label": "large",
            "input_id": "yellow_2021_11",
            "relative_path": "data/derived-csv/yellow/2021/yellow_tripdata_2021-11.csv",
            "expected_rows": 3472949,
            "expected_columns": 19,
            "parquet_size_bytes": 53100722,
            "csv_size_bytes": 381161041
          },
          {
            "file_order": 12,
            "year": "2021",
            "month": "12",
            "taxi_type": "yellow",
            "shape_label": "large",
            "input_id": "yellow_2021_12",
            "relative_path": "data/derived-csv/yellow/2021/yellow_tripdata_2021-12.csv",
            "expected_rows": 3214369,
            "expected_columns": 19,
            "parquet_size_bytes": 49639052,
            "csv_size_bytes": 352682357
          }
        ],
        "file_count": 12,
        "input_ids": [
          "yellow_2021_01",
          "yellow_2021_02",
          "yellow_2021_03",
          "yellow_2021_04",
          "yellow_2021_05",
          "yellow_2021_06",
          "yellow_2021_07",
          "yellow_2021_08",
          "yellow_2021_09",
          "yellow_2021_10",
          "yellow_2021_11",
          "yellow_2021_12"
        ],
        "relative_paths": [
          "data/derived-csv/yellow/2021/yellow_tripdata_2021-01.csv",
          "data/derived-csv/yellow/2021/yellow_tripdata_2021-02.csv",
          "data/derived-csv/yellow/2021/yellow_tripdata_2021-03.csv",
          "data/derived-csv/yellow/2021/yellow_tripdata_2021-04.csv",
          "data/derived-csv/yellow/2021/yellow_tripdata_2021-05.csv",
          "data/derived-csv/yellow/2021/yellow_tripdata_2021-06.csv",
          "data/derived-csv/yellow/2021/yellow_tripdata_2021-07.csv",
          "data/derived-csv/yellow/2021/yellow_tripdata_2021-08.csv",
          "data/derived-csv/yellow/2021/yellow_tripdata_2021-09.csv",
          "data/derived-csv/yellow/2021/yellow_tripdata_2021-10.csv",
          "data/derived-csv/yellow/2021/yellow_tripdata_2021-11.csv",
          "data/derived-csv/yellow/2021/yellow_tripdata_2021-12.csv"
        ],
        "total_parquet_bytes": 479580729,
        "total_csv_bytes": 3375693147,
        "expected_rows": 30904308,
        "expected_columns": 19
      },
      "reader_results": {
        "pandas_pyarrow": {
          "reader": "pandas_pyarrow",
          "status": "ok",
          "first_run": {
            "read_phase_ms": 6740.8957,
            "concat_phase_ms": 8411.614,
            "total_ms": 15152.509699999999,
            "observed_rows": 30904308,
            "observed_columns": 19,
            "final_dataframe_memory_usage_deep_bytes": 4727072845,
            "rss_before_bytes": 94412800,
            "rss_after_read_bytes": 6289702912,
            "rss_after_concat_bytes": 6734311424,
            "run_index": 1
          },
          "runs": [
            {
              "read_phase_ms": 6740.8957,
              "concat_phase_ms": 8411.614,
              "total_ms": 15152.509699999999,
              "observed_rows": 30904308,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 4727072845,
              "rss_before_bytes": 94412800,
              "rss_after_read_bytes": 6289702912,
              "rss_after_concat_bytes": 6734311424,
              "run_index": 1
            },
            {
              "read_phase_ms": 6999.0605,
              "concat_phase_ms": 4283.4755,
              "total_ms": 11282.536,
              "observed_rows": 30904308,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 4727072845,
              "rss_before_bytes": 3564621824,
              "rss_after_read_bytes": 6311428096,
              "rss_after_concat_bytes": 4835028992,
              "run_index": 2
            },
            {
              "read_phase_ms": 7015.855,
              "concat_phase_ms": 7106.7299,
              "total_ms": 14122.5849,
              "observed_rows": 30904308,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 4727072845,
              "rss_before_bytes": 1672790016,
              "rss_after_read_bytes": 7266086912,
              "rss_after_concat_bytes": 6213615616,
              "run_index": 3
            }
          ],
          "error": null,
          "traceback": null,
          "total_median_ms": 14122.5849,
          "total_avg_ms": 13519.2102,
          "total_min_ms": 11282.536,
          "total_max_ms": 15152.509699999999,
          "total_stdev_ms": 2004.3003470345925,
          "rss_peak_bytes": 9488764928,
          "rss_peak_delta_bytes": 9394352128,
          "observed_rows": 30904308,
          "observed_columns": 19,
          "stderr": "C:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat("
        },
        "pandas_default": {
          "reader": "pandas_default",
          "status": "ok",
          "first_run": {
            "read_phase_ms": 59591.9161,
            "concat_phase_ms": 3997.0814,
            "total_ms": 63588.997500000005,
            "observed_rows": 30904308,
            "observed_columns": 19,
            "final_dataframe_memory_usage_deep_bytes": 6337767621,
            "rss_before_bytes": 94420992,
            "rss_after_read_bytes": 6906486784,
            "rss_after_concat_bytes": 8262721536,
            "run_index": 1
          },
          "runs": [
            {
              "read_phase_ms": 59591.9161,
              "concat_phase_ms": 3997.0814,
              "total_ms": 63588.997500000005,
              "observed_rows": 30904308,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 6337767621,
              "rss_before_bytes": 94420992,
              "rss_after_read_bytes": 6906486784,
              "rss_after_concat_bytes": 8262721536,
              "run_index": 1
            },
            {
              "read_phase_ms": 60108.7205,
              "concat_phase_ms": 4462.0193,
              "total_ms": 64570.7398,
              "observed_rows": 30904308,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 6337767621,
              "rss_before_bytes": 1055436800,
              "rss_after_read_bytes": 6896582656,
              "rss_after_concat_bytes": 8158130176,
              "run_index": 2
            },
            {
              "read_phase_ms": 59535.6299,
              "concat_phase_ms": 4169.2967,
              "total_ms": 63704.9266,
              "observed_rows": 30904308,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 6337767621,
              "rss_before_bytes": 512008192,
              "rss_after_read_bytes": 6576807936,
              "rss_after_concat_bytes": 7500505088,
              "run_index": 3
            }
          ],
          "error": null,
          "traceback": null,
          "total_median_ms": 63704.9266,
          "total_avg_ms": 63954.88796666667,
          "total_min_ms": 63588.997500000005,
          "total_max_ms": 64570.7398,
          "total_stdev_ms": 536.4839228921252,
          "rss_peak_bytes": 9744281600,
          "rss_peak_delta_bytes": 9649860608,
          "observed_rows": 30904308,
          "observed_columns": 19,
          "stderr": "C:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. 
Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. 
Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. 
Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. 
Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. 
Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat("
        }
      },
      "comparison": {
        "workload_id": "recovery_year_2021",
        "workload_name": "Recovery year 2021",
        "workload_type": "sequential_concat",
        "description": "A complete recovery-era year representing a full operational reporting period.",
        "file_count": 12,
        "input_ids": "yellow_2021_01|yellow_2021_02|yellow_2021_03|yellow_2021_04|yellow_2021_05|yellow_2021_06|yellow_2021_07|yellow_2021_08|yellow_2021_09|yellow_2021_10|yellow_2021_11|yellow_2021_12",
        "relative_paths": "data/derived-csv/yellow/2021/yellow_tripdata_2021-01.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-02.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-03.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-04.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-05.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-06.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-07.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-08.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-09.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-10.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-11.csv|data/derived-csv/yellow/2021/yellow_tripdata_2021-12.csv",
        "total_parquet_bytes": 479580729,
        "total_csv_bytes": 3375693147,
        "expected_rows": 30904308,
        "expected_columns": 19,
        "default_status": "ok",
        "pyarrow_status": "ok",
        "default_total_median_ms": 63704.9266,
        "pyarrow_total_median_ms": 14122.5849,
        "speedup_default_over_pyarrow": 4.5108545674241265,
        "winner": "pandas_pyarrow",
        "default_error": null,
        "pyarrow_error": null
      }
    },
    {
      "workload": {
        "workload_id": "tiny_pandemic_q2_2020",
        "workload_name": "Tiny pandemic Q2 2020",
        "workload_type": "sequential_concat",
        "description": "Small three-file operational import. Includes the April 2020 low-volume file and early recovery months.",
        "members": [
          {
            "file_order": 1,
            "year": "2020",
            "month": "04",
            "taxi_type": "yellow",
            "shape_label": "tiny",
            "input_id": "yellow_2020_04",
            "relative_path": "data/derived-csv/yellow/2020/yellow_tripdata_2020-04.csv",
            "expected_rows": 238073,
            "expected_columns": 19,
            "parquet_size_bytes": 4442620,
            "csv_size_bytes": 25361579
          },
          {
            "file_order": 2,
            "year": "2020",
            "month": "05",
            "taxi_type": "yellow",
            "shape_label": "tiny",
            "input_id": "yellow_2020_05",
            "relative_path": "data/derived-csv/yellow/2020/yellow_tripdata_2020-05.csv",
            "expected_rows": 348415,
            "expected_columns": 19,
            "parquet_size_bytes": 6229864,
            "csv_size_bytes": 36997127
          },
          {
            "file_order": 3,
            "year": "2020",
            "month": "06",
            "taxi_type": "yellow",
            "shape_label": "small",
            "input_id": "yellow_2020_06",
            "relative_path": "data/derived-csv/yellow/2020/yellow_tripdata_2020-06.csv",
            "expected_rows": 549797,
            "expected_columns": 19,
            "parquet_size_bytes": 9505358,
            "csv_size_bytes": 58805474
          }
        ],
        "file_count": 3,
        "input_ids": [
          "yellow_2020_04",
          "yellow_2020_05",
          "yellow_2020_06"
        ],
        "relative_paths": [
          "data/derived-csv/yellow/2020/yellow_tripdata_2020-04.csv",
          "data/derived-csv/yellow/2020/yellow_tripdata_2020-05.csv",
          "data/derived-csv/yellow/2020/yellow_tripdata_2020-06.csv"
        ],
        "total_parquet_bytes": 20177842,
        "total_csv_bytes": 121164180,
        "expected_rows": 1136285,
        "expected_columns": 19
      },
      "reader_results": {
        "pandas_default": {
          "reader": "pandas_default",
          "status": "ok",
          "first_run": {
            "read_phase_ms": 1699.3755,
            "concat_phase_ms": 30.2904,
            "total_ms": 1729.6659000000002,
            "observed_rows": 1136285,
            "observed_columns": 19,
            "final_dataframe_memory_usage_deep_bytes": 232951395,
            "rss_before_bytes": 94535680,
            "rss_after_read_bytes": 363241472,
            "rss_after_concat_bytes": 508805120,
            "run_index": 1
          },
          "runs": [
            {
              "read_phase_ms": 1699.3755,
              "concat_phase_ms": 30.2904,
              "total_ms": 1729.6659000000002,
              "observed_rows": 1136285,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 232951395,
              "rss_before_bytes": 94535680,
              "rss_after_read_bytes": 363241472,
              "rss_after_concat_bytes": 508805120,
              "run_index": 1
            },
            {
              "read_phase_ms": 2357.2618,
              "concat_phase_ms": 41.7622,
              "total_ms": 2399.0240000000003,
              "observed_rows": 1136285,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 232951395,
              "rss_before_bytes": 216358912,
              "rss_after_read_bytes": 366641152,
              "rss_after_concat_bytes": 512131072,
              "run_index": 2
            },
            {
              "read_phase_ms": 2344.4383,
              "concat_phase_ms": 33.1302,
              "total_ms": 2377.5685,
              "observed_rows": 1136285,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 232951395,
              "rss_before_bytes": 216408064,
              "rss_after_read_bytes": 368099328,
              "rss_after_concat_bytes": 513589248,
              "run_index": 3
            }
          ],
          "error": null,
          "traceback": null,
          "total_median_ms": 2377.5685,
          "total_avg_ms": 2168.7528,
          "total_min_ms": 1729.6659000000002,
          "total_max_ms": 2399.0240000000003,
          "total_stdev_ms": 380.41170320295083,
          "rss_peak_bytes": 481234944,
          "rss_peak_delta_bytes": 386699264,
          "observed_rows": 1136285,
          "observed_columns": 19,
          "stderr": "C:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. 
Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:466: DtypeWarning: Columns (0: store_and_fwd_flag) have mixed types. Specify dtype option on import or set low_memory=False.\n  return pd.read_csv(path)\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat("
        },
        "pandas_pyarrow": {
          "reader": "pandas_pyarrow",
          "status": "ok",
          "first_run": {
            "read_phase_ms": 222.4533,
            "concat_phase_ms": 37.7226,
            "total_ms": 260.1759,
            "observed_rows": 1136285,
            "observed_columns": 19,
            "final_dataframe_memory_usage_deep_bytes": 173739859,
            "rss_before_bytes": 94388224,
            "rss_after_read_bytes": 516165632,
            "rss_after_concat_bytes": 680108032,
            "run_index": 1
          },
          "runs": [
            {
              "read_phase_ms": 222.4533,
              "concat_phase_ms": 37.7226,
              "total_ms": 260.1759,
              "observed_rows": 1136285,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 173739859,
              "rss_before_bytes": 94388224,
              "rss_after_read_bytes": 516165632,
              "rss_after_concat_bytes": 680108032,
              "run_index": 1
            },
            {
              "read_phase_ms": 185.0851,
              "concat_phase_ms": 39.3809,
              "total_ms": 224.466,
              "observed_rows": 1136285,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 173739859,
              "rss_before_bytes": 516698112,
              "rss_after_read_bytes": 681910272,
              "rss_after_concat_bytes": 845549568,
              "run_index": 2
            },
            {
              "read_phase_ms": 189.2125,
              "concat_phase_ms": 35.6666,
              "total_ms": 224.8791,
              "observed_rows": 1136285,
              "observed_columns": 19,
              "final_dataframe_memory_usage_deep_bytes": 173739859,
              "rss_before_bytes": 681910272,
              "rss_after_read_bytes": 746889216,
              "rss_after_concat_bytes": 910520320,
              "run_index": 3
            }
          ],
          "error": null,
          "traceback": null,
          "total_median_ms": 224.8791,
          "total_avg_ms": 236.50699999999998,
          "total_min_ms": 224.466,
          "total_max_ms": 260.1759,
          "total_stdev_ms": 20.49890932001018,
          "rss_peak_bytes": 805863424,
          "rss_peak_delta_bytes": 711475200,
          "observed_rows": 1136285,
          "observed_columns": 19,
          "stderr": "C:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat(\nC:\\code\\projects\\data-reports\\numpy-vs-pyarrow-nyc-taxi\\scripts\\benchmark_round2_multi_file.py:498: Pandas4Warning: The copy keyword is deprecated and will be removed in a future version. Copy-on-Write is active in pandas since 3.0 which utilizes a lazy copy mechanism that defers copies until necessary. Use .copy() to make an eager copy if necessary.\n  combined = pd.concat("
        }
      },
      "comparison": {
        "workload_id": "tiny_pandemic_q2_2020",
        "workload_name": "Tiny pandemic Q2 2020",
        "workload_type": "sequential_concat",
        "description": "Small three-file operational import. Includes the April 2020 low-volume file and early recovery months.",
        "file_count": 3,
        "input_ids": "yellow_2020_04|yellow_2020_05|yellow_2020_06",
        "relative_paths": "data/derived-csv/yellow/2020/yellow_tripdata_2020-04.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-05.csv|data/derived-csv/yellow/2020/yellow_tripdata_2020-06.csv",
        "total_parquet_bytes": 20177842,
        "total_csv_bytes": 121164180,
        "expected_rows": 1136285,
        "expected_columns": 19,
        "default_status": "ok",
        "pyarrow_status": "ok",
        "default_total_median_ms": 2377.5685,
        "pyarrow_total_median_ms": 224.8791,
        "speedup_default_over_pyarrow": 10.572652149532793,
        "winner": "pandas_pyarrow",
        "default_error": null,
        "pyarrow_error": null
      }
    }
  ]
}