From 886ec7bad907f1aa2a4417a210fca18c9a4c4aec Mon Sep 17 00:00:00 2001
From: carlos-alm <127798846+carlos-alm@users.noreply.github.com>
Date: Wed, 4 Mar 2026 01:46:50 -0700
Subject: [PATCH] fix: show all build phases in benchmark report and add
 regression note
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The phase breakdown table in BUILD-BENCHMARKS.md was missing astMs,
cfgMs, dataflowMs, and wasmPreMs — phases that account for ~75% of
native build time since v3.0.1. Add them to phaseKeys/phaseLabels in
update-benchmark-report.js so future regenerations include them.

Also add a notes section explaining the v3.0.0→v3.0.3 regression:
the four new default-on phases total ~1,575ms, and the WASM pre-parse
double-parses all files on native builds because CFG/dataflow/complexity
are JS-only and need tree-sitter ASTs the native engine doesn't expose.
---
 generated/benchmarks/BUILD-BENCHMARKS.md | 10 ++++++++++
 scripts/update-benchmark-report.js       |  6 +++++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/generated/benchmarks/BUILD-BENCHMARKS.md b/generated/benchmarks/BUILD-BENCHMARKS.md
index a619d8eb..4126fd38 100644
--- a/generated/benchmarks/BUILD-BENCHMARKS.md
+++ b/generated/benchmarks/BUILD-BENCHMARKS.md
@@ -132,6 +132,16 @@ calls, but the v2.3.0 CI benchmark confirms this was **insufficient** — WASM
 remains at 6.6 ms/file (vs 5.0 in v2.0.0). The WASM/Native ratio widened from
 2.0x to 3.5x. Further optimization of WASM boundary crossings in the JS
 extractor is needed to recover the regression.
+
+**Native build regression (v3.0.0 4.4 ms/file → v3.0.3 12.3 ms/file):** The regression is entirely
+from four new build phases added in v3.0.1 that are now default-on: AST node extraction (651ms),
+WASM pre-parse (388ms), dataflow analysis (367ms), and CFG construction (169ms) — totalling ~1,575ms
+of new work. The original seven phases (parse, insert, resolve, edges, structure, roles, complexity)
+actually got slightly faster (728ms → 542ms). The WASM pre-parse phase exists because CFG, dataflow,
+and complexity are implemented in JS and need tree-sitter syntax trees to walk, but the native Rust engine
+only returns extracted symbols — not syntax trees. So on native builds, all 172 files get parsed twice:
+once by Rust (85ms) and once by WASM (388ms). Eliminating this double-parse requires either implementing
+CFG/dataflow in Rust, or having the native engine expose tree-sitter trees to JS.
 <!-- NOTES_END -->
 
 <!-- BENCHMARK_DATA
diff --git a/scripts/update-benchmark-report.js b/scripts/update-benchmark-report.js
index 2c89cb5b..6b7d8482 100644
--- a/scripts/update-benchmark-report.js
+++ b/scripts/update-benchmark-report.js
@@ -152,15 +152,19 @@ for (const engineKey of ['native', 'wasm']) {
 const hasPhases = latest.native?.phases || latest.wasm?.phases;
 if (hasPhases) {
 	md += '### Build Phase Breakdown (latest)\n\n';
-	const phaseKeys = ['parseMs', 'insertMs', 'resolveMs', 'edgesMs', 'structureMs', 'rolesMs', 'complexityMs'];
+	const phaseKeys = ['parseMs', 'wasmPreMs', 'insertMs', 'resolveMs', 'edgesMs', 'structureMs', 'rolesMs', 'astMs', 'complexityMs', 'cfgMs', 'dataflowMs'];
 	const phaseLabels = {
 		parseMs: 'Parse',
+		wasmPreMs: 'WASM pre-parse',
 		insertMs: 'Insert nodes',
 		resolveMs: 'Resolve imports',
 		edgesMs: 'Build edges',
 		structureMs: 'Structure',
 		rolesMs: 'Roles',
+		astMs: 'AST nodes',
 		complexityMs: 'Complexity',
+		cfgMs: 'CFG',
+		dataflowMs: 'Dataflow',
 	};
 
 	md += '| Phase | Native | WASM |\n';