| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
7037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012 |
- #!/usr/bin/env node
- import fs from 'fs/promises';
- import fsSync from 'fs';
- import os from 'os';
- import path from 'path';
- import { spawn, execSync } from 'child_process';
- import { performance } from 'perf_hooks';
- import { PARSEVL_URL } from '../src/data/versions.js';
- import { getCookie } from '../src/server/helpers.js';
- import { WorkflowExecutor } from '../src/vl/workflow-executor.js';
- import { VLProjectContext } from '../src/vl/project-context.js';
- import { createVLValidateTool } from '../src/tools/vl-validate.js';
- import { extractFromFileTree, validateMeta } from '../src/vl/metadata-extractor.js';
// Model id used for the claude CLI and the workflow executor; VL_CODE_MODEL overrides the default.
const MODEL = process.env.VL_CODE_MODEL || 'claude-opus-4-6';
// Language code handed to the workflow as `targetLang`.
const TARGET_LANG = 'zh-CN';
// Current date (UTC) as YYYY-MM-DD; shown in the report title.
const TODAY = new Date().toISOString().substring(0, 10);
// Directory probed for already-existing benchmark projects.
const TEST_ROOT = path.join(os.homedir(), 'Documents', 'VLProjects', '_tests');
// Resolved against the current working directory (presumably the report output folder — confirm with caller).
const REPORT_DIR = path.resolve('docs', 'benchmarks');
// Workflow definition loaded by the workflow baseline, resolved against the current working directory.
const WORKFLOW_PATH = path.resolve('.vl-code', 'workflows', '6-file-codegen.json');
// Base URL of the DocCenter service endpoints.
const DOCCENTER_BASE = 'https://v4pre.visuallogic.ai/api/12027022';
// Relative paths the direct baseline is expected to produce, grouped by folder.
// The flattened order (database, theme, services, components, sections, app) is preserved.
const DIRECT_REQUIRED_PATHS = [
  ['Database', ['CampusOps.vdb']],
  ['Theme', ['CampusOpsTheme.vth']],
  ['Services', [
    'OperationsOverview.vs',
    'ScheduleService.vs',
    'WorkOrderService.vs',
    'AlertService.vs',
    'SettingsService.vs',
  ]],
  ['ExtComponents', ['KpiCard.cp', 'StatusPill.cp', 'FilterToolbar.cp', 'AlertListItem.cp']],
  ['Sections', [
    'OverviewPage.sc',
    'ScheduleBoard.sc',
    'WorkOrderDesk.sc',
    'AlertCenter.sc',
    'SettingsPage.sc',
  ]],
  ['Apps', ['CampusOpsApp.vx']],
].flatMap(([folder, fileNames]) => fileNames.map((fileName) => `${folder}/${fileName}`));
// Fixed project contract for the direct baseline: theme, data model, services,
// components, sections and app routes. It is serialized into every direct prompt,
// so property order inside each record is kept stable by the small builders below.
const DIRECT_CONTRACT = (() => {
  // 'name:TYPE' column specs -> { id, fields: [{ name, type }] }
  const table = (id, ...columns) => ({
    id,
    fields: columns.map((column) => {
      const [name, type] = column.split(':');
      return { name, type };
    }),
  });
  const method = (id, params, returns) => ({ id, params, returns });
  const service = (domainId, purpose, methods) => ({
    domainId,
    filePath: `Services/${domainId}.vs`,
    purpose,
    methods,
  });
  const component = (id, purpose) => ({ id, filePath: `ExtComponents/${id}.cp`, purpose });
  const section = (id, consumesServices, usesComponents, purpose) => ({
    id,
    filePath: `Sections/${id}.sc`,
    consumesServices,
    usesComponents,
    purpose,
  });
  const route = (routePath, sectionId) => ({ path: routePath, sectionId });

  // Return-shape strings shared by several service methods.
  const OBJECT_RESULT = '{success:BOOL,data:OBJECT}';
  const LIST_RESULT = '{success:BOOL,data:[{}]}';
  const STATUS_RESULT = '{success:BOOL}';

  return {
    projectName: 'CampusOps',
    theme: {
      filePath: 'Theme/CampusOpsTheme.vth',
      style: 'enterprise-light',
      palette: {
        primary: '#0F7B6C',
        primaryHover: '#136E62',
        warning: '#F59E0B',
        danger: '#DC2626',
        success: '#16A34A',
        slate: '#334155',
        surface: '#F8FAFC',
        surfaceElevated: '#FFFFFF',
      },
      direction: 'deep teal primary, slate neutrals, soft elevated cards, pill filters, enterprise operations dashboard',
    },
    dataModel: {
      filePath: 'Database/CampusOps.vdb',
      tables: [
        table('Campus', 'name:STRING', 'region:STRING', 'manager:STRING', 'activeAlerts:INT'),
        table('Technician', 'name:STRING', 'campusId:INT', 'skillTag:STRING', 'shiftStatus:STRING', 'utilizationRate:FLOAT'),
        table('WorkOrder', 'campusId:INT', 'title:STRING', 'priority:STRING', 'status:STRING', 'assigneeId:INT', 'slaHours:INT'),
        table('AlertRule', 'campusId:INT', 'ruleName:STRING', 'thresholdValue:FLOAT', 'enabled:BOOL'),
        table('AlertEvent', 'campusId:INT', 'ruleId:INT', 'severity:STRING', 'status:STRING', 'message:STRING'),
        table('UserPreference', 'density:STRING', 'defaultCampusId:INT', 'emailDigest:BOOL'),
      ],
    },
    services: [
      service('OperationsOverview', 'dashboard KPIs and summary cards', [
        method('GetOverviewMetrics', 'campusId(INT)', OBJECT_RESULT),
      ]),
      service('ScheduleService', 'technician assignment list and shift filters', [
        method('ListAssignments', 'campusId(INT),shiftStatus(STRING)', LIST_RESULT),
      ]),
      service('WorkOrderService', 'work order list and status updates', [
        method('ListWorkOrders', 'campusId(INT),priority(STRING),status(STRING)', LIST_RESULT),
        method('UpdateWorkOrderStatus', 'workOrderId(INT),status(STRING)', STATUS_RESULT),
      ]),
      service('AlertService', 'alert event list and acknowledgement', [
        method('ListAlerts', 'campusId(INT),severity(STRING),status(STRING)', LIST_RESULT),
        method('AcknowledgeAlert', 'alertId(INT)', STATUS_RESULT),
      ]),
      service('SettingsService', 'settings and threshold preferences', [
        method('GetSettings', 'campusId(INT)', OBJECT_RESULT),
        method('SaveSettings', 'campusId(INT),density(STRING),threshold(FLOAT)', STATUS_RESULT),
      ]),
    ],
    components: [
      component('KpiCard', 'title, numeric value, helper text, optional intent tone'),
      component('StatusPill', 'compact status chip for priority or lifecycle states'),
      component('FilterToolbar', 'filter row with campus and status selectors plus clear action'),
      component('AlertListItem', 'alert row with severity, message, meta, and acknowledge button'),
    ],
    sections: [
      section(
        'OverviewPage',
        ['OperationsOverview.GetOverviewMetrics'],
        ['KpiCard'],
        'overview dashboard with KPI cards and campus summary rows',
      ),
      section(
        'ScheduleBoard',
        ['ScheduleService.ListAssignments'],
        ['FilterToolbar', 'StatusPill'],
        'schedule and technician assignment table',
      ),
      section(
        'WorkOrderDesk',
        ['WorkOrderService.ListWorkOrders', 'WorkOrderService.UpdateWorkOrderStatus'],
        ['FilterToolbar', 'StatusPill'],
        'work order list with status change action',
      ),
      section(
        'AlertCenter',
        ['AlertService.ListAlerts', 'AlertService.AcknowledgeAlert'],
        ['FilterToolbar', 'AlertListItem'],
        'alert center with severity list and acknowledgement actions',
      ),
      section(
        'SettingsPage',
        ['SettingsService.GetSettings', 'SettingsService.SaveSettings'],
        ['StatusPill'],
        'alert threshold and density settings form',
      ),
    ],
    app: {
      id: 'CampusOpsApp',
      filePath: 'Apps/CampusOpsApp.vx',
      routes: [
        route('overview', 'OverviewPage'),
        route('schedule', 'ScheduleBoard'),
        route('work-orders', 'WorkOrderDesk'),
        route('alerts', 'AlertCenter'),
        route('settings', 'SettingsPage'),
      ],
    },
  };
})();
// Natural-language requirement for the CampusOps app. It is sent to the workflow as
// `userRequest`, embedded in every direct prompt, and printed in the report.
const APP_REQUIREMENT = [
  'Build a desktop-first operations cockpit called CampusOps for a multi-campus facilities team.',
  'Business scope:',
  '- Roles: dispatcher and supervisor.',
  '- Routes/pages: /overview, /schedule, /work-orders, /alerts, /settings.',
  '- Domain data: Campus, Technician, WorkOrder, AlertRule, AlertEvent.',
  '- Key interactions:',
  '- Overview shows KPI cards for open work orders, overdue SLA, active alerts, and technician utilization.',
  '- Schedule page lists technician assignments by campus and lets users filter by campus, technician, and shift status.',
  '- Work order page lists work orders with filters by campus, priority, and status, and supports changing the order status.',
  '- Alert center lists alert events with severity, source campus, and acknowledgement/escalation actions.',
  '- Settings page edits alert thresholds and dashboard density preferences.',
  'Design direction:',
  '- Theme should feel enterprise and operational: deep teal primary, slate surfaces, amber warning, red danger, soft elevated cards, pill filters.',
  '- Prefer card/list/table layouts instead of advanced chart widgets.',
  '- Keep interactions compile-safe and easy to preview.',
  'Implementation constraints:',
  '- Use the exact file names listed below when generating the direct baseline.',
  '- Generate realistic mock data in the database file.',
  '- Use only VL-safe constructs; avoid speculative widgets or syntax.',
].join('\n');
// Condensed VL syntax and theme rules placed at the top of every direct-generation prompt.
const DIRECT_VL_DIGEST = [
  'Latest reference baseline:',
  '- Latest DocCenter VL syntax document reports version 3.6.',
  '- Latest Theme doc is Theme-Enterprise-6.5 with styleSpaceVersion 1.6.',
  '- Current VLCode-Lite runtime, validator, and workflow toolchain in this repo are still pinned to VL 3.5.',
  'Compatibility target for this benchmark:',
  '- Every generated VL file must start with // VL_VERSION:3.5.',
  '- Stay within the shared 3.5-safe subset even when the latest syntax doc is newer.',
  'Essential VL rules distilled from the latest docs:',
  '- File types: .vx App, .sc Section, .cp Component, .vs ServiceDomain, .vdb Database, .vth Theme.',
  '- Cross references: App -> Section/Component only; Section -> ServiceDomain/Component only; Service and Component do not cross-reference others.',
  '- Indentation uses leading hyphens, never spaces.',
  '- App required section order: SysConfig, Frontend Global Vars, Frontend Derived Vars, Frontend Tree, Frontend Event Handlers, Frontend Internal Methods, Frontend Pipeline Funcs.',
  '- Section required section order: Frontend Public Props, Frontend Public Events, Frontend Public Methods, Frontend Global Vars, Frontend Derived Vars, Frontend Tree, Frontend Event Handlers, Frontend Internal Methods, Frontend Pipeline Funcs.',
  '- Component required section order: Frontend Public Props, Frontend Public Events, Frontend Derived Vars, Frontend Tree, Frontend Event Handlers, Frontend Internal Methods, Frontend Pipeline Funcs.',
  '- ServiceDomain required section order: Backend Environment Vars, Backend Tree, Services, Backend Event Handlers, Transactions, Backend Internal Methods, Backend Pipeline Funcs.',
  '- Style values must be static string literals only. Do not use ternary expressions or variable expressions inside style:.',
  '- Do not emit CSS-only skin props such as border-collapse.',
  '- Do not bind events directly on <For-*> nodes; bind on the interactive child inside the loop.',
  '- Prefer simple cards, rows, columns, text, buttons, input/select-like controls, if/for blocks, and service calls.',
  'Theme rules distilled from the latest docs:',
  '- Theme file order: # Meta -> # Design Tokens (optional) -> # Point Slot Values -> # Overrides (optional).',
  '- The heading must be exactly # Point Slot Values. Do not use legacy # Coordinate Values.',
  '- base_theme should stay Platform/Theme-Default-Light@1 for this benchmark.',
  '- Use enterprise-style point slots such as intent.*, emphasis.*, shape.*, surface.*, textRole.*, state.*, size.*, space.* when overriding appearance.',
].join('\n');
/**
 * Return the cookie in `ih5bearer=<value>` form.
 *
 * @param {*} cookie - Raw cookie value; may already carry the `ih5bearer=` prefix.
 * @returns {string} Empty string for a falsy input, otherwise the prefixed cookie.
 */
function normalizeCookie(cookie) {
  if (!cookie) {
    return '';
  }
  const prefix = 'ih5bearer=';
  const text = String(cookie);
  if (text.startsWith(prefix)) {
    return text;
  }
  return `${prefix}${cookie}`;
}
/**
 * Strip every hyphen from a date-like string and keep the first eight characters.
 *
 * @param {*} dateText - e.g. '2024-05-17'; falsy values are treated as ''.
 * @returns {string} e.g. '20240517'.
 */
function slugDate(dateText) {
  const source = String(dateText || '');
  const withoutHyphens = source.split('-').join('');
  return withoutHyphens.substring(0, 8);
}
/**
 * Pick a project folder name that does not exist yet under `root`.
 *
 * When `baseName` is taken, a run counter is inserted: `FooTest` becomes
 * `FooRun2Test`, `FooRun3Test`, ... Names without a `Test` suffix get the counter
 * appended (`Foo` -> `FooRun2`); previously such names never changed between
 * iterations, so the loop could not terminate once the folder existed.
 *
 * @param {string} baseName - Preferred project name.
 * @param {string} [root=TEST_ROOT] - Directory whose children are checked.
 * @returns {string} `baseName` itself when free, otherwise the first free variant.
 */
function projectNameWithFallback(baseName, root = TEST_ROOT) {
  const name = String(baseName);
  const hasTestSuffix = /Test$/.test(name);
  let candidate = name;
  for (let n = 2; fsSync.existsSync(path.join(root, candidate)); n += 1) {
    candidate = hasTestSuffix
      ? name.replace(/Test$/, `Run${n}Test`)
      : `${name}Run${n}`;
  }
  return candidate;
}
/**
 * Create the standard project sub-folders inside `projectDir`, one after another.
 * Existing folders are left alone (`recursive: true`).
 *
 * @param {string} projectDir - Project root directory.
 * @returns {Promise<void>}
 */
async function ensureProjectScaffold(projectDir) {
  const folders = [
    'Apps',
    'Sections',
    'ExtComponents',
    'Services',
    'Database',
    'Theme',
    'Process',
    '.vl-code',
  ];
  for (const folder of folders) {
    const folderPath = path.join(projectDir, folder);
    await fs.mkdir(folderPath, { recursive: true });
  }
}
/**
 * Fetch a document from DocCenter by id.
 *
 * POSTs `{ docId }` as JSON to the `SERVICE_DocCenter_GetDocById` endpoint with the
 * normalized cookie, and reads `data.currentContent` from the JSON reply.
 *
 * @param {*} docId - Document identifier forwarded in the request body.
 * @param {*} cookie - Cookie value, normalized via `normalizeCookie`.
 * @returns {Promise<string>} The document content, or '' when the reply has none.
 */
async function fetchDoc(docId, cookie) {
  const endpoint = `${DOCCENTER_BASE}/SERVICE_DocCenter_GetDocById`;
  const headers = {
    'Content-Type': 'application/json',
    'Cookie': normalizeCookie(cookie),
  };
  const response = await fetch(endpoint, {
    method: 'POST',
    headers,
    body: JSON.stringify({ docId }),
  });
  const payload = await response.json();
  const content = payload?.data?.currentContent;
  return content || '';
}
/**
 * Run the `claude` CLI in print mode with `prompt` on stdin and return its stdout.
 *
 * Compared with a naive implementation this:
 * - decodes stdout/stderr as UTF-8 streams, so a multi-byte character split across
 *   two chunks is not corrupted;
 * - installs an `error` listener on stdin, so a child that exits before reading the
 *   prompt (EPIPE) cannot crash the process with an unhandled stream error;
 * - settles the promise exactly once, including on the timeout path;
 * - mentions the terminating signal in the error when the child was killed.
 *
 * @param {string} prompt - Text written to the CLI's stdin.
 * @param {object} [options]
 * @param {string} [options.systemPrompt=''] - Passed as `--system-prompt` when non-empty.
 * @param {string} [options.model=MODEL] - Passed as `--model`.
 * @param {number} [options.timeoutMs=1200000] - Time after which the child gets SIGTERM.
 * @returns {Promise<string>} Trimmed stdout of the CLI.
 * @throws {Error} On spawn failure, timeout, or a non-zero exit.
 */
async function runClaudePrompt(prompt, { systemPrompt = '', model = MODEL, timeoutMs = 20 * 60 * 1000 } = {}) {
  return await new Promise((resolve, reject) => {
    const args = ['--print', '--no-session-persistence', '--model', model, '--tools', ''];
    if (systemPrompt) args.push('--system-prompt', systemPrompt);
    const env = { ...process.env, NO_PROXY: 'localhost,127.0.0.1,::1' };
    delete env.CLAUDECODE;
    const proc = spawn('claude', args, {
      stdio: ['pipe', 'pipe', 'pipe'],
      env,
    });
    let stdout = '';
    let stderr = '';
    let finished = false;
    const timer = setTimeout(() => {
      settle(reject, new Error(`claude prompt timed out after ${Math.round(timeoutMs / 1000)}s`));
      proc.kill('SIGTERM');
    }, timeoutMs);
    // Resolve or reject once; later events (e.g. `close` after a timeout) are ignored.
    const settle = (complete, value) => {
      if (finished) return;
      finished = true;
      clearTimeout(timer);
      complete(value);
    };
    // String mode buffers incomplete multi-byte sequences between chunks.
    proc.stdout.setEncoding('utf8');
    proc.stderr.setEncoding('utf8');
    proc.stdout.on('data', (chunk) => {
      stdout += chunk;
    });
    proc.stderr.on('data', (chunk) => {
      stderr += chunk;
    });
    proc.on('error', (err) => settle(reject, err));
    proc.on('close', (code, signal) => {
      if (code !== 0) {
        const signalNote = signal ? ` (signal ${signal})` : '';
        settle(reject, new Error(`claude exited with code ${code}${signalNote}: ${stderr.slice(0, 600)}`));
        return;
      }
      settle(resolve, stdout.trim());
    });
    // A write failure on stdin is reported through the child's `error`/`close` events;
    // the listener only prevents an unhandled stream error.
    proc.stdin.on('error', () => {});
    proc.stdin.end(prompt);
  });
}
/**
 * Parse JSON out of a model response.
 *
 * Attempts, in order: the whole text (or the body of the first Markdown code fence),
 * the span from the first `{` to the last `}`, then the span from the first `[` to
 * the last `]` so that a top-level array wrapped in prose is recoverable too.
 *
 * @param {*} text - Raw response text.
 * @returns {*} The parsed JSON value.
 * @throws {Error} 'empty response' for blank input, or 'could not extract JSON from
 *   response' (with the last parse error as `cause`) when every attempt fails.
 */
function extractJson(text) {
  const trimmed = String(text || '').trim();
  if (!trimmed) throw new Error('empty response');
  const fence = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/i);
  const candidate = fence ? fence[1].trim() : trimmed;

  const attempts = [candidate];
  for (const [open, close] of [['{', '}'], ['[', ']']]) {
    const first = candidate.indexOf(open);
    const last = candidate.lastIndexOf(close);
    if (first >= 0 && last > first) {
      attempts.push(candidate.slice(first, last + 1));
    }
  }

  let lastError;
  for (const attempt of attempts) {
    try {
      return JSON.parse(attempt);
    } catch (err) {
      // Fall through to the next, narrower slice.
      lastError = err;
    }
  }
  throw new Error('could not extract JSON from response', { cause: lastError });
}
/**
 * Write a `{ relativePath: content }` map below `projectDir`.
 *
 * Every key is checked before anything is written: a path that would resolve
 * outside `projectDir` (for example via `..`) is rejected, so a malformed map can
 * neither escape the project folder nor leave a half-written project behind.
 *
 * @param {string} projectDir - Root directory for the files.
 * @param {Object<string, *>} filesMap - Relative path -> content (stringified); null/undefined means no files.
 * @returns {Promise<string[]>} The written relative paths, sorted.
 * @throws {Error} When a key resolves outside `projectDir`.
 */
async function writeObjectFiles(projectDir, filesMap) {
  const root = path.resolve(projectDir);
  const jobs = Object.entries(filesMap || {}).map(([relPath, content]) => {
    const target = path.join(projectDir, relPath);
    const fromRoot = path.relative(root, path.resolve(target));
    const escapes = fromRoot === '..' || fromRoot.startsWith(`..${path.sep}`) || path.isAbsolute(fromRoot);
    if (escapes) {
      throw new Error(`refusing to write outside project directory: ${relPath}`);
    }
    return { relPath, target, content };
  });

  const written = [];
  for (const { relPath, target, content } of jobs) {
    await fs.mkdir(path.dirname(target), { recursive: true });
    await fs.writeFile(target, String(content), 'utf-8');
    written.push(relPath);
  }
  return written.sort();
}
/**
 * Map `items` through an async `iterator` with at most `limit` calls in flight.
 *
 * - Results keep the order of `items`.
 * - A `limit` that is missing or not a number falls back to 1 (sequential); before,
 *   such a limit produced zero workers and an array of `undefined` results.
 * - After the first failure no further items are started; calls already in flight
 *   are not cancelled. The returned promise rejects with that first error.
 *
 * @param {Array} items - Input values.
 * @param {number} limit - Maximum number of concurrent iterator calls (minimum 1).
 * @param {(item: *, index: number) => *} iterator - Called with the item and its index; may return a promise.
 * @returns {Promise<Array>} Iterator results, index-aligned with `items`.
 */
async function mapLimit(items, limit, iterator) {
  const results = new Array(items.length);
  const requested = Math.floor(Number(limit));
  const width = Number.isNaN(requested) ? 1 : requested;
  const workerCount = Math.max(1, Math.min(width, items.length));
  let nextIndex = 0;
  let failed = false;

  async function worker() {
    while (!failed && nextIndex < items.length) {
      const current = nextIndex;
      nextIndex += 1;
      try {
        results[current] = await iterator(items[current], current);
      } catch (err) {
        // Tell the sibling workers to stop picking up new items.
        failed = true;
        throw err;
      }
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
/**
 * Assemble the prompt text for generating one file of the direct baseline.
 *
 * Every prompt starts with the same preamble (VL digest, serialized project
 * contract, global requirement, output rules), followed by the target file line and
 * kind-specific instructions.
 *
 * @param {object} options
 * @param {string} options.kind - One of 'database', 'theme', 'service', 'component', 'section', 'app'.
 * @param {object} options.target - Contract entry for the file; must carry `filePath`.
 * @param {*} options.latestThemeDoc - Theme reference text, only embedded for kind 'theme'.
 * @returns {string} The full prompt.
 * @throws {Error} When `kind` is not one of the supported values.
 */
function buildDirectFilePrompt({ kind, target, latestThemeDoc }) {
  const pretty = (value) => JSON.stringify(value, null, 2);
  const preamble = [
    'You are generating one VL file for the CampusOps benchmark.',
    `${DIRECT_VL_DIGEST}`,
    'Project contract:',
    '<project-contract>',
    `${pretty(DIRECT_CONTRACT)}`,
    '</project-contract>',
    'Global requirement:',
    `${APP_REQUIREMENT}`,
    'Common output rules:',
    '- Output only raw VL source code for the requested file.',
    '- Do not use markdown fences.',
    '- The first line must be // VL_VERSION:3.5',
    '- Keep identifiers and file references exactly aligned with the project contract.',
    '- Use compile-safe VL only.',
  ].join('\n');

  // Lines that follow the "Target file" line, depending on the file kind.
  let details;
  switch (kind) {
    case 'database':
      details = [
        'Generate the .vdb file with realistic seed data for the declared tables. Keep relations simple and consistent with the contract.',
      ];
      break;
    case 'theme':
      details = [
        'Latest Theme 6.5 reference:',
        '<theme-doc>',
        `${latestThemeDoc}`,
        '</theme-doc>',
        'Generate a custom enterprise-light theme for CampusOps. It must keep base_theme:"Platform/Theme-Default-Light@1", use # Point Slot Values, and visibly reflect the contract palette.',
      ];
      break;
    case 'service':
      details = [
        'Service contract:',
        `${pretty(target)}`,
        'Generate one ServiceDomain file with virtual tables or direct table access as needed. Keep query filters safe: skip optional filters when empty, 0, or -1.',
      ];
      break;
    case 'component':
      details = [
        'Component contract:',
        `${pretty(target)}`,
        'Generate one reusable pure UI component.',
      ];
      break;
    case 'section':
      details = [
        'Section contract:',
        `${pretty(target)}`,
        'Available services:',
        `${pretty(DIRECT_CONTRACT.services)}`,
        'Available components:',
        `${pretty(DIRECT_CONTRACT.components)}`,
        'Generate one Section file with local state, service calls, and simple event handlers.',
      ];
      break;
    case 'app':
      details = [
        'App contract:',
        `${pretty(target)}`,
        'Available sections:',
        `${pretty(DIRECT_CONTRACT.sections)}`,
        'Generate one App file that routes to the declared sections and uses a clear sidebar + content layout.',
      ];
      break;
    default:
      throw new Error(`Unknown direct generation kind: ${kind}`);
  }

  return [preamble, `Target file: ${target.filePath}`, ...details].join('\n');
}
/**
 * Generate one file of the direct baseline with a single CLI call.
 *
 * The prompt and the raw reply are saved under `Process/DirectGeneration`
 * (`<name>.prompt.txt` / `<name>.raw.txt`); the trimmed reply plus a trailing newline
 * is written to the target path inside the project.
 *
 * @param {object} options
 * @param {string} options.projectDir - Project root directory.
 * @param {string} options.kind - File kind understood by `buildDirectFilePrompt`.
 * @param {object} options.target - Contract entry with `filePath`.
 * @param {*} options.latestThemeDoc - Theme reference forwarded to the prompt builder.
 * @returns {Promise<{filePath: string, kind: string, durationMs: number}>} Timing of the CLI call only.
 */
async function generateSingleDirectFile({ projectDir, kind, target, latestThemeDoc }) {
  const prompt = buildDirectFilePrompt({ kind, target, latestThemeDoc });
  const { filePath } = target;

  // Artifact names are derived from the file's base name with unsafe characters replaced.
  const artifactStem = path.basename(filePath).replace(/[^\w.-]/g, '_');
  const artifactDir = path.join(projectDir, 'Process', 'DirectGeneration');
  const artifactPath = (suffix) => path.join(artifactDir, `${artifactStem}.${suffix}.txt`);

  await fs.mkdir(artifactDir, { recursive: true });
  await fs.writeFile(artifactPath('prompt'), prompt, 'utf-8');

  const callStart = performance.now();
  const raw = await runClaudePrompt(prompt, {
    systemPrompt: `Generate only the VL source for ${filePath}.`,
    timeoutMs: 8 * 60 * 1000,
  });
  const durationMs = Math.round(performance.now() - callStart);

  await fs.writeFile(artifactPath('raw'), raw, 'utf-8');

  const outputPath = path.join(projectDir, filePath);
  await fs.mkdir(path.dirname(outputPath), { recursive: true });
  await fs.writeFile(outputPath, `${raw.trim()}\n`, 'utf-8');

  return { filePath, kind, durationMs };
}
/**
 * Run the direct (one prompt per file) baseline for the whole contract.
 *
 * Files are generated in four phases, each with its own concurrency limit:
 * database + theme (2), services + components (4), sections (3), app (1).
 * The contract and required paths are first saved to `Process/DirectContext.json`.
 *
 * @param {object} options
 * @param {string} options.projectDir - Project root directory.
 * @param {*} options.latestThemeDoc - Theme reference passed to every file generation.
 * @param {*} options.latestVlVersion - Recorded in the context file.
 * @returns {Promise<object>} Total duration, fastest phase-1 file duration, missing/extra/written paths,
 *   the declared project name and per-file stats.
 */
async function runDirectBaseline({ projectDir, latestThemeDoc, latestVlVersion }) {
  await ensureProjectScaffold(projectDir);

  const contextSnapshot = {
    latestVlVersion,
    contract: DIRECT_CONTRACT,
    requiredPaths: DIRECT_REQUIRED_PATHS,
  };
  await fs.writeFile(
    path.join(projectDir, 'Process', 'DirectContext.json'),
    JSON.stringify(contextSnapshot, null, 2),
    'utf-8',
  );

  const asPlan = (kind) => (target) => ({ kind, target });
  const phases = [
    {
      limit: 2,
      items: [
        { kind: 'database', target: { filePath: DIRECT_CONTRACT.dataModel.filePath } },
        { kind: 'theme', target: { filePath: DIRECT_CONTRACT.theme.filePath } },
      ],
    },
    {
      limit: 4,
      items: [
        ...DIRECT_CONTRACT.services.map(asPlan('service')),
        ...DIRECT_CONTRACT.components.map(asPlan('component')),
      ],
    },
    { limit: 3, items: DIRECT_CONTRACT.sections.map(asPlan('section')) },
    { limit: 1, items: [{ kind: 'app', target: DIRECT_CONTRACT.app }] },
  ];
  const generate = (item) => generateSingleDirectFile({
    projectDir,
    latestThemeDoc,
    ...item,
  });

  const startedAt = performance.now();
  const phaseResults = [];
  for (const { items, limit } of phases) {
    // Phases run strictly one after another; only files within a phase run concurrently.
    phaseResults.push(await mapLimit(items, limit, generate));
  }
  const durationMs = Math.round(performance.now() - startedAt);

  const fileStats = phaseResults.flat();
  const [firstPhase] = phaseResults;
  const writtenPaths = fileStats.map((item) => item.filePath).sort();
  const missingPaths = DIRECT_REQUIRED_PATHS.filter((relPath) => !writtenPaths.includes(relPath));
  const extraPaths = writtenPaths.filter((relPath) => !DIRECT_REQUIRED_PATHS.includes(relPath)).sort();

  return {
    durationMs,
    firstArtifactMs: firstPhase.length ? Math.min(...firstPhase.map((item) => item.durationMs)) : null,
    missingPaths,
    extraPaths,
    writtenPaths,
    declaredProjectName: DIRECT_CONTRACT.projectName,
    fileStats,
  };
}
/**
 * Run the workflow-based baseline and record its timeline, log and file events.
 *
 * The three artifacts (`WorkflowTimeline.json`, `WorkflowLog.txt`,
 * `WorkflowFileEvents.json` under `Process/`) are written even when the workflow
 * fails, so a failed run can still be inspected; the workflow error is rethrown
 * afterwards.
 *
 * @param {object} options
 * @param {string} options.projectDir - Project root directory (used as executor work dir).
 * @param {*} options.cookie - Forwarded to the executor.
 * @returns {Promise<{durationMs: number, timeline: object[], firstArtifactMs: (number|null)}>}
 * @throws {Error} The workflow failure, or an artifact write failure after a successful run.
 */
async function runWorkflowBaseline({ projectDir, cookie }) {
  await ensureProjectScaffold(projectDir);
  const workflow = JSON.parse(await fs.readFile(WORKFLOW_PATH, 'utf-8'));
  const executor = new WorkflowExecutor({
    workDir: projectDir,
    model: MODEL,
    llmProvider: 'cli',
    cookie,
  });
  const timeline = [];
  const logLines = [];
  const fileEvents = [];

  // Append one timeline entry; `extra` carries the status-specific fields.
  const recordNode = (status, info, extra = {}) => {
    timeline.push({
      nodeId: info.nodeId,
      title: info.title,
      type: info.type,
      status,
      at: new Date().toISOString(),
      ...extra,
    });
  };
  const persistArtifacts = async () => {
    const processDir = path.join(projectDir, 'Process');
    await fs.writeFile(path.join(processDir, 'WorkflowTimeline.json'), JSON.stringify(timeline, null, 2), 'utf-8');
    await fs.writeFile(path.join(processDir, 'WorkflowLog.txt'), logLines.join('\n'), 'utf-8');
    await fs.writeFile(path.join(processDir, 'WorkflowFileEvents.json'), JSON.stringify(fileEvents, null, 2), 'utf-8');
  };

  const startedAt = performance.now();
  let failure = null;
  try {
    await new Promise((resolve, reject) => {
      const pending = executor.execute(workflow, {
        userRequest: APP_REQUIREMENT,
        targetLang: TARGET_LANG,
      }, {
        onText: (text) => {
          const msg = String(text || '').trim();
          if (msg) logLines.push(msg);
        },
        onNodeStart: (info) => recordNode('start', info),
        onNodeDone: (info) => recordNode('done', info, { duration_ms: info.duration_ms || 0 }),
        onNodeError: (info) => recordNode('error', info, { error: info.error || 'unknown error' }),
        onFileWritten: (filePath) => {
          fileEvents.push({
            filePath,
            atMs: Math.round(performance.now() - startedAt),
          });
        },
        onDone: (info) => resolve(info),
        onError: (message) => reject(new Error(message || 'workflow generation failed')),
      });
      // NOTE(review): execute()'s return value is not visible from this file. If it is a
      // promise, a rejection is routed here so the run fails instead of hanging.
      Promise.resolve(pending).catch(reject);
    });
  } catch (err) {
    failure = err;
  }
  const durationMs = Math.round(performance.now() - startedAt);

  try {
    await persistArtifacts();
  } catch (persistError) {
    // When the workflow already failed, its error is the one worth reporting.
    if (!failure) throw persistError;
  }
  if (failure) throw failure;

  return {
    durationMs,
    timeline,
    firstArtifactMs: fileEvents.length ? Math.min(...fileEvents.map((item) => item.atMs)) : null,
  };
}
/**
 * Extract error and warning counts from the validator's text output.
 *
 * Recognizes the "All N VL files passed validation." message (0 errors, 0 warnings)
 * and a "Validation: X errors, Y warnings" summary. Singular forms
 * ("1 error, 1 warning") are accepted as well, so a count of one is not reported
 * as unknown.
 *
 * @param {*} validationText - Validator output.
 * @returns {{errors: (number|null), warnings: (number|null), raw: string}}
 *   Counts are null when no summary was found.
 */
function parseValidationSummary(validationText) {
  const text = String(validationText || '');
  if (/All \d+ VL files passed validation\./.test(text)) {
    return { errors: 0, warnings: 0, raw: text };
  }
  const match = text.match(/Validation:\s+(\d+)\s+errors?,\s+(\d+)\s+warnings?/i);
  return {
    errors: match ? Number(match[1]) : null,
    warnings: match ? Number(match[2]) : null,
    raw: text,
  };
}
/**
 * Read every text-like project file below `projectDir` into a map.
 *
 * Entries whose name starts with '.' are skipped, except `.vl-code`. Only files with
 * the extensions vx, sc, cp, vs, vdb, vth, json, txt or md (case-insensitive) are
 * read; files that cannot be read are left out.
 *
 * @param {string} projectDir - Directory to walk recursively.
 * @returns {Promise<Object<string, string>>} Relative path ('/'-separated) -> UTF-8 content.
 */
async function collectFileContents(projectDir) {
  const collected = {};
  const readableName = /\.(vx|sc|cp|vs|vdb|vth|json|txt|md)$/i;

  const visit = async (directory, relativeBase) => {
    const children = await fs.readdir(directory, { withFileTypes: true });
    for (const child of children) {
      const { name } = child;
      if (name.startsWith('.') && name !== '.vl-code') {
        continue;
      }
      const absolutePath = path.join(directory, name);
      const relativePath = relativeBase ? `${relativeBase}/${name}` : name;
      if (child.isDirectory()) {
        await visit(absolutePath, relativePath);
        continue;
      }
      if (!readableName.test(name)) {
        continue;
      }
      try {
        collected[relativePath] = await fs.readFile(absolutePath, 'utf-8');
      } catch {
        // Unreadable files are simply omitted from the result.
      }
    }
  };

  await visit(projectDir, '');
  return collected;
}
/**
 * Count files and lines per file extension.
 *
 * A single trailing line break does not count as an extra (empty) line, empty
 * content counts as zero lines, and both LF and CRLF line endings are understood.
 *
 * @param {Object<string, *>} fileMap - Relative path -> file content; null/undefined means no files.
 * @returns {Object<string, {files: number, lines: number, nonEmptyLines: number}>}
 *   Keyed by extension including the dot, or 'none' for files without one.
 */
function countLocByExt(fileMap) {
  const stats = {};
  for (const [relPath, content] of Object.entries(fileMap || {})) {
    const ext = path.extname(relPath) || 'none';
    const text = String(content || '');
    // Drop one terminating line break before splitting so "a\n" is one line, not two.
    const lines = text === '' ? [] : text.replace(/\r?\n$/, '').split(/\r?\n/);
    stats[ext] ??= { files: 0, lines: 0, nonEmptyLines: 0 };
    stats[ext].files += 1;
    stats[ext].lines += lines.length;
    stats[ext].nonEmptyLines += lines.filter((line) => line.trim()).length;
  }
  return stats;
}
/**
 * Summarize a theme (.vth) file.
 *
 * `lines` ignores a single trailing line break and is 0 for empty content.
 * `assignmentCount` counts non-empty lines containing ':' that do not start with
 * '//', '#' or '<'.
 *
 * @param {*} themeContent - Theme file text; falsy values are treated as ''.
 * @returns {{lines: number, assignmentCount: number, hasPointSlotValues: boolean,
 *   hasDesignTokens: boolean, hasBaseTheme: boolean}}
 */
function summarizeTheme(themeContent) {
  const text = String(themeContent || '');
  // Drop one terminating line break before splitting so it is not counted as a line.
  const lines = text === '' ? [] : text.replace(/\r?\n$/, '').split(/\r?\n/);
  const skippedPrefixes = ['//', '#', '<'];
  const assignmentCount = lines.filter((line) => {
    const trimmed = line.trim();
    if (!trimmed || !trimmed.includes(':')) return false;
    return !skippedPrefixes.some((prefix) => trimmed.startsWith(prefix));
  }).length;
  return {
    lines: lines.length,
    assignmentCount,
    hasPointSlotValues: /# Point Slot Values/.test(text),
    hasDesignTokens: /# Design Tokens/.test(text),
    hasBaseTheme: /base_theme:"Platform\/Theme-Default-Light@1"/.test(text),
  };
}
/**
 * Zip the project's VL files and send them to the remote `parsevl` endpoint.
 *
 * The archive is built by a `find | zip` pipeline that runs with `projectDir` as its
 * working directory, so the directory name is never interpolated into the shell
 * command (quotes, `$` or backticks in the path cannot break or alter it). A leftover
 * archive from an aborted run is removed first so `zip` does not update stale content.
 * The temporary archive is deleted afterwards on a best-effort basis.
 *
 * @param {string} projectDir - Project root directory.
 * @param {*} cookie - Cookie value, normalized via `normalizeCookie`.
 * @returns {Promise<object>} On a completed request: `httpOk`, `code`, `success`, `message`,
 *   `errCount`, `warningCount`, `previewUrls`, `errList` (first 20 diagnostics) and `raw`.
 *   On any failure: `success: false`, `error`, null counts and empty `previewUrls`/`errList`.
 */
async function compileProject(projectDir, cookie) {
  const zipName = '__benchmark_compile.zip';
  const zipPath = path.join(projectDir, zipName);
  const bodyCookie = normalizeCookie(cookie);
  try {
    await fs.rm(zipPath, { force: true });
    execSync(
      `find . -type f \\( -name "*.vx" -o -name "*.sc" -o -name "*.cp" -o -name "*.vs" -o -name "*.vdb" -o -name "*.vth" \\) | zip -q -@ ${zipName}`,
      { cwd: projectDir, timeout: 30_000 },
    );
    const zipBuffer = await fs.readFile(zipPath);
    const dataUrl = `data:application/zip;base64,${zipBuffer.toString('base64')}`;
    const res = await fetch(`${PARSEVL_URL}/edtfn/parsevl`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Cookie': bodyCookie,
      },
      body: JSON.stringify({
        action: 'parsePjt',
        file: dataUrl,
        download: true,
        projectName: path.basename(projectDir),
      }),
    });
    const json = await res.json();
    const diagnostics = Array.isArray(json?.data?.errList) ? json.data.errList : [];
    // Anything not explicitly marked as a warning counts as a hard error.
    const isWarning = (item) => String(item?.level || '').toLowerCase() === 'warning';
    const warnings = diagnostics.filter(isWarning);
    const hardErrors = diagnostics.filter((item) => !isWarning(item));
    return {
      httpOk: res.ok,
      code: json?.code ?? null,
      success: res.ok && (json?.code === 0 || json?.code === 200) && hardErrors.length === 0,
      message: json?.message || null,
      errCount: hardErrors.length,
      warningCount: warnings.length,
      previewUrls: json?.data?.appPreviewUrlMap || {},
      errList: diagnostics.slice(0, 20),
      raw: json,
    };
  } catch (err) {
    return {
      success: false,
      error: err.message,
      errCount: null,
      warningCount: null,
      previewUrls: {},
      errList: [],
    };
  } finally {
    try {
      await fs.unlink(zipPath);
    } catch {
      // Best-effort cleanup: the archive may not exist if zipping failed.
    }
  }
}
/**
 * Validate, inspect and compile a generated project, and save the findings.
 *
 * Runs the local validator over all files, extracts and validates metadata from the
 * VL files, compiles the project remotely, and writes `BenchmarkValidation.txt`,
 * `BenchmarkCompile.json` and (when metadata was extracted) `BenchmarkExtractedMeta.json`
 * into `Process/`.
 *
 * @param {string} projectDir - Project root directory.
 * @param {object} generationMeta - Only `cookie` is read here (used for the compile request).
 * @returns {Promise<object>} File count, line stats, validation summary, metadata validity and
 *   issues, compile result, theme summary and metadata entity counts (or null).
 */
async function analyzeProject(projectDir, generationMeta) {
  const processDir = path.join(projectDir, 'Process');
  const vlSourceName = /\.(vx|sc|cp|vs|vdb|vth)$/i;
  const sizeOf = (value) => (Array.isArray(value) ? value.length : 0);

  const ctx = new VLProjectContext(projectDir);
  await ctx.load();
  const validationText = await createVLValidateTool(ctx).execute({ file_path: 'all' });
  const validation = parseValidationSummary(validationText);

  const allFiles = await collectFileContents(projectDir);
  const vlEntries = Object.entries(allFiles).filter(([relPath]) => vlSourceName.test(relPath));
  const vlFileMap = Object.fromEntries(vlEntries);

  const metadata = extractFromFileTree(vlFileMap, projectDir);
  const metaValidation = validateMeta(metadata);
  const compile = await compileProject(projectDir, generationMeta.cookie);

  await fs.writeFile(path.join(processDir, 'BenchmarkValidation.txt'), validation.raw || '', 'utf-8');
  await fs.writeFile(path.join(processDir, 'BenchmarkCompile.json'), JSON.stringify(compile, null, 2), 'utf-8');
  if (metadata) {
    await fs.writeFile(path.join(processDir, 'BenchmarkExtractedMeta.json'), JSON.stringify(metadata, null, 2), 'utf-8');
  }

  let extractedMeta = null;
  if (metadata) {
    extractedMeta = {
      projectName: metadata.projectName || null,
      services: sizeOf(metadata.services),
      components: sizeOf(metadata.components),
      sections: sizeOf(metadata.sections),
      apps: sizeOf(metadata.apps),
      tables: sizeOf(metadata.dataSchema?.tables),
    };
  }

  // The first .vth file found (in collection order) is the one summarized.
  const themePath = Object.keys(vlFileMap).find((relPath) => relPath.endsWith('.vth'));
  const themeText = themePath ? vlFileMap[themePath] : '';

  return {
    projectDir,
    fileCount: Object.keys(vlFileMap).length,
    locByExt: countLocByExt(vlFileMap),
    validation,
    metadataValid: metaValidation.valid,
    metadataIssues: metaValidation.issues,
    compile,
    theme: summarizeTheme(themeText),
    extractedMeta,
  };
}
/**
 * Flatten one run's timing and analysis into the metrics record shown in the report.
 *
 * @param {string} label - Display name of the generation path (e.g. 'Direct').
 * @param {object} runMeta - Provides `durationMs` and optionally `firstArtifactMs`.
 * @param {object} analysis - Provides `fileCount`, `validation`, `metadataValid`,
 *   `metadataIssues`, `compile` and `theme`.
 * @returns {object} Flat metrics; `firstArtifactMs` is null when absent and
 *   `previewCount` is 0 when there are no preview URLs.
 */
function summarizePathMetrics(label, runMeta, analysis) {
  const { durationMs, firstArtifactMs } = runMeta;
  const { fileCount, validation, metadataValid, metadataIssues, compile, theme } = analysis;
  const previewUrls = compile.previewUrls || {};
  return {
    label,
    durationMs,
    firstArtifactMs: firstArtifactMs ?? null,
    fileCount,
    validationErrors: validation.errors,
    validationWarnings: validation.warnings,
    metadataValid,
    metadataIssueCount: metadataIssues.length,
    compileSuccess: compile.success,
    compileErrors: compile.errCount,
    compileWarnings: compile.warningCount,
    previewCount: Object.keys(previewUrls).length,
    themeAssignments: theme.assignmentCount,
    themeLines: theme.lines,
  };
}
/**
 * Renders the human-readable Markdown benchmark report.
 *
 * The document is a title followed by the sections Scenario, Requirement,
 * Summary, Direct, Workflow and Notes, each separated by one blank line.
 *
 * @param {object} context
 * @param {object} context.docs - `latestVlVersion` is read; falsy renders as "unknown".
 * @param {object} context.directRun - Direct run result; also supplies
 *   `missingPaths` and `extraPaths` for the Notes section.
 * @param {object} context.workflowRun - Workflow run result.
 * @param {object} context.directAnalysis - Analysis of the direct project.
 * @param {object} context.workflowAnalysis - Analysis of the workflow project.
 * @returns {string} Markdown text without a trailing newline.
 */
function buildMarkdownReport(context) {
  const { docs, directRun, workflowRun, directAnalysis, workflowAnalysis } = context;
  const directSummary = summarizePathMetrics('Direct', directRun, directAnalysis);
  const workflowSummary = summarizePathMetrics('Workflow', workflowRun, workflowAnalysis);

  // Pretty-printed JSON wrapped in a Markdown code fence.
  const fencedJson = (value) => ['```json', JSON.stringify(value, null, 2), '```'];
  // Empty lists render as the word "none".
  const listOrNone = (items, separator) => items.join(separator) || 'none';

  const scenarioLines = [
    '- App: CampusOps',
    `- Model: ${MODEL}`,
    `- Target language: ${TARGET_LANG}`,
    `- Latest DocCenter VL syntax version observed: ${docs.latestVlVersion || 'unknown'}`,
    '- Latest Theme reference: Theme-Enterprise-6.5',
    '- Workflow baseline: VLCode-Lite 6-file codegen workflow',
  ];

  const overviewLines = [
    `- Direct baseline project: ${directAnalysis.projectDir}`,
    `- Workflow baseline project: ${workflowAnalysis.projectDir}`,
    `- Direct duration: ${directSummary.durationMs} ms`,
    `- Workflow duration: ${workflowSummary.durationMs} ms`,
    `- Direct compile success: ${directSummary.compileSuccess}`,
    `- Workflow compile success: ${workflowSummary.compileSuccess}`,
  ];

  const noteLines = [
    `- Direct missing paths: ${listOrNone(directRun.missingPaths, ', ')}`,
    `- Direct extra paths: ${listOrNone(directRun.extraPaths, ', ')}`,
    `- Direct metadata issues: ${listOrNone(directAnalysis.metadataIssues, ' | ')}`,
    `- Workflow metadata issues: ${listOrNone(workflowAnalysis.metadataIssues, ' | ')}`,
  ];

  // Each entry is one block of lines; blocks are later separated by a blank line.
  const blocks = [
    [`# LLM Codegen Benchmark (${TODAY})`],
    ['## Scenario', '', ...scenarioLines],
    // Kept as an array element (not interpolated) so join() stringifies it.
    ['## Requirement', '', APP_REQUIREMENT],
    ['## Summary', '', ...overviewLines],
    ['## Direct', '', ...fencedJson(directSummary)],
    ['## Workflow', '', ...fencedJson(workflowSummary)],
    ['## Notes', '', ...noteLines],
  ];

  return blocks.map((block) => block.join('\n')).join('\n\n');
}
/**
 * Benchmark entry point.
 *
 * Creates the output directories, fetches the reference docs, runs the direct
 * and workflow baselines, analyzes both generated projects, then writes a JSON
 * report and a Markdown report into REPORT_DIR and prints a summary to stdout.
 *
 * @returns {Promise<void>}
 * @throws {Error} When no cookie is available; any error from the helpers or
 *   from the filesystem propagates to the caller as well.
 */
async function main() {
  await fs.mkdir(TEST_ROOT, { recursive: true });
  await fs.mkdir(REPORT_DIR, { recursive: true });

  const cookie = getCookie({ workDir: process.cwd(), cookie: '' });
  if (!cookie) {
    throw new Error('No DocCenter/cloud cookie found. Cannot run authenticated benchmark.');
  }

  console.log(`[benchmark] fetching latest docs from DocCenter`);
  // The two documents do not depend on each other, so request them concurrently.
  // Doc id 1 is consumed as the VL syntax doc and id 4 as the theme doc.
  const [latestVlDoc, latestThemeDoc] = await Promise.all([
    fetchDoc(1, cookie),
    fetchDoc(4, cookie),
  ]);
  // Pull the version out of a "Current version: `// VL_VERSION:<x>`" marker; null when absent.
  const latestVlVersion = latestVlDoc.match(/Current version:\s*`\/\/\s*VL_VERSION:([^`]+)`/i)?.[1]?.trim() || null;

  const directProjectName = projectNameWithFallback(`CampusOpsDirect${slugDate(TODAY)}Test`);
  const workflowProjectName = projectNameWithFallback(`CampusOpsWorkflow${slugDate(TODAY)}Test`);
  const directProjectDir = path.join(TEST_ROOT, directProjectName);
  const workflowProjectDir = path.join(TEST_ROOT, workflowProjectName);

  // The baselines are deliberately run one after the other: each run result
  // carries a durationMs that is reported, and overlapping the runs would
  // presumably distort those timings.
  console.log(`[benchmark] direct baseline -> ${directProjectDir}`);
  const directRun = await runDirectBaseline({
    projectDir: directProjectDir,
    latestThemeDoc,
    latestVlVersion,
  });

  console.log(`[benchmark] workflow baseline -> ${workflowProjectDir}`);
  const workflowRun = await runWorkflowBaseline({
    projectDir: workflowProjectDir,
    cookie,
  });

  // Analyses stay sequential so their log lines remain in order.
  console.log('[benchmark] analyzing direct baseline');
  const directAnalysis = await analyzeProject(directProjectDir, { cookie });
  console.log('[benchmark] analyzing workflow baseline');
  const workflowAnalysis = await analyzeProject(workflowProjectDir, { cookie });

  const report = {
    createdAt: new Date().toISOString(),
    model: MODEL,
    latestDocs: {
      latestVlVersion,
      themeName: 'Theme-Enterprise-6.5',
      themeLength: latestThemeDoc.length,
    },
    requirement: APP_REQUIREMENT,
    direct: {
      run: directRun,
      analysis: directAnalysis,
      summary: summarizePathMetrics('Direct', directRun, directAnalysis),
    },
    workflow: {
      run: workflowRun,
      analysis: workflowAnalysis,
      summary: summarizePathMetrics('Workflow', workflowRun, workflowAnalysis),
    },
  };

  const reportJsonPath = path.join(REPORT_DIR, `llm-codegen-benchmark-${TODAY}.json`);
  const reportMdPath = path.join(REPORT_DIR, `llm-codegen-benchmark-${TODAY}.md`);
  await fs.writeFile(reportJsonPath, JSON.stringify(report, null, 2), 'utf-8');
  await fs.writeFile(reportMdPath, buildMarkdownReport({
    docs: report.latestDocs,
    directRun,
    workflowRun,
    directAnalysis,
    workflowAnalysis,
  }), 'utf-8');

  console.log('[benchmark] complete');
  console.log(JSON.stringify({
    reportJsonPath,
    reportMdPath,
    directProjectDir,
    workflowProjectDir,
    directSummary: report.direct.summary,
    workflowSummary: report.workflow.summary,
  }, null, 2));
}
// Run the benchmark. On failure, log a one-line reason and mark the process as
// failed via exitCode (rather than exiting immediately).
main().catch((err) => {
  // A rejection value is not guaranteed to be an Error: a thrown string would
  // log `undefined`, and a null/undefined rejection would throw right here
  // before exitCode is set. Fall back to the stringified value in those cases.
  const reason = err?.message ?? String(err);
  console.error('[benchmark] failed:', reason);
  process.exitCode = 1;
});
|