#!/usr/bin/env node
/**
 * LLM codegen benchmark for the "CampusOps" scenario.
 *
 * Generates the same application twice — once with one direct `claude` CLI
 * prompt per file ("direct baseline") and once through the WorkflowExecutor
 * with the 6-file codegen workflow ("workflow baseline") — then validates,
 * extracts metadata from, and remotely compiles both projects, and writes a
 * JSON and a Markdown report under docs/benchmarks.
 */
import fs from 'fs/promises';
import fsSync from 'fs';
import os from 'os';
import path from 'path';
import { spawn, execSync } from 'child_process';
import { performance } from 'perf_hooks';

import { PARSEVL_URL } from '../src/data/versions.js';
import { getCookie } from '../src/server/helpers.js';
import { WorkflowExecutor } from '../src/vl/workflow-executor.js';
import { VLProjectContext } from '../src/vl/project-context.js';
import { createVLValidateTool } from '../src/tools/vl-validate.js';
import { extractFromFileTree, validateMeta } from '../src/vl/metadata-extractor.js';

const MODEL = process.env.VL_CODE_MODEL || 'claude-opus-4-6';
const TARGET_LANG = 'zh-CN';
// toISOString() is UTC, so the report date is the UTC calendar day.
const TODAY = new Date().toISOString().slice(0, 10);
const TEST_ROOT = path.join(os.homedir(), 'Documents', 'VLProjects', '_tests');
const REPORT_DIR = path.join(process.cwd(), 'docs', 'benchmarks');
const WORKFLOW_PATH = path.join(process.cwd(), '.vl-code', 'workflows', '6-file-codegen.json');
const DOCCENTER_BASE = 'https://v4pre.visuallogic.ai/api/12027022';

// Archive name used by compileProject(); always resolved relative to the project directory.
const COMPILE_ZIP_NAME = '__benchmark_compile.zip';
// Matches the VL source file extensions that are analyzed and compiled.
const VL_SOURCE_RE = /\.(vx|sc|cp|vs|vdb|vth)$/i;
// Matches every file type collected from a generated project (VL sources plus process artifacts).
const COLLECTED_FILE_RE = /\.(vx|sc|cp|vs|vdb|vth|json|txt|md)$/i;

/** Every file the direct baseline is expected to produce, relative to the project root. */
const DIRECT_REQUIRED_PATHS = [
  'Database/CampusOps.vdb',
  'Theme/CampusOpsTheme.vth',
  'Services/OperationsOverview.vs',
  'Services/ScheduleService.vs',
  'Services/WorkOrderService.vs',
  'Services/AlertService.vs',
  'Services/SettingsService.vs',
  'ExtComponents/KpiCard.cp',
  'ExtComponents/StatusPill.cp',
  'ExtComponents/FilterToolbar.cp',
  'ExtComponents/AlertListItem.cp',
  'Sections/OverviewPage.sc',
  'Sections/ScheduleBoard.sc',
  'Sections/WorkOrderDesk.sc',
  'Sections/AlertCenter.sc',
  'Sections/SettingsPage.sc',
  'Apps/CampusOpsApp.vx',
];

/**
 * Fixed project contract for the direct baseline. It is serialized verbatim
 * into every per-file prompt, so key order here is part of the prompt text.
 */
const DIRECT_CONTRACT = {
  projectName: 'CampusOps',
  theme: {
    filePath: 'Theme/CampusOpsTheme.vth',
    style: 'enterprise-light',
    palette: {
      primary: '#0F7B6C',
      primaryHover: '#136E62',
      warning: '#F59E0B',
      danger: '#DC2626',
      success: '#16A34A',
      slate: '#334155',
      surface: '#F8FAFC',
      surfaceElevated: '#FFFFFF',
    },
    direction: 'deep teal primary, slate neutrals, soft elevated cards, pill filters, enterprise operations dashboard',
  },
  dataModel: {
    filePath: 'Database/CampusOps.vdb',
    tables: [
      {
        id: 'Campus',
        fields: [
          { name: 'name', type: 'STRING' },
          { name: 'region', type: 'STRING' },
          { name: 'manager', type: 'STRING' },
          { name: 'activeAlerts', type: 'INT' },
        ],
      },
      {
        id: 'Technician',
        fields: [
          { name: 'name', type: 'STRING' },
          { name: 'campusId', type: 'INT' },
          { name: 'skillTag', type: 'STRING' },
          { name: 'shiftStatus', type: 'STRING' },
          { name: 'utilizationRate', type: 'FLOAT' },
        ],
      },
      {
        id: 'WorkOrder',
        fields: [
          { name: 'campusId', type: 'INT' },
          { name: 'title', type: 'STRING' },
          { name: 'priority', type: 'STRING' },
          { name: 'status', type: 'STRING' },
          { name: 'assigneeId', type: 'INT' },
          { name: 'slaHours', type: 'INT' },
        ],
      },
      {
        id: 'AlertRule',
        fields: [
          { name: 'campusId', type: 'INT' },
          { name: 'ruleName', type: 'STRING' },
          { name: 'thresholdValue', type: 'FLOAT' },
          { name: 'enabled', type: 'BOOL' },
        ],
      },
      {
        id: 'AlertEvent',
        fields: [
          { name: 'campusId', type: 'INT' },
          { name: 'ruleId', type: 'INT' },
          { name: 'severity', type: 'STRING' },
          { name: 'status', type: 'STRING' },
          { name: 'message', type: 'STRING' },
        ],
      },
      {
        id: 'UserPreference',
        fields: [
          { name: 'density', type: 'STRING' },
          { name: 'defaultCampusId', type: 'INT' },
          { name: 'emailDigest', type: 'BOOL' },
        ],
      },
    ],
  },
  services: [
    {
      domainId: 'OperationsOverview',
      filePath: 'Services/OperationsOverview.vs',
      purpose: 'dashboard KPIs and summary cards',
      methods: [
        { id: 'GetOverviewMetrics', params: 'campusId(INT)', returns: '{success:BOOL,data:OBJECT}' },
      ],
    },
    {
      domainId: 'ScheduleService',
      filePath: 'Services/ScheduleService.vs',
      purpose: 'technician assignment list and shift filters',
      methods: [
        { id: 'ListAssignments', params: 'campusId(INT),shiftStatus(STRING)', returns: '{success:BOOL,data:[{}]}' },
      ],
    },
    {
      domainId: 'WorkOrderService',
      filePath: 'Services/WorkOrderService.vs',
      purpose: 'work order list and status updates',
      methods: [
        { id: 'ListWorkOrders', params: 'campusId(INT),priority(STRING),status(STRING)', returns: '{success:BOOL,data:[{}]}' },
        { id: 'UpdateWorkOrderStatus', params: 'workOrderId(INT),status(STRING)', returns: '{success:BOOL}' },
      ],
    },
    {
      domainId: 'AlertService',
      filePath: 'Services/AlertService.vs',
      purpose: 'alert event list and acknowledgement',
      methods: [
        { id: 'ListAlerts', params: 'campusId(INT),severity(STRING),status(STRING)', returns: '{success:BOOL,data:[{}]}' },
        { id: 'AcknowledgeAlert', params: 'alertId(INT)', returns: '{success:BOOL}' },
      ],
    },
    {
      domainId: 'SettingsService',
      filePath: 'Services/SettingsService.vs',
      purpose: 'settings and threshold preferences',
      methods: [
        { id: 'GetSettings', params: 'campusId(INT)', returns: '{success:BOOL,data:OBJECT}' },
        { id: 'SaveSettings', params: 'campusId(INT),density(STRING),threshold(FLOAT)', returns: '{success:BOOL}' },
      ],
    },
  ],
  components: [
    {
      id: 'KpiCard',
      filePath: 'ExtComponents/KpiCard.cp',
      purpose: 'title, numeric value, helper text, optional intent tone',
    },
    {
      id: 'StatusPill',
      filePath: 'ExtComponents/StatusPill.cp',
      purpose: 'compact status chip for priority or lifecycle states',
    },
    {
      id: 'FilterToolbar',
      filePath: 'ExtComponents/FilterToolbar.cp',
      purpose: 'filter row with campus and status selectors plus clear action',
    },
    {
      id: 'AlertListItem',
      filePath: 'ExtComponents/AlertListItem.cp',
      purpose: 'alert row with severity, message, meta, and acknowledge button',
    },
  ],
  sections: [
    {
      id: 'OverviewPage',
      filePath: 'Sections/OverviewPage.sc',
      consumesServices: ['OperationsOverview.GetOverviewMetrics'],
      usesComponents: ['KpiCard'],
      purpose: 'overview dashboard with KPI cards and campus summary rows',
    },
    {
      id: 'ScheduleBoard',
      filePath: 'Sections/ScheduleBoard.sc',
      consumesServices: ['ScheduleService.ListAssignments'],
      usesComponents: ['FilterToolbar', 'StatusPill'],
      purpose: 'schedule and technician assignment table',
    },
    {
      id: 'WorkOrderDesk',
      filePath: 'Sections/WorkOrderDesk.sc',
      consumesServices: ['WorkOrderService.ListWorkOrders', 'WorkOrderService.UpdateWorkOrderStatus'],
      usesComponents: ['FilterToolbar', 'StatusPill'],
      purpose: 'work order list with status change action',
    },
    {
      id: 'AlertCenter',
      filePath: 'Sections/AlertCenter.sc',
      consumesServices: ['AlertService.ListAlerts', 'AlertService.AcknowledgeAlert'],
      usesComponents: ['FilterToolbar', 'AlertListItem'],
      purpose: 'alert center with severity list and acknowledgement actions',
    },
    {
      id: 'SettingsPage',
      filePath: 'Sections/SettingsPage.sc',
      consumesServices: ['SettingsService.GetSettings', 'SettingsService.SaveSettings'],
      usesComponents: ['StatusPill'],
      purpose: 'alert threshold and density settings form',
    },
  ],
  app: {
    id: 'CampusOpsApp',
    filePath: 'Apps/CampusOpsApp.vx',
    routes: [
      { path: 'overview', sectionId: 'OverviewPage' },
      { path: 'schedule', sectionId: 'ScheduleBoard' },
      { path: 'work-orders', sectionId: 'WorkOrderDesk' },
      { path: 'alerts', sectionId: 'AlertCenter' },
      { path: 'settings', sectionId: 'SettingsPage' },
    ],
  },
};

/**
 * Natural-language requirement handed to both baselines and embedded in the
 * Markdown report. Segments are joined with single spaces.
 */
const APP_REQUIREMENT = [
  'Build a desktop-first operations cockpit called CampusOps for a multi-campus facilities team.',
  'Business scope:',
  '- Roles: dispatcher and supervisor.',
  '- Routes/pages: /overview, /schedule, /work-orders, /alerts, /settings.',
  '- Domain data: Campus, Technician, WorkOrder, AlertRule, AlertEvent.',
  '- Key interactions:',
  '- Overview shows KPI cards for open work orders, overdue SLA, active alerts, and technician utilization.',
  '- Schedule page lists technician assignments by campus and lets users filter by campus, technician, and shift status.',
  '- Work order page lists work orders with filters by campus, priority, and status, and supports changing the order status.',
  '- Alert center lists alert events with severity, source campus, and acknowledgement/escalation actions.',
  '- Settings page edits alert thresholds and dashboard density preferences.',
  'Design direction:',
  '- Theme should feel enterprise and operational: deep teal primary, slate surfaces, amber warning, red danger, soft elevated cards, pill filters.',
  '- Prefer card/list/table layouts instead of advanced chart widgets.',
  '- Keep interactions compile-safe and easy to preview.',
  'Implementation constraints:',
  '- Use the exact file names listed below when generating the direct baseline.',
  '- Generate realistic mock data in the database file.',
  '- Use only VL-safe constructs; avoid speculative widgets or syntax.',
].join(' ');

/**
 * Condensed VL language and theme rules included in every direct-baseline
 * prompt. Segments are joined with single spaces.
 */
const DIRECT_VL_DIGEST = [
  'Latest reference baseline:',
  '- Latest DocCenter VL syntax document reports version 3.6.',
  '- Latest Theme doc is Theme-Enterprise-6.5 with styleSpaceVersion 1.6.',
  '- Current VLCode-Lite runtime, validator, and workflow toolchain in this repo are still pinned to VL 3.5.',
  'Compatibility target for this benchmark:',
  '- Every generated VL file must start with // VL_VERSION:3.5.',
  '- Stay within the shared 3.5-safe subset even when the latest syntax doc is newer.',
  'Essential VL rules distilled from the latest docs:',
  '- File types: .vx App, .sc Section, .cp Component, .vs ServiceDomain, .vdb Database, .vth Theme.',
  '- Cross references: App -> Section/Component only; Section -> ServiceDomain/Component only; Service and Component do not cross-reference others.',
  '- Indentation uses leading hyphens, never spaces.',
  '- App required section order: SysConfig, Frontend Global Vars, Frontend Derived Vars, Frontend Tree, Frontend Event Handlers, Frontend Internal Methods, Frontend Pipeline Funcs.',
  '- Section required section order: Frontend Public Props, Frontend Public Events, Frontend Public Methods, Frontend Global Vars, Frontend Derived Vars, Frontend Tree, Frontend Event Handlers, Frontend Internal Methods, Frontend Pipeline Funcs.',
  '- Component required section order: Frontend Public Props, Frontend Public Events, Frontend Derived Vars, Frontend Tree, Frontend Event Handlers, Frontend Internal Methods, Frontend Pipeline Funcs.',
  '- ServiceDomain required section order: Backend Environment Vars, Backend Tree, Services, Backend Event Handlers, Transactions, Backend Internal Methods, Backend Pipeline Funcs.',
  '- Style values must be static string literals only. Do not use ternary expressions or variable expressions inside style:.',
  '- Do not emit CSS-only skin props such as border-collapse.',
  '- Do not bind events directly on nodes; bind on the interactive child inside the loop.',
  '- Prefer simple cards, rows, columns, text, buttons, input/select-like controls, if/for blocks, and service calls.',
  'Theme rules distilled from the latest docs:',
  '- Theme file order: # Meta -> # Design Tokens (optional) -> # Point Slot Values -> # Overrides (optional).',
  '- The heading must be exactly # Point Slot Values. Do not use legacy # Coordinate Values.',
  '- base_theme should stay Platform/Theme-Default-Light@1 for this benchmark.',
  '- Use enterprise-style point slots such as intent.*, emphasis.*, shape.*, surface.*, textRole.*, state.*, size.*, space.* when overriding appearance.',
].join(' ');

/**
 * Ensures a cookie value carries the `ih5bearer=` prefix.
 *
 * @param {*} cookie - Raw cookie or bare token; any falsy value yields ''.
 * @returns {string} The cookie string, prefixed with `ih5bearer=` if it was not already.
 */
function normalizeCookie(cookie) {
  if (!cookie) return '';
  const text = String(cookie);
  return text.startsWith('ih5bearer=') ? text : `ih5bearer=${text}`;
}

/**
 * Turns a date string such as `2024-05-01` into a compact `20240501` slug.
 *
 * @param {*} dateText - Date text; falsy values are treated as ''.
 * @returns {string} The text with all hyphens removed, truncated to 8 characters.
 */
function slugDate(dateText) {
  return String(dateText || '')
    .replace(/-/g, '')
    .slice(0, 8);
}

/**
 * Picks a project directory name that does not yet exist under TEST_ROOT.
 *
 * Starting at n = 2, a `Run<n>` marker is inserted before a trailing `Test`
 * suffix until the name is free. Names without a `Test` suffix get `Run<n>`
 * appended instead, so the search always makes progress.
 *
 * @param {string} baseName - Preferred directory name.
 * @returns {string} `baseName` itself when unused, otherwise the first free variant.
 */
function projectNameWithFallback(baseName) {
  const endsWithTest = /Test$/.test(baseName);
  let candidate = baseName;
  for (let n = 2; fsSync.existsSync(path.join(TEST_ROOT, candidate)); n += 1) {
    candidate = endsWithTest
      ? baseName.replace(/Test$/, `Run${n}Test`)
      : `${baseName}Run${n}`;
  }
  return candidate;
}

/**
 * Creates the standard project sub-directories (existing ones are kept).
 *
 * @param {string} projectDir - Absolute project root.
 * @returns {Promise<void>}
 */
async function ensureProjectScaffold(projectDir) {
  const subDirs = ['Apps', 'Sections', 'ExtComponents', 'Services', 'Database', 'Theme', 'Process', '.vl-code'];
  // The directories are independent of each other, so create them concurrently.
  await Promise.all(
    subDirs.map((rel) => fs.mkdir(path.join(projectDir, rel), { recursive: true })),
  );
}

/**
 * Fetches a document body from DocCenter via SERVICE_DocCenter_GetDocById.
 *
 * @param {number} docId - DocCenter document id.
 * @param {string} cookie - Auth cookie or bare token (normalized before sending).
 * @returns {Promise<string>} `data.currentContent` of the JSON response, or '' when absent.
 * @throws {Error} When the HTTP response status is not 2xx.
 */
async function fetchDoc(docId, cookie) {
  const res = await fetch(`${DOCCENTER_BASE}/SERVICE_DocCenter_GetDocById`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Cookie: normalizeCookie(cookie),
    },
    body: JSON.stringify({ docId }),
  });
  // Fail loudly on auth/server errors instead of benchmarking against an empty doc.
  if (!res.ok) {
    throw new Error(`DocCenter request for doc ${docId} failed: HTTP ${res.status} ${res.statusText}`);
  }
  const payload = await res.json();
  return payload?.data?.currentContent || '';
}

/**
 * Runs one non-interactive `claude --print` invocation, feeding the prompt on stdin.
 *
 * @param {string} prompt - Prompt text written to the child's stdin.
 * @param {object} [options]
 * @param {string} [options.systemPrompt=''] - Passed via `--system-prompt` when non-empty.
 * @param {string} [options.model=MODEL] - Passed via `--model`.
 * @param {number} [options.timeoutMs=1200000] - Time after which the child receives SIGTERM.
 * @returns {Promise<string>} Trimmed stdout of the child process.
 * @throws {Error} On spawn failure, non-zero exit code, or timeout.
 */
async function runClaudePrompt(prompt, { systemPrompt = '', model = MODEL, timeoutMs = 20 * 60 * 1000 } = {}) {
  return new Promise((resolve, reject) => {
    const args = ['--print', '--no-session-persistence', '--model', model, '--tools', ''];
    if (systemPrompt) args.push('--system-prompt', systemPrompt);

    const env = { ...process.env, NO_PROXY: 'localhost,127.0.0.1,::1' };
    delete env.CLAUDECODE;

    const proc = spawn('claude', args, {
      stdio: ['pipe', 'pipe', 'pipe'],
      env,
    });

    let stdout = '';
    let stderr = '';
    let settled = false;

    // Single exit path: whichever of timeout / error / close happens first wins.
    function settle(action, value) {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      action(value);
    }

    const timer = setTimeout(() => {
      proc.kill('SIGTERM');
      settle(reject, new Error(`claude prompt timed out after ${Math.round(timeoutMs / 1000)}s`));
    }, timeoutMs);

    // The stdio streams can be missing when the child could not be spawned;
    // in that case the 'error' event below reports the failure.
    // setEncoding() decodes through a StringDecoder, so multi-byte UTF-8
    // characters split across chunks are not corrupted.
    proc.stdout?.setEncoding('utf8');
    proc.stderr?.setEncoding('utf8');
    proc.stdout?.on('data', (chunk) => {
      stdout += chunk;
    });
    proc.stderr?.on('data', (chunk) => {
      stderr += chunk;
    });

    proc.on('error', (err) => settle(reject, err));
    proc.on('close', (code) => {
      if (code !== 0) {
        settle(reject, new Error(`claude exited with code ${code}: ${stderr.slice(0, 600)}`));
        return;
      }
      settle(resolve, stdout.trim());
    });

    // A stdin failure (e.g. EPIPE when the child exits early) must not crash
    // this process; it is recorded and the outcome is decided by 'close'/'error'.
    proc.stdin?.on('error', (err) => {
      stderr += `\n[stdin] ${err.message}`;
    });
    proc.stdin?.end(prompt);
  });
}

/**
 * Extracts a JSON value from an LLM response.
 *
 * Tries, in order: the contents of the first ``` / ```json fence (or the whole
 * text when there is no fence), then the span from the first `{` to the last `}`.
 *
 * @param {*} text - Raw response text.
 * @returns {*} The parsed JSON value.
 * @throws {Error} When the text is empty or contains no brace-delimited span;
 *   a SyntaxError from JSON.parse propagates when that span is not valid JSON.
 */
function extractJson(text) {
  const trimmed = String(text || '').trim();
  if (!trimmed) throw new Error('empty response');

  const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/i);
  const candidate = fenced ? fenced[1].trim() : trimmed;
  try {
    return JSON.parse(candidate);
  } catch {
    // Not clean JSON — fall through to the brace-slicing attempt below.
  }

  const start = candidate.indexOf('{');
  const end = candidate.lastIndexOf('}');
  if (start >= 0 && end > start) {
    return JSON.parse(candidate.slice(start, end + 1));
  }
  throw new Error('could not extract JSON from response');
}

/**
 * Writes a `{ relativePath: content }` map below a project directory,
 * creating parent directories as needed.
 *
 * @param {string} projectDir - Absolute project root.
 * @param {Object<string, *>} filesMap - Relative path to content; content is stringified.
 * @returns {Promise<string[]>} The written relative paths, sorted.
 */
async function writeObjectFiles(projectDir, filesMap) {
  const written = [];
  for (const [relPath, content] of Object.entries(filesMap || {})) {
    const target = path.join(projectDir, relPath);
    await fs.mkdir(path.dirname(target), { recursive: true });
    await fs.writeFile(target, String(content), 'utf-8');
    written.push(relPath);
  }
  return written.sort();
}

/**
 * Maps `items` through an async iterator with at most `limit` calls in flight.
 *
 * @param {Array} items - Input items.
 * @param {number} limit - Maximum concurrency; values below 1 are treated as 1.
 * @param {function(*, number): Promise<*>} iterator - Called with `(item, index)`.
 * @returns {Promise<Array>} Results in the same order as `items`.
 */
async function mapLimit(items, limit, iterator) {
  const results = new Array(items.length);
  let cursor = 0;

  // Each worker repeatedly claims the next unprocessed index. Claiming is a
  // synchronous step, so no index is handed out twice.
  async function worker() {
    while (cursor < items.length) {
      const index = cursor;
      cursor += 1;
      results[index] = await iterator(items[index], index);
    }
  }

  const workerCount = Math.max(1, Math.min(limit, items.length));
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}

/**
 * Builds the prompt for one direct-baseline file.
 *
 * @param {object} params
 * @param {'database'|'theme'|'service'|'component'|'section'|'app'} params.kind - File category.
 * @param {object} params.target - Contract entry for the file; must carry `filePath`.
 * @param {string} params.latestThemeDoc - Theme reference text (only used for kind 'theme').
 * @returns {string} The complete prompt; segments are joined with single spaces.
 * @throws {Error} When `kind` is not one of the supported categories.
 */
function buildDirectFilePrompt({ kind, target, latestThemeDoc }) {
  const toJson = (value) => JSON.stringify(value, null, 2);
  const base = [
    'You are generating one VL file for the CampusOps benchmark.',
    `${DIRECT_VL_DIGEST}`,
    'Project contract:',
    `${toJson(DIRECT_CONTRACT)}`,
    'Global requirement:',
    `${APP_REQUIREMENT}`,
    'Common output rules:',
    '- Output only raw VL source code for the requested file.',
    '- Do not use markdown fences.',
    '- The first line must be // VL_VERSION:3.5',
    '- Keep identifiers and file references exactly aligned with the project contract.',
    '- Use compile-safe VL only.',
  ].join(' ').trim();
  const head = `${base} Target file: ${target.filePath}`;

  switch (kind) {
    case 'database':
      return [
        head,
        'Generate the .vdb file with realistic seed data for the declared tables.',
        'Keep relations simple and consistent with the contract.',
      ].join(' ');
    case 'theme':
      return [
        head,
        'Latest Theme 6.5 reference:',
        `${latestThemeDoc}`,
        'Generate a custom enterprise-light theme for CampusOps.',
        'It must keep base_theme:"Platform/Theme-Default-Light@1", use # Point Slot Values, and visibly reflect the contract palette.',
      ].join(' ');
    case 'service':
      return [
        head,
        'Service contract:',
        `${toJson(target)}`,
        'Generate one ServiceDomain file with virtual tables or direct table access as needed.',
        'Keep query filters safe: skip optional filters when empty, 0, or -1.',
      ].join(' ');
    case 'component':
      return [
        head,
        'Component contract:',
        `${toJson(target)}`,
        'Generate one reusable pure UI component.',
      ].join(' ');
    case 'section':
      return [
        head,
        'Section contract:',
        `${toJson(target)}`,
        'Available services:',
        `${toJson(DIRECT_CONTRACT.services)}`,
        'Available components:',
        `${toJson(DIRECT_CONTRACT.components)}`,
        'Generate one Section file with local state, service calls, and simple event handlers.',
      ].join(' ');
    case 'app':
      return [
        head,
        'App contract:',
        `${toJson(target)}`,
        'Available sections:',
        `${toJson(DIRECT_CONTRACT.sections)}`,
        'Generate one App file that routes to the declared sections and uses a clear sidebar + content layout.',
      ].join(' ');
    default:
      throw new Error(`Unknown direct generation kind: ${kind}`);
  }
}

/**
 * Generates one file of the direct baseline.
 *
 * Saves the prompt and the raw model output under Process/DirectGeneration,
 * then writes the trimmed output (plus a trailing newline) to the target path.
 *
 * @param {object} params
 * @param {string} params.projectDir - Absolute project root.
 * @param {string} params.kind - File category, see buildDirectFilePrompt().
 * @param {object} params.target - Contract entry for the file; must carry `filePath`.
 * @param {string} params.latestThemeDoc - Theme reference text.
 * @returns {Promise<{filePath: string, kind: string, durationMs: number}>}
 *   `durationMs` covers the model call only.
 */
async function generateSingleDirectFile({ projectDir, kind, target, latestThemeDoc }) {
  const prompt = buildDirectFilePrompt({ kind, target, latestThemeDoc });
  const safeName = path.basename(target.filePath).replace(/[^\w.-]/g, '_');
  const promptDir = path.join(projectDir, 'Process', 'DirectGeneration');
  await fs.mkdir(promptDir, { recursive: true });
  await fs.writeFile(path.join(promptDir, `${safeName}.prompt.txt`), prompt, 'utf-8');

  const startedAt = performance.now();
  const raw = await runClaudePrompt(prompt, {
    systemPrompt: `Generate only the VL source for ${target.filePath}.`,
    timeoutMs: 8 * 60 * 1000,
  });
  const durationMs = Math.round(performance.now() - startedAt);
  await fs.writeFile(path.join(promptDir, `${safeName}.raw.txt`), raw, 'utf-8');

  const targetPath = path.join(projectDir, target.filePath);
  await fs.mkdir(path.dirname(targetPath), { recursive: true });
  await fs.writeFile(targetPath, `${raw.trim()}\n`, 'utf-8');

  return { filePath: target.filePath, kind, durationMs };
}

/**
 * Runs the direct baseline: one model call per contract file, in four phases
 * (database + theme, services + components, sections, app), each with its own
 * concurrency limit.
 *
 * @param {object} params
 * @param {string} params.projectDir - Absolute project root.
 * @param {string} params.latestThemeDoc - Theme reference text for the theme prompt.
 * @param {?string} params.latestVlVersion - Recorded in Process/DirectContext.json.
 * @returns {Promise<object>} Total duration, fastest first-phase file duration
 *   (`firstArtifactMs`), missing/extra/written paths, and per-file stats.
 */
async function runDirectBaseline({ projectDir, latestThemeDoc, latestVlVersion }) {
  await ensureProjectScaffold(projectDir);

  const contextPath = path.join(projectDir, 'Process', 'DirectContext.json');
  await fs.writeFile(contextPath, JSON.stringify({
    latestVlVersion,
    contract: DIRECT_CONTRACT,
    requiredPaths: DIRECT_REQUIRED_PATHS,
  }, null, 2), 'utf-8');

  const asPlan = (kind, targets) => targets.map((target) => ({ kind, target }));
  const phases = [
    {
      limit: 2,
      items: [
        { kind: 'database', target: { filePath: DIRECT_CONTRACT.dataModel.filePath } },
        { kind: 'theme', target: { filePath: DIRECT_CONTRACT.theme.filePath } },
      ],
    },
    {
      limit: 4,
      items: [
        ...asPlan('service', DIRECT_CONTRACT.services),
        ...asPlan('component', DIRECT_CONTRACT.components),
      ],
    },
    { limit: 3, items: asPlan('section', DIRECT_CONTRACT.sections) },
    { limit: 1, items: asPlan('app', [DIRECT_CONTRACT.app]) },
  ];
  const generate = (item) => generateSingleDirectFile({ projectDir, latestThemeDoc, ...item });

  const startedAt = performance.now();
  const phaseStats = [];
  // Phases run strictly one after another; only files within a phase run concurrently.
  for (const { items, limit } of phases) {
    phaseStats.push(await mapLimit(items, limit, generate));
  }
  const durationMs = Math.round(performance.now() - startedAt);

  const fileStats = phaseStats.flat();
  const [firstPhase] = phaseStats;
  const writtenPaths = fileStats.map((item) => item.filePath).sort();
  const writtenSet = new Set(writtenPaths);
  const requiredSet = new Set(DIRECT_REQUIRED_PATHS);

  return {
    durationMs,
    firstArtifactMs: firstPhase.length ? Math.min(...firstPhase.map((item) => item.durationMs)) : null,
    missingPaths: DIRECT_REQUIRED_PATHS.filter((relPath) => !writtenSet.has(relPath)),
    // writtenPaths is already sorted, so the filtered list is sorted as well.
    extraPaths: writtenPaths.filter((relPath) => !requiredSet.has(relPath)),
    writtenPaths,
    declaredProjectName: DIRECT_CONTRACT.projectName,
    fileStats,
  };
}

/**
 * Runs the workflow baseline through WorkflowExecutor and records its
 * node timeline, text log, and file-written events under Process/.
 *
 * @param {object} params
 * @param {string} params.projectDir - Absolute project root (executor work dir).
 * @param {string} params.cookie - Auth cookie handed to the executor.
 * @returns {Promise<{durationMs: number, timeline: object[], firstArtifactMs: ?number}>}
 *   `firstArtifactMs` is the earliest file-written offset, or null when no file event arrived.
 * @throws {Error} When the executor reports an error or its execute() call fails.
 */
async function runWorkflowBaseline({ projectDir, cookie }) {
  await ensureProjectScaffold(projectDir);
  const workflow = JSON.parse(await fs.readFile(WORKFLOW_PATH, 'utf-8'));
  const executor = new WorkflowExecutor({
    workDir: projectDir,
    model: MODEL,
    llmProvider: 'cli',
    cookie,
  });

  const timeline = [];
  const logLines = [];
  const fileEvents = [];
  const startedAt = performance.now();

  const recordNode = (status, info, extra = {}) => {
    timeline.push({
      nodeId: info.nodeId,
      title: info.title,
      type: info.type,
      status,
      at: new Date().toISOString(),
      ...extra,
    });
  };

  await new Promise((resolve, reject) => {
    const inputs = { userRequest: APP_REQUIREMENT, targetLang: TARGET_LANG };
    const callbacks = {
      onText: (text) => {
        const line = String(text || '').trim();
        if (line) logLines.push(line);
      },
      onNodeStart: (info) => recordNode('start', info),
      onNodeDone: (info) => recordNode('done', info, { duration_ms: info.duration_ms || 0 }),
      onNodeError: (info) => recordNode('error', info, { error: info.error || 'unknown error' }),
      onFileWritten: (filePath) => {
        fileEvents.push({ filePath, atMs: Math.round(performance.now() - startedAt) });
      },
      onDone: (info) => resolve(info),
      onError: (message) => reject(new Error(message || 'workflow generation failed')),
    };
    // If execute() returns a promise that rejects without calling onError,
    // forward the rejection so this wrapper cannot hang forever.
    Promise.resolve(executor.execute(workflow, inputs, callbacks)).catch(reject);
  });

  const durationMs = Math.round(performance.now() - startedAt);
  const processDir = path.join(projectDir, 'Process');
  await fs.writeFile(path.join(processDir, 'WorkflowTimeline.json'), JSON.stringify(timeline, null, 2), 'utf-8');
  await fs.writeFile(path.join(processDir, 'WorkflowLog.txt'), logLines.join('\n'), 'utf-8');
  await fs.writeFile(path.join(processDir, 'WorkflowFileEvents.json'), JSON.stringify(fileEvents, null, 2), 'utf-8');

  return {
    durationMs,
    timeline,
    firstArtifactMs: fileEvents.length ? Math.min(...fileEvents.map((item) => item.atMs)) : null,
  };
}

/**
 * Parses the text produced by the validate tool into error/warning counts.
 *
 * @param {*} validationText - Validator output.
 * @returns {{errors: ?number, warnings: ?number, raw: string}} Counts are 0/0 for
 *   the "All N VL files passed validation." message, the parsed numbers for a
 *   "Validation: X errors, Y warnings" line, and null when neither is found.
 */
function parseValidationSummary(validationText) {
  const raw = String(validationText || '');
  if (/All \d+ VL files passed validation\./.test(raw)) {
    return { errors: 0, warnings: 0, raw };
  }
  const counts = raw.match(/Validation:\s+(\d+)\s+errors,\s+(\d+)\s+warnings/i);
  return {
    errors: counts ? Number(counts[1]) : null,
    warnings: counts ? Number(counts[2]) : null,
    raw,
  };
}

/**
 * Recursively reads all VL sources and text artifacts of a project.
 *
 * Dot-entries are skipped, except `.vl-code`. Only files with a
 * vx/sc/cp/vs/vdb/vth/json/txt/md extension are read.
 *
 * @param {string} projectDir - Absolute project root.
 * @returns {Promise<Object<string, string>>} Map of `/`-separated relative path to UTF-8 content.
 */
async function collectFileContents(projectDir) {
  const contents = {};

  async function walk(currentDir, prefix = '') {
    const entries = await fs.readdir(currentDir, { withFileTypes: true });
    for (const entry of entries) {
      const { name } = entry;
      if (name.startsWith('.') && name !== '.vl-code') continue;

      const fullPath = path.join(currentDir, name);
      const relPath = prefix ? `${prefix}/${name}` : name;
      if (entry.isDirectory()) {
        await walk(fullPath, relPath);
      } else if (COLLECTED_FILE_RE.test(name)) {
        try {
          contents[relPath] = await fs.readFile(fullPath, 'utf-8');
        } catch {
          // Best effort: an unreadable file is left out of the map.
        }
      }
    }
  }

  await walk(projectDir);
  return contents;
}

/**
 * Aggregates file and line counts per file extension.
 *
 * @param {Object<string, string>} fileMap - Relative path to content.
 * @returns {Object<string, {files: number, lines: number, nonEmptyLines: number}>}
 *   Keyed by extension (including the dot), or 'none' for files without one.
 */
function countLocByExt(fileMap) {
  const stats = {};
  for (const [relPath, content] of Object.entries(fileMap || {})) {
    const ext = path.extname(relPath) || 'none';
    const lines = String(content || '').split('\n');
    stats[ext] ??= { files: 0, lines: 0, nonEmptyLines: 0 };
    const bucket = stats[ext];
    bucket.files += 1;
    bucket.lines += lines.length;
    bucket.nonEmptyLines += lines.filter((line) => line.trim()).length;
  }
  return stats;
}

/**
 * Derives simple structural metrics from a theme (.vth) file.
 *
 * @param {string} themeContent - Theme file text; falsy values are treated as ''.
 * @returns {{lines: number, assignmentCount: number, hasPointSlotValues: boolean,
 *   hasDesignTokens: boolean, hasBaseTheme: boolean}} `assignmentCount` counts
 *   non-blank lines containing ':' that do not start with '//', '#', or '<'.
 */
function summarizeTheme(themeContent) {
  const text = themeContent || '';
  const lines = String(text).split('\n');
  const isAssignment = (line) => {
    const trimmed = line.trim();
    if (!trimmed || !trimmed.includes(':')) return false;
    return !['//', '#', '<'].some((marker) => trimmed.startsWith(marker));
  };
  return {
    lines: lines.length,
    assignmentCount: lines.filter(isAssignment).length,
    hasPointSlotValues: /# Point Slot Values/.test(text),
    hasDesignTokens: /# Design Tokens/.test(text),
    hasBaseTheme: /base_theme:"Platform\/Theme-Default-Light@1"/.test(text),
  };
}

/**
 * Zips the project's VL sources and submits them to the remote parsevl endpoint.
 *
 * Requires the `find` and `zip` command-line tools. The temporary archive is
 * always removed afterwards. Failures are reported in the result, not thrown.
 *
 * @param {string} projectDir - Absolute project root.
 * @param {string} cookie - Auth cookie or bare token (normalized before sending).
 * @returns {Promise<object>} On a completed request: HTTP/code status, `success`,
 *   error and warning counts, preview URLs, the first 20 diagnostics, and the raw
 *   response. On any exception: `{ success: false, error, ... }` with null counts.
 */
async function compileProject(projectDir, cookie) {
  const zipPath = path.join(projectDir, COMPILE_ZIP_NAME);
  const bodyCookie = normalizeCookie(cookie);
  try {
    // The command string is constant and the directory is passed via `cwd`,
    // so no file-system path is ever interpolated into the shell.
    execSync(
      `find . -type f \\( -name "*.vx" -o -name "*.sc" -o -name "*.cp" -o -name "*.vs" -o -name "*.vdb" -o -name "*.vth" \\) | zip -q -@ "${COMPILE_ZIP_NAME}"`,
      { cwd: projectDir, timeout: 30_000 },
    );

    const zipBuffer = await fs.readFile(zipPath);
    const dataUrl = `data:application/zip;base64,${zipBuffer.toString('base64')}`;
    const res = await fetch(`${PARSEVL_URL}/edtfn/parsevl`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Cookie: bodyCookie,
      },
      body: JSON.stringify({
        action: 'parsePjt',
        file: dataUrl,
        download: true,
        projectName: path.basename(projectDir),
      }),
    });
    const json = await res.json();

    const diagnostics = Array.isArray(json?.data?.errList) ? json.data.errList : [];
    // Anything not explicitly tagged as a warning counts as a hard error.
    const isWarning = (item) => String(item?.level || '').toLowerCase() === 'warning';
    const warnings = diagnostics.filter(isWarning);
    const hardErrors = diagnostics.filter((item) => !isWarning(item));
    const codeOk = json?.code === 0 || json?.code === 200;

    return {
      httpOk: res.ok,
      code: json?.code ?? null,
      success: res.ok && codeOk && hardErrors.length === 0,
      message: json?.message || null,
      errCount: hardErrors.length,
      warningCount: warnings.length,
      previewUrls: json?.data?.appPreviewUrlMap || {},
      errList: diagnostics.slice(0, 20),
      raw: json,
    };
  } catch (err) {
    return {
      success: false,
      error: err.message,
      errCount: null,
      warningCount: null,
      previewUrls: {},
      errList: [],
    };
  } finally {
    try {
      await fs.unlink(zipPath);
    } catch {
      // The archive may never have been created; nothing to clean up then.
    }
  }
}

/**
 * Validates, extracts metadata from, and compiles a generated project, and
 * stores the intermediate results under Process/.
 *
 * @param {string} projectDir - Absolute project root.
 * @param {{cookie: string}} generationMeta - Carries the auth cookie for the compile call.
 * @returns {Promise<object>} Validation counts, metadata validity, compile result,
 *   theme summary, per-extension line counts, and extracted metadata counts
 *   (`extractedMeta` is null when no metadata was extracted).
 */
async function analyzeProject(projectDir, generationMeta) {
  const ctx = new VLProjectContext(projectDir);
  await ctx.load();
  const validateTool = createVLValidateTool(ctx);
  const validationText = await validateTool.execute({ file_path: 'all' });
  const validation = parseValidationSummary(validationText);

  const fileMap = await collectFileContents(projectDir);
  const vlFileMap = Object.fromEntries(
    Object.entries(fileMap).filter(([relPath]) => VL_SOURCE_RE.test(relPath)),
  );
  const metadata = extractFromFileTree(vlFileMap, projectDir);
  const metaValidation = validateMeta(metadata);
  const compile = await compileProject(projectDir, generationMeta.cookie);

  const processDir = path.join(projectDir, 'Process');
  await fs.writeFile(path.join(processDir, 'BenchmarkValidation.txt'), validation.raw || '', 'utf-8');
  await fs.writeFile(path.join(processDir, 'BenchmarkCompile.json'), JSON.stringify(compile, null, 2), 'utf-8');
  if (metadata) {
    await fs.writeFile(path.join(processDir, 'BenchmarkExtractedMeta.json'), JSON.stringify(metadata, null, 2), 'utf-8');
  }

  const vlPaths = Object.keys(vlFileMap);
  const themeFile = vlPaths.find((relPath) => relPath.endsWith('.vth'));
  const lengthOf = (value) => (Array.isArray(value) ? value.length : 0);

  return {
    projectDir,
    fileCount: vlPaths.length,
    locByExt: countLocByExt(vlFileMap),
    validation,
    metadataValid: metaValidation.valid,
    metadataIssues: metaValidation.issues,
    compile,
    theme: summarizeTheme(themeFile ? vlFileMap[themeFile] : ''),
    extractedMeta: metadata
      ? {
        projectName: metadata.projectName || null,
        services: lengthOf(metadata.services),
        components: lengthOf(metadata.components),
        sections: lengthOf(metadata.sections),
        apps: lengthOf(metadata.apps),
        tables: lengthOf(metadata.dataSchema?.tables),
      }
      : null,
  };
}

/**
 * Flattens a run result and its analysis into the headline metrics of a report.
 *
 * @param {string} label - Display label of the baseline.
 * @param {object} runMeta - Result of runDirectBaseline() or runWorkflowBaseline().
 * @param {object} analysis - Result of analyzeProject().
 * @returns {object} Flat metrics object.
 */
function summarizePathMetrics(label, runMeta, analysis) {
  const { validation, compile, theme } = analysis;
  return {
    label,
    durationMs: runMeta.durationMs,
    firstArtifactMs: runMeta.firstArtifactMs ?? null,
    fileCount: analysis.fileCount,
    validationErrors: validation.errors,
    validationWarnings: validation.warnings,
    metadataValid: analysis.metadataValid,
    metadataIssueCount: analysis.metadataIssues.length,
    compileSuccess: compile.success,
    compileErrors: compile.errCount,
    compileWarnings: compile.warningCount,
    previewCount: Object.keys(compile.previewUrls || {}).length,
    themeAssignments: theme.assignmentCount,
    themeLines: theme.lines,
  };
}

/**
 * Renders the Markdown benchmark report.
 *
 * @param {object} context
 * @param {{latestVlVersion: ?string}} context.docs - Observed documentation info.
 * @param {object} context.directRun - Result of runDirectBaseline().
 * @param {object} context.workflowRun - Result of runWorkflowBaseline().
 * @param {object} context.directAnalysis - analyzeProject() result for the direct project.
 * @param {object} context.workflowAnalysis - analyzeProject() result for the workflow project.
 * @returns {string} Markdown text with lines joined by '\n'.
 */
function buildMarkdownReport(context) {
  const { docs, directRun, workflowRun, directAnalysis, workflowAnalysis } = context;
  const directSummary = summarizePathMetrics('Direct', directRun, directAnalysis);
  const workflowSummary = summarizePathMetrics('Workflow', workflowRun, workflowAnalysis);
  const jsonFence = (value) => ['```json', JSON.stringify(value, null, 2), '```'];

  return [
    `# LLM Codegen Benchmark (${TODAY})`,
    '',
    '## Scenario',
    '',
    '- App: CampusOps',
    `- Model: ${MODEL}`,
    `- Target language: ${TARGET_LANG}`,
    `- Latest DocCenter VL syntax version observed: ${docs.latestVlVersion || 'unknown'}`,
    '- Latest Theme reference: Theme-Enterprise-6.5',
    '- Workflow baseline: VLCode-Lite 6-file codegen workflow',
    '',
    '## Requirement',
    '',
    APP_REQUIREMENT,
    '',
    '## Summary',
    '',
    `- Direct baseline project: ${directAnalysis.projectDir}`,
    `- Workflow baseline project: ${workflowAnalysis.projectDir}`,
    `- Direct duration: ${directSummary.durationMs} ms`,
    `- Workflow duration: ${workflowSummary.durationMs} ms`,
    `- Direct compile success: ${directSummary.compileSuccess}`,
    `- Workflow compile success: ${workflowSummary.compileSuccess}`,
    '',
    '## Direct',
    '',
    ...jsonFence(directSummary),
    '',
    '## Workflow',
    '',
    ...jsonFence(workflowSummary),
    '',
    '## Notes',
    '',
    `- Direct missing paths: ${directRun.missingPaths.join(', ') || 'none'}`,
    `- Direct extra paths: ${directRun.extraPaths.join(', ') || 'none'}`,
    `- Direct metadata issues: ${directAnalysis.metadataIssues.join(' | ') || 'none'}`,
    `- Workflow metadata issues: ${workflowAnalysis.metadataIssues.join(' | ') || 'none'}`,
  ].join('\n');
}

/**
 * Entry point: fetches the reference docs, runs both baselines, analyzes the
 * generated projects, and writes the JSON and Markdown reports.
 *
 * @returns {Promise<void>}
 * @throws {Error} When no cookie is available or any benchmark step fails.
 */
async function main() {
  await fs.mkdir(TEST_ROOT, { recursive: true });
  await fs.mkdir(REPORT_DIR, { recursive: true });

  const cookie = getCookie({ workDir: process.cwd(), cookie: '' });
  if (!cookie) {
    throw new Error('No DocCenter/cloud cookie found. Cannot run authenticated benchmark.');
  }

  console.log(`[benchmark] fetching latest docs from DocCenter`);
  // The two documents are independent, so fetch them concurrently.
  const [latestVlDoc, latestThemeDoc] = await Promise.all([
    fetchDoc(1, cookie),
    fetchDoc(4, cookie),
  ]);
  const latestVlVersion = latestVlDoc.match(/Current version:\s*`\/\/\s*VL_VERSION:([^`]+)`/i)?.[1]?.trim() || null;

  const dateSlug = slugDate(TODAY);
  const directProjectName = projectNameWithFallback(`CampusOpsDirect${dateSlug}Test`);
  const workflowProjectName = projectNameWithFallback(`CampusOpsWorkflow${dateSlug}Test`);
  const directProjectDir = path.join(TEST_ROOT, directProjectName);
  const workflowProjectDir = path.join(TEST_ROOT, workflowProjectName);

  console.log(`[benchmark] direct baseline -> ${directProjectDir}`);
  const directRun = await runDirectBaseline({
    projectDir: directProjectDir,
    latestThemeDoc,
    latestVlVersion,
  });

  console.log(`[benchmark] workflow baseline -> ${workflowProjectDir}`);
  const workflowRun = await runWorkflowBaseline({
    projectDir: workflowProjectDir,
    cookie,
  });

  console.log('[benchmark] analyzing direct baseline');
  const directAnalysis = await analyzeProject(directProjectDir, { cookie });
  console.log('[benchmark] analyzing workflow baseline');
  const workflowAnalysis = await analyzeProject(workflowProjectDir, { cookie });

  const report = {
    createdAt: new Date().toISOString(),
    model: MODEL,
    latestDocs: {
      latestVlVersion,
      themeName: 'Theme-Enterprise-6.5',
      themeLength: latestThemeDoc.length,
    },
    requirement: APP_REQUIREMENT,
    direct: {
      run: directRun,
      analysis: directAnalysis,
      summary: summarizePathMetrics('Direct', directRun, directAnalysis),
    },
    workflow: {
      run: workflowRun,
      analysis: workflowAnalysis,
      summary: summarizePathMetrics('Workflow', workflowRun, workflowAnalysis),
    },
  };

  const reportJsonPath = path.join(REPORT_DIR, `llm-codegen-benchmark-${TODAY}.json`);
  const reportMdPath = path.join(REPORT_DIR, `llm-codegen-benchmark-${TODAY}.md`);
  await fs.writeFile(reportJsonPath, JSON.stringify(report, null, 2), 'utf-8');
  await fs.writeFile(reportMdPath, buildMarkdownReport({
    docs: report.latestDocs,
    directRun,
    workflowRun,
    directAnalysis,
    workflowAnalysis,
  }), 'utf-8');

  console.log('[benchmark] complete');
  console.log(JSON.stringify({
    reportJsonPath,
    reportMdPath,
    directProjectDir,
    workflowProjectDir,
    directSummary: report.direct.summary,
    workflowSummary: report.workflow.summary,
  }, null, 2));
}

main().catch((err) => {
  console.error('[benchmark] failed:', err.message);
  process.exitCode = 1;
});