test-workflow-executor.js 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. import fs from 'fs/promises';
  2. import path from 'path';
  3. import { WorkflowExecutor } from './src/vl/workflow-executor.js';
  4. import { prepareCheckpointForRerun } from './src/server/routes/workflow.js';
  5. import { ToolRegistry } from './src/core/tool-registry.js';
  6. import { createReadFileTool } from './src/tools/read-file.js';
  // Running tallies of test outcomes; printed and turned into the exit code at the end of the script.
  let passed = 0;
  let failed = 0;

  /**
   * Run a single named test case and record its outcome.
   *
   * The callback may be synchronous or return a promise. A normal completion
   * logs a check mark and bumps `passed`; a throw or rejection logs a cross
   * with the failure reason and bumps `failed`. The returned promise always
   * resolves, so one failing case never aborts the rest of the script.
   *
   * @param {string} name - Label printed next to the result.
   * @param {() => (void | Promise<void>)} fn - Test body.
   * @returns {Promise<void>} Settles once the outcome has been recorded.
   */
  async function test(name, fn) {
    try {
      await fn();
      console.log(` ✓ ${name}`);
      passed++;
    } catch (err) {
      // A test may throw or reject with a non-Error value (string, null, undefined).
      // Reading `.message` unguarded would either print "undefined" or throw from
      // inside this handler and take the whole run down with it.
      const reason = err?.message ?? String(err);
      console.log(` ✗ ${name}: ${reason}`);
      failed++;
    }
  }
  /**
   * Assert that two values are identical under `===`.
   *
   * @param {*} a - Actual value.
   * @param {*} b - Expected value.
   * @param {string} [msg] - Message to throw instead of the generated one.
   * @throws {Error} When `a` and `b` are not strictly equal.
   */
  function assertEqual(a, b, msg) {
    if (a === b) return;
    // Build the default text only when no custom message was supplied.
    const detail = msg ? msg : `Expected ${JSON.stringify(b)}, got ${JSON.stringify(a)}`;
    throw new Error(detail);
  }
  /**
   * Throw unless the given condition is truthy.
   *
   * @param {*} cond - Value that must be truthy.
   * @param {string} [msg] - Failure message; defaults to 'Assertion failed'.
   * @throws {Error} When `cond` is falsy.
   */
  function assert(cond, msg) {
    if (cond) return;
    throw new Error(msg || 'Assertion failed');
  }
  // Scratch locations used by the test cases below. Everything lives under one
  // temp directory so the setup step can reset it in a single place.
  const tmpDir = '/tmp/vlcode-lite-workflow-executor-test';
  const complexDir = path.join(tmpDir, 'complex-rerun');

  // File that the simple rerun test reads back after each execution.
  const outputPath = path.join(tmpDir, 'rerun-output.txt');
  // Fixture file written during setup for the registry-backed tool test.
  const registryReadPath = path.join(tmpDir, 'tool-read.txt');

  // Files the complex rerun test reads back from the complex work directory.
  const complexSummaryPath = path.join(complexDir, 'Artifacts', 'summary.txt');
  const complexAuditPath = path.join(complexDir, 'Artifacts', 'audit.txt');
  // Smallest fixture: set $msg, write it to /rerun-output.txt, stop.
  const workflow = {
    version: '3.16',
    name: 'ExecutorRerunSmoke',
    steps: [
      {
        id: 'Set_Default',
        target: '$msg',
        value: '="first"',
        next: 'Write_Output',
      },
      {
        id: 'Write_Output',
        target: '/rerun-output.txt',
        value: '=$msg',
        mode: 'overwrite',
        next: 'Stop_End',
      },
      { id: 'Stop_End' },
    ],
  };
  // Fixture for the test that runs a tool registered in a real ToolRegistry.
  // NOTE(review): unlike the other Tool_* fixtures in this file, this step has no
  // explicit `tool` key. Presumably the executor derives the tool name from the
  // step id; confirm against WorkflowExecutor.
  const actualRegistryToolWorkflow = {
    version: '3.16',
    name: 'ExecutorActualRegistryTool',
    steps: [
      {
        id: 'Tool_010_ReadFile',
        input: { file_path: '="tool-read.txt"' },
        // Two output expressions mapped onto workflow variables.
        out: {
          $toolRead: '=_result',
          $toolReadRaw: '=_toolResult.result',
        },
        next: 'Stop_End',
      },
      { id: 'Stop_End' },
    ],
  };
  // Larger fixture combining set, fork, loop, pause and write steps. The rerun
  // test restarts it from Fork_070_Finalize and from Loop_050_Sum.
  const complexWorkflow = {
    version: '3.16',
    name: 'ExecutorComplexRerun',
    steps: [
      // Seed variables.
      { id: 'Set_010_Numbers', target: '$nums', value: '=[2,4,6,8]', next: 'Set_020_Sum' },
      { id: 'Set_020_Sum', target: '$sum', value: '=0', next: 'Set_030_Audit' },
      { id: 'Set_030_Audit', target: '$audit', value: '=[]', next: 'Fork_040_Prepare' },

      // First fork: two branches, each setting a variable and writing it to a file.
      { id: 'Fork_040_Prepare', children: ['Set_041_BranchA', 'Set_042_BranchB'], next: 'Loop_050_Sum' },
      { id: 'Set_041_BranchA', target: '$branchA', value: '="branch-A-ready"', next: 'Write_043_BranchA' },
      { id: 'Write_043_BranchA', target: '="Artifacts/branch-a.txt"', value: '=$branchA' },
      { id: 'Set_042_BranchB', target: '$branchB', value: '="branch-B-ready"', next: 'Write_044_BranchB' },
      { id: 'Write_044_BranchB', target: '="Artifacts/branch-b.txt"', value: '=$branchB' },

      // Serial loop over $nums: accumulate into $sum and append an audit entry per item.
      {
        id: 'Loop_050_Sum',
        source: '$nums',
        mode: 'serial',
        children: ['Set_051_Add', 'Set_052_AuditEntry'],
        next: 'Pause_060_Review',
      },
      { id: 'Set_051_Add', target: '$sum', value: '=$sum + _item' },
      {
        id: 'Set_052_AuditEntry',
        target: '$audit[_index]',
        value: '="i=" + _index + ";n=" + _item + ";sum=" + $sum',
      },

      // Pause step; the test resumes it from its onPause callback.
      { id: 'Pause_060_Review', reason: 'Review', next: 'Fork_070_Finalize' },

      // Second fork: write the summary and audit artifacts, then stop.
      { id: 'Fork_070_Finalize', children: ['Write_071_Summary', 'Write_072_Audit'], next: 'Stop_080_End' },
      {
        id: 'Write_071_Summary',
        target: '="Artifacts/summary.txt"',
        value: '="sum=" + $sum + ";count=" + $nums.length',
      },
      { id: 'Write_072_Audit', target: '="Artifacts/audit.txt"', value: '=$audit' },
      { id: 'Stop_080_End' },
    ],
  };
  // Fixture with one tool step that calls EchoTool with a nested input and maps
  // two fields of the tool result onto workflow variables.
  const toolWorkflow = {
    version: '3.16',
    name: 'ExecutorToolStep',
    steps: [
      {
        id: 'Tool_010_EchoTool',
        tool: 'EchoTool',
        input: {
          text: '="hello"',
          nested: { source: '="workflow"' },
        },
        out: {
          $echoed: '=_result.echoed',
          $toolSource: '=_result.meta.source',
        },
        next: 'Stop_End',
      },
      { id: 'Stop_End' },
    ],
  };
  // Fixture with one tool step flagged `allowError`; its outputs read the `ok`
  // and `error` fields of the result.
  const toolAllowErrorWorkflow = {
    version: '3.16',
    name: 'ExecutorToolAllowError',
    steps: [
      {
        id: 'Tool_010_FailTool',
        tool: 'FailTool',
        allowError: true,
        input: { reason: '="expected"' },
        out: {
          $toolOk: '=_result.ok',
          $toolError: '=_result.error',
        },
        next: 'Stop_End',
      },
      { id: 'Stop_End' },
    ],
  };
  // Fixture with one tool step that calls MessageTool and keeps only the `ok`
  // field of its result.
  const toolMessageWorkflow = {
    version: '3.16',
    name: 'ExecutorToolMessage',
    steps: [
      {
        id: 'Tool_010_MessageTool',
        tool: 'MessageTool',
        input: { query: '="inspect"' },
        out: { $toolOk: '=_result.ok' },
        next: 'Stop_End',
      },
      { id: 'Stop_End' },
    ],
  };
  console.log('\n── Workflow Executor Regression ──');

  // Reset the scratch area so files left behind by an earlier run cannot satisfy
  // the read-back assertions below.
  await fs.mkdir(tmpDir, { recursive: true });
  await fs.rm(outputPath, { force: true });
  await fs.rm(complexDir, { force: true, recursive: true });
  await fs.mkdir(complexDir, { recursive: true });

  // Fixture file whose content the registry-backed tool test looks for.
  await fs.writeFile(registryReadPath, 'registry-tool-ok', { encoding: 'utf8' });
  await test('executeFrom handles checkpoint.artifacts object for rerun overrides', async () => {
    const makeExecutor = () => new WorkflowExecutor({ workDir: tmpDir, model: 'claude-opus-4-6' });
    const readOutput = () => fs.readFile(outputPath, 'utf8');

    // First pass: run the workflow from the start, keeping the most recent checkpoint.
    let checkpoint = null;
    const first = makeExecutor();
    await first.execute(workflow, {}, {
      onCheckpoint(cp) {
        checkpoint = cp;
      },
    });
    assert(checkpoint, 'checkpoint missing after initial execution');
    assertEqual(await readOutput(), 'first');

    const artifactsIsPlainObject = checkpoint.artifacts && !Array.isArray(checkpoint.artifacts);
    assert(artifactsIsPlainObject, 'checkpoint.artifacts should be an object');

    // Second pass: restart at the write step with $msg overridden, then check
    // that the file now holds the overridden value.
    const rerun = makeExecutor();
    const resumePoint = { ...checkpoint, currentStepID: 'Write_Output' };
    const overrides = { '$msg': 'edited' };
    await rerun.executeFrom(workflow, resumePoint, overrides, {});
    assertEqual(await readOutput(), 'edited');
  });
  await test('prepareCheckpointForRerun clears fork/loop resume state for complex reruns', async () => {
    const makeExecutor = () => new WorkflowExecutor({ workDir: complexDir, model: 'claude-opus-4-6' });
    const readSummary = () => fs.readFile(complexSummaryPath, 'utf8');

    // Builds an onPause handler that resumes the given executor once, 10 ms after
    // the first pause is reported; later pause notifications are ignored.
    const resumeOnFirstPause = (executor) => {
      let alreadyResumed = false;
      return (info) => {
        if (alreadyResumed) return;
        alreadyResumed = true;
        setTimeout(() => executor.resume(info.nodeId), 10);
      };
    };

    // Baseline run of the whole workflow, keeping the latest checkpoint.
    let checkpoint = null;
    const first = makeExecutor();
    await first.execute(complexWorkflow, {}, {
      onCheckpoint(cp) {
        checkpoint = cp;
      },
      onPause: resumeOnFirstPause(first),
    });
    assert(checkpoint, 'complex checkpoint missing after initial execution');
    assertEqual(await readSummary(), 'sum=20;count=4');

    // Rerun from the final fork with $sum overridden.
    const forkCheckpoint = prepareCheckpointForRerun(complexWorkflow, checkpoint, 'Fork_070_Finalize');
    const forkRerun = makeExecutor();
    await forkRerun.executeFrom(complexWorkflow, forkCheckpoint, { '$sum': 99 }, {});
    assertEqual(await readSummary(), 'sum=99;count=4');

    // Rerun from the loop with fresh inputs; the pause step after the loop is
    // resumed by the handler.
    const loopCheckpoint = prepareCheckpointForRerun(complexWorkflow, checkpoint, 'Loop_050_Sum');
    const loopRerun = makeExecutor();
    const loopOverrides = {
      '$nums': [3, 3, 3],
      '$sum': 0,
      '$audit': [],
    };
    await loopRerun.executeFrom(complexWorkflow, loopCheckpoint, loopOverrides, {
      onPause: resumeOnFirstPause(loopRerun),
    });
    assertEqual(await readSummary(), 'sum=9;count=3');

    const auditText = await fs.readFile(complexAuditPath, 'utf8');
    assertEqual(auditText, '["i=0;n=3;sum=3","i=1;n=3;sum=6","i=2;n=3;sum=9"]');
  });
  await test('Tool_* executes local tool registry and maps _result outputs', async () => {
    // Hand-rolled registry stub: records every invocation and echoes the text
    // back upper-cased together with the nested source value.
    const calls = [];
    const toolRegistry = {
      async execute(name, input) {
        calls.push({ name, input });
        const echoed = String(input.text).toUpperCase();
        const source = input.nested?.source || '';
        return { result: { echoed, meta: { source } } };
      },
    };

    const executor = new WorkflowExecutor({ workDir: tmpDir, model: 'claude-opus-4-6', toolRegistry });

    // Collect start/done notifications, tagging each with the callback it came from.
    const events = [];
    const recordAs = (type) => (info) => events.push({ type, ...info });
    await executor.execute(toolWorkflow, {}, {
      onToolStart: recordAs('start'),
      onToolDone: recordAs('done'),
    });

    // The stub received the evaluated inputs exactly once.
    assertEqual(calls.length, 1, 'tool should be invoked once');
    const [invocation] = calls;
    assertEqual(invocation.name, 'EchoTool');
    assertEqual(invocation.input.text, 'hello');
    assertEqual(invocation.input.nested.source, 'workflow');

    // The `out` mappings landed in the executor's variables.
    assertEqual(executor._ctx.variables.$echoed, 'HELLO');
    assertEqual(executor._ctx.variables.$toolSource, 'workflow');

    const sawEchoEvent = (type) => events.some((event) => event.type === type && event.name === 'EchoTool');
    assert(sawEchoEvent('start'), 'missing tool start event');
    assert(sawEchoEvent('done'), 'missing tool done event');
  });
  await test('Tool_* allowError captures tool failure into _result without aborting workflow', async () => {
    // Registry stub that answers every call with an error payload instead of a result.
    const toolRegistry = {
      execute: async () => ({ error: 'tool-failed' }),
    };
    const executor = new WorkflowExecutor({ workDir: tmpDir, model: 'claude-opus-4-6', toolRegistry });

    const errors = [];
    const callbacks = {
      onToolError: (info) => errors.push(info),
    };
    await executor.execute(toolAllowErrorWorkflow, {}, callbacks);

    // The failure is visible through the step's `out` mappings.
    assertEqual(executor._ctx.variables.$toolOk, false);
    assertEqual(executor._ctx.variables.$toolError, 'tool-failed');

    // The error callback was told the failure was tolerated.
    const sawAllowedFailure = errors.some((event) => event.name === 'FailTool' && event.allowError === true);
    assert(sawAllowedFailure, 'missing allowError tool event');
  });
  await test('Tool_* executes an actual registered VLCode tool via ToolRegistry', async () => {
    // Real registry with the ReadFile tool created for the scratch directory.
    const toolRegistry = new ToolRegistry();
    const readFileTool = createReadFileTool({ workDir: tmpDir });
    toolRegistry.register('ReadFile', readFileTool);

    const executor = new WorkflowExecutor({ workDir: tmpDir, model: 'claude-opus-4-6', toolRegistry });
    await executor.execute(actualRegistryToolWorkflow, {}, {});

    // Both mapped variables should contain the fixture text written during setup.
    const mappedText = String(executor._ctx.variables.$toolRead || '');
    assert(mappedText.includes('registry-tool-ok'), 'ReadFile result missing expected content');
    const rawText = String(executor._ctx.variables.$toolReadRaw || '');
    assert(rawText.includes('registry-tool-ok'), 'raw tool result missing expected content');
  });
  await test('Tool_* forwards tool runtime messages through onToolMessage', async () => {
    // Tool that emits one info and one progress message through the runtime
    // object it is handed, then reports success.
    const messageTool = {
      description: 'Emit runtime messages for workflow testing',
      parameters: {
        type: 'object',
        properties: { query: { type: 'string' } },
        required: ['query'],
      },
      async execute(input, runtime) {
        runtime.info?.('starting inspection', { query: input.query });
        runtime.progress?.('halfway', { percent: 50 });
        return { result: { ok: true } };
      },
    };
    const toolRegistry = new ToolRegistry();
    toolRegistry.register('MessageTool', messageTool);

    const executor = new WorkflowExecutor({ workDir: tmpDir, model: 'claude-opus-4-6', toolRegistry });
    const messages = [];
    await executor.execute(toolMessageWorkflow, {}, {
      onToolMessage: (info) => messages.push(info),
    });

    assertEqual(executor._ctx.variables.$toolOk, true);

    // Each runtime message must have reached the callback tagged with the tool name.
    const sawMessage = (text) => messages.some((info) => info.name === 'MessageTool' && info.message === text);
    assert(sawMessage('starting inspection'), 'missing info tool message');
    assert(sawMessage('halfway'), 'missing progress tool message');
  });
  await test('DocCenter path 1 is pinned to bundled local VL syntax', async () => {
    const originalFetch = globalThis.fetch;
    const executor = new WorkflowExecutor({ workDir: tmpDir, model: 'claude-opus-4-6', cookie: 'test-cookie' });
    const docs = {
      '1': 'remote syntax placeholder',
      '100': 'remote meta prompt placeholder',
    };

    // Minimal response stand-in: only `json()` is provided, and the payload is
    // built anew on every call.
    const respondWith = (buildPayload) => ({
      async json() {
        return buildPayload();
      },
    });

    // Fake network layer. Doc-list requests get two entries (paths '1' and '100').
    // Any other request is answered from the `docId` in its JSON body: doc 1
    // reports VL_VERSION 3.6, everything else the remote meta prompt.
    globalThis.fetch = async (url, options = {}) => {
      const isDocListRequest = String(url).includes('SERVICE_DocCenter_GetDocList');
      if (isDocListRequest) {
        return respondWith(() => ({
          data: [
            { path: '1', _id: 1 },
            { path: '100', _id: 100 },
          ],
        }));
      }

      const body = JSON.parse(options.body || '{}');
      return respondWith(() => ({
        data: {
          currentContent: body.docId === 1
            ? 'Current version: `// VL_VERSION:3.6`'
            : 'remote meta prompt',
        },
      }));
    };

    try {
      await executor._resolveDocCenterDocs(docs, { onText() {} });
    } finally {
      // Put the real fetch back even when resolution throws.
      globalThis.fetch = originalFetch;
    }

    // Path 1 must carry the 3.5 marker rather than the 3.6 one served by the fake
    // remote, while path 100 takes the remote content.
    const pinnedToLocal = docs['1'].includes('Current version: `// VL_VERSION:3.5`');
    assert(pinnedToLocal, 'bundled local syntax should override remote path 1');
    assertEqual(docs['100'], 'remote meta prompt');
  });
  // Print the final tally and exit non-zero when any test case failed.
  const summary = `\n── Results ──\n\n ${passed} passed, ${failed} failed\n`;
  console.log(summary);
  const exitCode = failed > 0 ? 1 : 0;
  process.exit(exitCode);