From 5bdaefff273b5142cfa02e7fee576e587e00a823 Mon Sep 17 00:00:00 2001
From: Vijay Upadya <41652029+vijayupadya@users.noreply.github.com>
Date: Tue, 31 Mar 2026 19:20:07 -0700
Subject: [PATCH] Troubleshooting: Enrich chat debug JSONL logs (#4887)

* Enrich chat debug JSONL logs

* feedback update

* Feedback update2
---
 .../prompts/skills/troubleshoot/SKILL.md      |  31 ++-
 .../vscode-node/chatDebugFileLoggerService.ts | 204 ++++++++++++++++--
 .../test/chatDebugFileLoggerService.spec.ts   |  34 +--
 .../extension/intents/node/toolCallingLoop.ts |  12 +-
 .../extension/prompt/node/chatMLFetcher.ts    |  43 +++-
 5 files changed, 280 insertions(+), 44 deletions(-)
diff --git a/extensions/copilot/assets/prompts/skills/troubleshoot/SKILL.md b/extensions/copilot/assets/prompts/skills/troubleshoot/SKILL.md
index 08acea63a13..1e32644eeba 100644
--- a/extensions/copilot/assets/prompts/skills/troubleshoot/SKILL.md
+++ b/extensions/copilot/assets/prompts/skills/troubleshoot/SKILL.md
@@ -28,6 +28,10 @@ Use direct debug log files written by Copilot Chat:
 debug-logs/<sessionId>/
   main.jsonl                              — always start here; primary conversation log
   models.json                             — (optional) snapshot of available models at session start
+  system_prompt_0.json                    — (optional) full system prompt sent to the model (untruncated)
+  system_prompt_1.json                    — (optional) written when the model changes mid-session
+  tools_0.json                            — (optional) tool definitions sent to the model
+  tools_1.json                            — (optional) written when the model changes mid-session
   runSubagent-<agentName>-<uuid>.jsonl    — (optional) subagent's tool calls & LLM requests
   searchSubagent-<uuid>.jsonl             — (optional) search subagent work
   title-<uuid>.jsonl                      — (optional, UI-only) title generation
@@ -37,6 +41,8 @@ debug-logs/<sessionId>/
 
 Always read `main.jsonl` first — it has the full conversation flow. Child files only appear when those operations occurred. `main.jsonl` contains `child_session_ref` entries that link to each child file by name. Title, categorization, and summarize files are UI housekeeping and rarely relevant to troubleshooting. When investigating model availability or selection issues, read `models.json` — it contains the full list of models (with capabilities, billing, and limits) that were available when the session started.
 
+When investigating what the model was told (system prompt, instructions), read the `system_prompt_*.json` file referenced by a `system_prompt_ref` entry in `main.jsonl`. The file contains the full untruncated system prompt as `{ "content": "..." }`. When investigating which tools were available, read the `tools_*.json` file similarly. If the model changed mid-session, multiple numbered files exist — each `llm_request` entry has a `systemPromptFile` attr indicating which file was active for that request.
+
 Each line is a JSON object. Common fields: `ts` (epoch ms), `dur` (duration ms), `sid` (session ID), `type`, `name`, `spanId`, `parentSpanId`, `status` (`ok`|`error`), `attrs` (type-specific details).
 
 ### Event Type Reference with Examples
@@ -58,15 +64,15 @@ Key attrs: `args` (JSON string of tool input), `result` (tool output or error te
 
 #### llm_request — model round-trip
 ```jsonl
-{"ts":1773200231010,"dur":3001,"sid":"62f52dec","type":"llm_request","name":"chat:gpt-4o","spanId":"000000000000000c","parentSpanId":"0000000000000003","status":"ok","attrs":{"model":"gpt-4o","inputTokens":15025,"outputTokens":126,"ttft":1987}}
+{"ts":1773200231010,"dur":3001,"sid":"62f52dec","type":"llm_request","name":"chat:gpt-4o","spanId":"000000000000000c","parentSpanId":"0000000000000003","status":"ok","attrs":{"model":"gpt-4o","inputTokens":15025,"outputTokens":126,"ttft":1987,"maxTokens":32000,"systemPromptFile":"system_prompt_0.json","userRequest":"echo hello","inputMessages":"[{...}]"}}
 ```
-Key attrs: `model`, `inputTokens`, `outputTokens`, `ttft` (time to first token in ms), `error` (when failed).
+Key attrs: `model`, `inputTokens`, `outputTokens`, `ttft` (time to first token in ms), `maxTokens`, `temperature`, `topP`, `systemPromptFile` (references a file in the session directory), `userRequest` (the full user message content, untruncated), `inputMessages` (full messages array as JSON, pre-truncated at 64KB), `error` (when failed).
 
 #### agent_response — model output (text + tool calls)
 ```jsonl
-{"ts":1773200234011,"dur":0,"sid":"62f52dec","type":"agent_response","name":"agent_response","spanId":"agent-msg-000000000000000c","parentSpanId":"0000000000000003","status":"ok","attrs":{"response":"[{\"role\":\"assistant\",\"parts\":[{\"type\":\"text\",\"content\":\"Running your command now.\"},{\"type\":\"tool_call\",\"name\":\"run_in_terminal\",\"arguments\":\"{...}\"}]}]"}}
+{"ts":1773200234011,"dur":0,"sid":"62f52dec","type":"agent_response","name":"agent_response","spanId":"agent-msg-000000000000000c","parentSpanId":"0000000000000003","status":"ok","attrs":{"response":"[{\"role\":\"assistant\",...}]","reasoning":"The user wants me to run a command."}}
 ```
-Key attrs: `response` (JSON-encoded array of message parts; may be truncated).
+Key attrs: `response` (JSON-encoded array of message parts; may be truncated), `reasoning` (optional — the model's chain-of-thought/thinking text when thinking mode is active; may be truncated).
 
 #### user_message — user input
 ```jsonl
@@ -85,6 +91,23 @@ Key attrs: `agentName`, `description` (optional), `error` (when failed).
 {"ts":1773200260000,"dur":0,"sid":"62f52dec","type":"generic","name":"some-event","spanId":"abc123","status":"ok","attrs":{"details":"Additional context","category":"some-category"}}
 ```
 
+Special generic entries:
+- `system_prompt_ref` — references a `system_prompt_*.json` file in the session directory. `attrs.file` is the filename, `attrs.model` is the model it was written for. Read this file to see the full system prompt.
+- `tools_ref` — references a `tools_*.json` file. `attrs.file` is the filename, `attrs.model` is the model.
+
+#### session_start — session metadata (appears once at session start)
+```jsonl
+{"ts":1773200251300,"dur":0,"sid":"62f52dec","type":"session_start","name":"session_start","spanId":"session-start-62f52dec","status":"ok","attrs":{"copilotVersion":"0.43.2026033104","vscodeVersion":"1.99.0"}}
+```
+Key attrs: `copilotVersion`, `vscodeVersion`. Useful for identifying which build produced the logs.
+
+#### turn_start / turn_end — tool-calling loop iteration boundaries
+```jsonl
+{"ts":1773200251400,"dur":0,"sid":"62f52dec","type":"turn_start","name":"turn_start:0","spanId":"turn-start-X-0","status":"ok","attrs":{"turnId":"0"}}
+{"ts":1773200255000,"dur":0,"sid":"62f52dec","type":"turn_end","name":"turn_end:0","spanId":"turn-end-X-0","status":"ok","attrs":{"turnId":"0"}}
+```
+Key attrs: `turnId` (iteration number within a single user request's tool-calling loop). Use these to identify which iteration events belong to and to count total loop iterations.
+
 ### Reading the event hierarchy
 
 Events form a tree via `spanId`/`parentSpanId`. A typical chain:
diff --git a/extensions/copilot/src/extension/chat/vscode-node/chatDebugFileLoggerService.ts b/extensions/copilot/src/extension/chat/vscode-node/chatDebugFileLoggerService.ts
index ac3d1634fcf..472920c11ec 100644
--- a/extensions/copilot/src/extension/chat/vscode-node/chatDebugFileLoggerService.ts
+++ b/extensions/copilot/src/extension/chat/vscode-node/chatDebugFileLoggerService.ts
@@ -7,6 +7,7 @@ import * as fs from 'fs';
 import * as vscode from 'vscode';
 import { IChatDebugFileLoggerService, sessionResourceToId } from '../../../platform/chat/common/chatDebugFileLoggerService';
 import { ConfigKey, IConfigurationService } from '../../../platform/configuration/common/configurationService';
+import { IEnvService } from '../../../platform/env/common/envService';
 import { IVSCodeExtensionContext } from '../../../platform/extContext/common/extensionContext';
 import { createDirectoryIfNotExists, IFileSystemService } from '../../../platform/filesystem/common/fileSystemService';
 import { ILogService } from '../../../platform/log/common/logService';
@@ -46,6 +47,18 @@ interface IActiveLogSession {
 	hasOwnSpans: boolean;
 	/** Whether models.json has already been written to this session's directory */
 	modelSnapshotWritten: boolean;
+	/** Model name of the last-written system prompt (undefined = none written yet) */
+	systemPromptModel: string | undefined;
+	/** Index of the next system_prompt file to write */
+	systemPromptIndex: number;
+	/** File name of the most recently written system prompt (e.g., 'system_prompt_0.json') */
+	currentSystemPromptFile: string | undefined;
+	/** Model name of the last-written tools file (undefined = none written yet) */
+	toolsModel: string | undefined;
+	/** Index of the next tools file to write */
+	toolsIndex: number;
+	/** File name of the most recently written tools file (e.g., 'tools_0.json') */
+	currentToolsFile: string | undefined;
 }
 
 /**
@@ -59,7 +72,7 @@ interface IDebugLogEntry {
 	/** Chat session ID */
 	readonly sid: string;
 	/** Event type */
-	readonly type: 'tool_call' | 'llm_request' | 'user_message' | 'agent_response' | 'subagent' | 'discovery' | 'error' | 'generic' | 'child_session_ref' | 'hook';
+	readonly type: 'session_start' | 'tool_call' | 'llm_request' | 'user_message' | 'agent_response' | 'subagent' | 'discovery' | 'error' | 'generic' | 'child_session_ref' | 'hook' | 'turn_start' | 'turn_end';
 	/** Descriptive name */
 	readonly name: string;
 	/** Span or event ID */
@@ -99,6 +112,7 @@ export class ChatDebugFileLoggerService extends Disposable implements IChatDebug
 		@IConfigurationService private readonly _configurationService: IConfigurationService,
 		@IExperimentationService private readonly _experimentationService: IExperimentationService,
 		@ITelemetryService private readonly _telemetryService: ITelemetryService,
+		@IEnvService private readonly _envService: IEnvService,
 	) {
 		super();
 
@@ -211,12 +225,7 @@ export class ChatDebugFileLoggerService extends Disposable implements IChatDebug
 				existing.hasOwnSpans = true;
 				// Now that we know this is a real session, replay pending core events
 				if (!existing.parentSessionId) {
-					for (const entry of this._pendingCoreEvents) {
-						this._bufferEntry(sessionId, { ...entry, sid: sessionId });
-					}
-					if (this._modelSnapshot) {
-						this._enqueueModelSnapshotWrite(existing);
-					}
+					this._emitSessionStartAndReplay(sessionId, existing);
 				}
 			}
 			return;
@@ -276,20 +285,19 @@ export class ChatDebugFileLoggerService extends Disposable implements IChatDebug
 			label: childInfo?.label,
 			hasOwnSpans,
 			modelSnapshotWritten: false,
+			systemPromptModel: undefined,
+			systemPromptIndex: 0,
+			currentSystemPromptFile: undefined,
+			toolsModel: undefined,
+			toolsIndex: 0,
+			currentToolsFile: undefined,
 		};
 		this._activeSessions.set(sessionId, session);
 
 		// Replay pending core events only for parent sessions that have their own spans
 		// (not for sessions auto-created as a side effect of child parent references)
 		if (!childInfo && hasOwnSpans) {
-			for (const entry of this._pendingCoreEvents) {
-				this._bufferEntry(sessionId, { ...entry, sid: sessionId });
-			}
-
-			// Write cached model snapshot as models.json in the session directory
-			if (this._modelSnapshot) {
-				this._enqueueModelSnapshotWrite(session);
-			}
+			this._emitSessionStartAndReplay(sessionId, session);
 		}
 
 		// Start auto-flush timer if this is the first active session
@@ -424,6 +432,52 @@ export class ChatDebugFileLoggerService extends Disposable implements IChatDebug
 		}
 	}
 
+	private _enqueueFileWrite(session: IActiveLogSession, content: string, fileName: string): void {
+		session.flushPromise = session.flushPromise.then(
+			() => this._writeSessionFile(session, content, fileName),
+			() => this._writeSessionFile(session, content, fileName),
+		);
+	}
+
+	private async _writeSessionFile(session: IActiveLogSession, content: string, fileName: string): Promise<void> {
+		try {
+			if (!session.dirEnsured) {
+				await createDirectoryIfNotExists(this._fileSystemService, session.sessionDir);
+				session.dirEnsured = true;
+			}
+			const fileUri = URI.joinPath(session.sessionDir, fileName);
+			await fs.promises.writeFile(fileUri.fsPath, fileName.endsWith('.json') ? JSON.stringify({ content }, null, 2) : content, 'utf-8');
+		} catch (err) {
+			this._logService.error(`[ChatDebugFileLogger] Failed to write ${fileName}`, err);
+		}
+	}
+
+	/**
+	 * Emit a session_start entry, replay cached core events, and write the model snapshot.
+	 * Called when a parent session is first promoted to hasOwnSpans.
+	 */
+	private _emitSessionStartAndReplay(sessionId: string, session: IActiveLogSession): void {
+		this._bufferEntry(sessionId, {
+			ts: Date.now(),
+			dur: 0,
+			sid: sessionId,
+			type: 'session_start',
+			name: 'session_start',
+			spanId: `session-start-${sessionId}`,
+			status: 'ok',
+			attrs: {
+				copilotVersion: this._envService.getVersion(),
+				vscodeVersion: this._envService.vscodeVersion,
+			},
+		});
+		for (const entry of this._pendingCoreEvents) {
+			this._bufferEntry(sessionId, { ...entry, sid: sessionId });
+		}
+		if (this._modelSnapshot) {
+			this._enqueueModelSnapshotWrite(session);
+		}
+	}
+
 	// ── OTel span handling ──
 
 	private _onSpanCompleted(span: ICompletedSpanData): void {
@@ -466,7 +520,74 @@ export class ChatDebugFileLoggerService extends Disposable implements IChatDebug
 		//      (handled in _ensureSession's child branch).
 		this._ensureSession(sessionId);
 
+		// Write system_prompt JSON when model changes (before buffering so llm_request gets the file ref)
+		if (opName === GenAiOperationName.CHAT) {
+			const session = this._activeSessions.get(sessionId);
+			if (session && session.hasOwnSpans && !session.parentSessionId) {
+				const model = asString(span.attributes[GenAiAttr.REQUEST_MODEL])
+					?? asString(span.attributes[GenAiAttr.RESPONSE_MODEL])
+					?? 'unknown';
+				if (model !== session.systemPromptModel) {
+					const systemInstructions = asString(span.attributes[GenAiAttr.SYSTEM_INSTRUCTIONS]);
+					if (systemInstructions) {
+						const fileName = `system_prompt_${session.systemPromptIndex}.json`;
+						session.systemPromptModel = model;
+						session.systemPromptIndex++;
+						session.currentSystemPromptFile = fileName;
+						this._enqueueFileWrite(session, systemInstructions, fileName);
+						this._bufferEntry(sessionId, {
+							ts: span.startTime,
+							dur: 0,
+							sid: sessionId,
+							type: 'generic',
+							name: 'system_prompt_ref',
+							spanId: `sys-prompt-${span.spanId}`,
+							status: 'ok',
+							attrs: { file: fileName, model },
+						});
+					}
+				}
+			}
+		}
+
+		// Write tools JSON when model changes on INVOKE_AGENT spans (tool definitions live on agent span)
+		if (opName === GenAiOperationName.INVOKE_AGENT && !span.parentSpanId) {
+			const session = this._activeSessions.get(sessionId);
+			if (session && session.hasOwnSpans && !session.parentSessionId) {
+				const model = asString(span.attributes[GenAiAttr.REQUEST_MODEL])
+					?? asString(span.attributes[GenAiAttr.RESPONSE_MODEL])
+					?? 'unknown';
+				if (model !== session.toolsModel) {
+					const toolDefs = asString(span.attributes[GenAiAttr.TOOL_DEFINITIONS]);
+					if (toolDefs) {
+						const fileName = `tools_${session.toolsIndex}.json`;
+						session.toolsModel = model;
+						session.toolsIndex++;
+						session.currentToolsFile = fileName;
+						this._enqueueFileWrite(session, toolDefs, fileName);
+						this._bufferEntry(sessionId, {
+							ts: span.startTime,
+							dur: 0,
+							sid: sessionId,
+							type: 'generic',
+							name: 'tools_ref',
+							spanId: `tools-${span.spanId}`,
+							status: 'ok',
+							attrs: { file: fileName, model },
+						});
+					}
+				}
+			}
+		}
+
 		if (entry) {
+			// Attach current system prompt file reference to llm_request entries
+			if (entry.type === 'llm_request') {
+				const session = this._activeSessions.get(sessionId);
+				if (session?.currentSystemPromptFile) {
+					entry.attrs.systemPromptFile = session.currentSystemPromptFile;
+				}
+			}
 			this._bufferEntry(sessionId, entry);
 		}
 
@@ -478,6 +599,7 @@ export class ChatDebugFileLoggerService extends Disposable implements IChatDebug
 		if (opName === GenAiOperationName.CHAT) {
 			// Extract agent response summary from output messages
 			if (outputMessages) {
+				const reasoningContent = asString(span.attributes[CopilotChatAttr.REASONING_CONTENT]);
 				this._bufferEntry(sessionId, {
 					ts: span.endTime,
 					dur: 0,
@@ -489,6 +611,7 @@ export class ChatDebugFileLoggerService extends Disposable implements IChatDebug
 					status: 'ok',
 					attrs: {
 						response: truncate(outputMessages, MAX_ATTR_VALUE_LENGTH),
+						...(reasoningContent ? { reasoning: truncate(reasoningContent, MAX_ATTR_VALUE_LENGTH) } : {}),
 					},
 				});
 			}
@@ -496,6 +619,11 @@ export class ChatDebugFileLoggerService extends Disposable implements IChatDebug
 	}
 
 	private _onSpanEvent(event: ISpanEventData): void {
+		if (event.eventName === 'turn_start' || event.eventName === 'turn_end') {
+			this._onTurnBoundaryEvent(event);
+			return;
+		}
+
 		if (event.eventName !== 'user_message') {
 			return;
 		}
@@ -506,7 +634,9 @@ export class ChatDebugFileLoggerService extends Disposable implements IChatDebug
 
 		// If the event carries a session ID, route to that specific session
 		const eventSessionId = event.attributes[CopilotChatAttr.CHAT_SESSION_ID];
-		if (typeof eventSessionId === 'string' && this._activeSessions.has(eventSessionId)) {
+		if (typeof eventSessionId === 'string') {
+			// Ensure the session buffer exists so early events (before any span completes) are captured
+			this._ensureSession(eventSessionId);
 			this._bufferEntry(eventSessionId, {
 				ts: event.timestamp,
 				dur: 0,
@@ -568,6 +698,33 @@ export class ChatDebugFileLoggerService extends Disposable implements IChatDebug
 		}
 	}
 
+	private _onTurnBoundaryEvent(event: ISpanEventData): void {
+		const type = event.eventName === 'turn_start' ? 'turn_start' : 'turn_end';
+		const turnId = typeof event.attributes.turnId === 'string' ? event.attributes.turnId : String(event.attributes.turnId ?? '');
+		const sessionId = typeof event.attributes[CopilotChatAttr.CHAT_SESSION_ID] === 'string'
+			? event.attributes[CopilotChatAttr.CHAT_SESSION_ID] as string
+			: (event.parentSpanId ? this._spanSessionIndex.get(event.parentSpanId) : undefined);
+
+		if (!sessionId) {
+			return;
+		}
+
+		// Ensure the session buffer exists so early turn events are captured
+		this._ensureSession(sessionId);
+
+		this._bufferEntry(sessionId, {
+			ts: event.timestamp,
+			dur: 0,
+			sid: sessionId,
+			type,
+			name: `${type}:${turnId}`,
+			spanId: `${type}-${event.spanId}-${turnId}`,
+			parentSpanId: event.parentSpanId,
+			status: 'ok',
+			attrs: { turnId },
+		});
+	}
+
 	// ── Core debug event handling (discovery, skill loading, etc.) ──
 
 	private _onCoreDebugEvent(event: vscode.ChatDebugEvent): void {
@@ -666,6 +823,21 @@ export class ChatDebugFileLoggerService extends Disposable implements IChatDebug
 						...(span.attributes[CopilotChatAttr.TIME_TO_FIRST_TOKEN] !== undefined
 							? { ttft: asNumber(span.attributes[CopilotChatAttr.TIME_TO_FIRST_TOKEN]) }
 							: {}),
+						...(span.attributes[CopilotChatAttr.USER_REQUEST] !== undefined
+							? { userRequest: String(span.attributes[CopilotChatAttr.USER_REQUEST]) }
+							: {}),
+						...(span.attributes[GenAiAttr.INPUT_MESSAGES] !== undefined
+							? { inputMessages: String(span.attributes[GenAiAttr.INPUT_MESSAGES]) }
+							: {}),
+						...(span.attributes[GenAiAttr.REQUEST_MAX_TOKENS] !== undefined
+							? { maxTokens: asNumber(span.attributes[GenAiAttr.REQUEST_MAX_TOKENS]) }
+							: {}),
+						...(span.attributes[GenAiAttr.REQUEST_TEMPERATURE] !== undefined
+							? { temperature: asNumber(span.attributes[GenAiAttr.REQUEST_TEMPERATURE]) }
+							: {}),
+						...(span.attributes[GenAiAttr.REQUEST_TOP_P] !== undefined
+							? { topP: asNumber(span.attributes[GenAiAttr.REQUEST_TOP_P]) }
+							: {}),
 						...(isError && span.status.message ? { error: span.status.message } : {}),
 					},
 				};
diff --git a/extensions/copilot/src/extension/chat/vscode-node/test/chatDebugFileLoggerService.spec.ts b/extensions/copilot/src/extension/chat/vscode-node/test/chatDebugFileLoggerService.spec.ts
index 78cdc481fac..a6291fdc397 100644
--- a/extensions/copilot/src/extension/chat/vscode-node/test/chatDebugFileLoggerService.spec.ts
+++ b/extensions/copilot/src/extension/chat/vscode-node/test/chatDebugFileLoggerService.spec.ts
@@ -8,6 +8,7 @@ import * as os from 'os';
 import * as path from 'path';
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 import { ConfigKey, IConfigurationService } from '../../../../platform/configuration/common/configurationService';
+import { IEnvService } from '../../../../platform/env/common/envService';
 import { IVSCodeExtensionContext } from '../../../../platform/extContext/common/extensionContext';
 import { IFileSystemService } from '../../../../platform/filesystem/common/fileSystemService';
 import { ILogService } from '../../../../platform/log/common/logService';
@@ -158,6 +159,12 @@ class TestTelemetryService {
 	sendMSFTTelemetryEvent() { }
 }
 
+class TestEnvService {
+	declare readonly _serviceBrand: undefined;
+	readonly vscodeVersion = '1.99.0-test';
+	getVersion() { return '0.0.0-test'; }
+}
+
 describe('ChatDebugFileLoggerService', () => {
 	let disposables: DisposableStore;
 	let tmpDir: string;
@@ -178,6 +185,7 @@ describe('ChatDebugFileLoggerService', () => {
 			new TestConfigurationService() as unknown as IConfigurationService,
 			new NullExperimentationService() as unknown as IExperimentationService,
 			new TestTelemetryService() as unknown as ITelemetryService,
+			new TestEnvService() as unknown as IEnvService,
 		);
 		disposables.add(service);
 	});
@@ -206,11 +214,12 @@ describe('ChatDebugFileLoggerService', () => {
 		await service.flush('session-1');
 		const entries = await readLogEntries('session-1');
 
-		expect(entries).toHaveLength(1);
-		expect(entries[0].type).toBe('tool_call');
-		expect(entries[0].name).toBe('read_file');
-		expect(entries[0].sid).toBe('session-1');
-		expect(entries[0].status).toBe('ok');
+		expect(entries).toHaveLength(2);
+		expect(entries[0].type).toBe('session_start');
+		expect(entries[1].type).toBe('tool_call');
+		expect(entries[1].name).toBe('read_file');
+		expect(entries[1].sid).toBe('session-1');
+		expect(entries[1].status).toBe('ok');
 	});
 
 	it('writes LLM request with token counts', async () => {
@@ -221,10 +230,10 @@ describe('ChatDebugFileLoggerService', () => {
 		await service.flush('session-1');
 		const entries = await readLogEntries('session-1');
 
-		expect(entries).toHaveLength(1);
-		expect(entries[0].type).toBe('llm_request');
-		expect(entries[0].name).toBe('chat:gpt-4o');
-		const attrs = entries[0].attrs as Record<string, unknown>;
+		expect(entries).toHaveLength(2);
+		expect(entries[1].type).toBe('llm_request');
+		expect(entries[1].name).toBe('chat:gpt-4o');
+		const attrs = entries[1].attrs as Record<string, unknown>;
 		expect(attrs.model).toBe('gpt-4o');
 		expect(attrs.inputTokens).toBe(1000);
 		expect(attrs.outputTokens).toBe(500);
@@ -245,8 +254,8 @@ describe('ChatDebugFileLoggerService', () => {
 		await service.flush('session-1');
 		const entries = await readLogEntries('session-1');
 
-		expect(entries[0].status).toBe('error');
-		expect((entries[0].attrs as Record<string, unknown>).error).toBe('Command failed');
+		expect(entries[1].status).toBe('error');
+		expect((entries[1].attrs as Record<string, unknown>).error).toBe('Command failed');
 	});
 
 	it('isDebugLogUri returns true for files under debug-logs', () => {
@@ -302,7 +311,7 @@ describe('ChatDebugFileLoggerService', () => {
 		await service.flush('session-1');
 		const entries = await readLogEntries('session-1');
 
-		const args = (entries[0].attrs as Record<string, unknown>).args as string;
+		const args = (entries[1].attrs as Record<string, unknown>).args as string;
 		expect(args.length).toBeLessThan(longArgs.length);
 		expect(args).toContain('[truncated]');
 	});
@@ -364,6 +373,7 @@ describe('ChatDebugFileLoggerService', () => {
 			configService as unknown as IConfigurationService,
 			new NullExperimentationService() as unknown as IExperimentationService,
 			new TestTelemetryService() as unknown as ITelemetryService,
+			new TestEnvService() as unknown as IEnvService,
 		);
 		disposables.add(svc);
 		disposables.add(configChangeEmitter);
diff --git a/extensions/copilot/src/extension/intents/node/toolCallingLoop.ts b/extensions/copilot/src/extension/intents/node/toolCallingLoop.ts
index a697aa34290..788856c7fbd 100644
--- a/extensions/copilot/src/extension/intents/node/toolCallingLoop.ts
+++ b/extensions/copilot/src/extension/intents/node/toolCallingLoop.ts
@@ -22,7 +22,7 @@ import { isOpenAIContextManagementResponse, OpenAiFunctionDef } from '../../../p
 import { IMakeChatRequestOptions } from '../../../platform/networking/common/networking';
 import { OpenAIContextManagementResponse } from '../../../platform/networking/common/openai';
 import { CopilotChatAttr, emitAgentTurnEvent, emitSessionStartEvent, GenAiAttr, GenAiMetrics, GenAiOperationName, GenAiProviderName, resolveWorkspaceOTelMetadata, StdAttr, truncateForOTel, workspaceMetadataToOTelAttributes } from '../../../platform/otel/common/index';
-import { IOTelService, SpanKind, SpanStatusCode } from '../../../platform/otel/common/otelService';
+import { IOTelService, ISpanHandle, SpanKind, SpanStatusCode } from '../../../platform/otel/common/otelService';
 import { getCurrentCapturingToken, IRequestLogger } from '../../../platform/requestLogger/node/requestLogger';
 import { IExperimentationService } from '../../../platform/telemetry/common/nullExperimentationService';
 import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';
@@ -785,7 +785,7 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
 				});
 
 				try {
-					const result = await this._runLoop(outputStream, token);
+					const result = await this._runLoop(outputStream, token, span, chatSessionId);
 					span.setAttributes({
 						[CopilotChatAttr.TURN_COUNT]: result.toolCallRounds.length,
 						[GenAiAttr.USAGE_INPUT_TOKENS]: totalInputTokens,
@@ -803,9 +803,9 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
 						}
 						// Log tool definitions once on the agent span (same set across all turns)
 						if (result.availableTools.length > 0) {
-							span.setAttribute(GenAiAttr.TOOL_DEFINITIONS, truncateForOTel(JSON.stringify(
+							span.setAttribute(GenAiAttr.TOOL_DEFINITIONS, JSON.stringify(
 								result.availableTools.map(t => ({ type: 'function', name: t.name, description: t.description }))
-							)));
+							));
 						}
 					}
 					span.setStatus(SpanStatusCode.OK);
@@ -827,7 +827,7 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
 		);
 	}
 
-	private async _runLoop(outputStream: ChatResponseStream | undefined, token: CancellationToken): Promise<IToolCallLoopResult> {
+	private async _runLoop(outputStream: ChatResponseStream | undefined, token: CancellationToken, agentSpan?: ISpanHandle, chatSessionId?: string): Promise<IToolCallLoopResult> {
 		let i = 0;
 		let lastResult: IToolCallSingleResult | undefined;
 		let lastRequestMessagesStartingIndexForRun: number | undefined;
@@ -859,6 +859,7 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
 			try {
 				const turnId = String(i);
 				this._sessionTranscriptService.logAssistantTurnStart(sessionId, turnId);
+				agentSpan?.addEvent('turn_start', { turnId, ...(chatSessionId ? { [CopilotChatAttr.CHAT_SESSION_ID]: chatSessionId } : {}) });
 				this.resolveAutopilotProgress();
 				const result = await this.runOne(outputStream, i, token);
 				if (lastRequestMessagesStartingIndexForRun === undefined) {
@@ -871,6 +872,7 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
 
 				this.toolCallRounds.push(result.round);
 				this._sessionTranscriptService.logAssistantTurnEnd(sessionId, turnId);
+				agentSpan?.addEvent('turn_end', { turnId, ...(chatSessionId ? { [CopilotChatAttr.CHAT_SESSION_ID]: chatSessionId } : {}) });
 
 				// If the model produced productive (non-task_complete) tool calls after being nudged,
 				// reset the stop hook flag and iteration count so it can be nudged again.
diff --git a/extensions/copilot/src/extension/prompt/node/chatMLFetcher.ts b/extensions/copilot/src/extension/prompt/node/chatMLFetcher.ts
index 21699924f9d..c2be9927f32 100644
--- a/extensions/copilot/src/extension/prompt/node/chatMLFetcher.ts
+++ b/extensions/copilot/src/extension/prompt/node/chatMLFetcher.ts
@@ -34,6 +34,7 @@ import { getCurrentCapturingToken, IRequestLogger } from '../../../platform/requ
 import { IExperimentationService } from '../../../platform/telemetry/common/nullExperimentationService';
 import { ITelemetryService, TelemetryProperties } from '../../../platform/telemetry/common/telemetry';
 import { TelemetryData } from '../../../platform/telemetry/common/telemetryData';
+import { isEncryptedThinkingDelta } from '../../../platform/thinking/common/thinking';
 import { calculateLineRepetitionStats, isRepetitive } from '../../../util/common/anomalyDetection';
 import { ErrorUtils } from '../../../util/common/errors';
 import { AsyncIterableObject } from '../../../util/vs/base/common/async';
@@ -244,25 +245,40 @@ export class ChatMLFetcherImpl extends AbstractChatMLFetcher {
 
 				// Extract and set structured prompt sections for the debug panel
 				if (otelInferenceSpan) {
-					const capiMessages = requestBody.messages as ReadonlyArray<{ role?: string; content?: string; tool_calls?: ReadonlyArray<{ id: string; function: { name: string; arguments: string } }> }>;
+					// Support both Chat Completions API (messages) and Responses API (input) formats
+					const capiMessages = (requestBody.messages ?? requestBody.input) as ReadonlyArray<{ role?: string; content?: string | unknown[] }> | undefined;
 					// User request: last user-role message
 					const userMessages = capiMessages?.filter(m => m.role === 'user');
 					const lastUserMsg = userMessages?.[userMessages.length - 1];
 					if (lastUserMsg?.content) {
-						otelInferenceSpan.setAttribute(CopilotChatAttr.USER_REQUEST, lastUserMsg.content);
+						const userContent = typeof lastUserMsg.content === 'string'
+							? lastUserMsg.content
+							: JSON.stringify(lastUserMsg.content);
+						otelInferenceSpan.setAttribute(CopilotChatAttr.USER_REQUEST, truncateForOTel(userContent));
 					}
-					// System instructions (first system message) — raw text for debug panel sections
+					// System instructions — check messages array, top-level system (Anthropic), or instructions (Responses API)
 					const systemMsg = capiMessages?.find(m => m.role === 'system');
-					if (systemMsg?.content) {
-						otelInferenceSpan.setAttribute(GenAiAttr.SYSTEM_INSTRUCTIONS, systemMsg.content);
+					const systemContent = systemMsg?.content
+						?? (requestBody as Record<string, unknown>).system
+						?? (requestBody as Record<string, unknown>).instructions;
+					if (systemContent) {
+						const systemText = typeof systemContent === 'string'
+							? systemContent
+							: JSON.stringify(systemContent);
+						otelInferenceSpan.setAttribute(GenAiAttr.SYSTEM_INSTRUCTIONS, systemText);
 					}
 				}
 
 				// Always capture full request content for the debug panel
 				if (otelInferenceSpan) {
-					const capiMessages = requestBody.messages as ReadonlyArray<{ role?: string; content?: string; tool_calls?: ReadonlyArray<{ id: string; function: { name: string; arguments: string } }> }>;
+					const capiMessages = (requestBody.messages ?? requestBody.input) as ReadonlyArray<{ role?: string; content?: string | unknown[] }> | undefined;
 					if (capiMessages) {
-						otelInferenceSpan.setAttribute(GenAiAttr.INPUT_MESSAGES, truncateForOTel(JSON.stringify(toInputMessages(capiMessages))));
+						// Normalize non-string content (Anthropic arrays, Responses API parts) to strings for OTel schema
+						const normalized = capiMessages.map(m => ({
+							...m,
+							content: typeof m.content === 'string' ? m.content : m.content ? JSON.stringify(m.content) : undefined,
+						}));
+						otelInferenceSpan.setAttribute(GenAiAttr.INPUT_MESSAGES, truncateForOTel(JSON.stringify(toInputMessages(normalized))));
 					}
 				}
 				tokenCount = await chatEndpoint.acquireTokenizer().countMessagesTokens(messages);
@@ -382,6 +398,19 @@ export class ChatMLFetcherImpl extends AbstractChatMLFetcher {
 						if (parts.length > 0) {
 							otelInferenceSpan.setAttribute(GenAiAttr.OUTPUT_MESSAGES, truncateForOTel(JSON.stringify([{ role: 'assistant', parts }])));
 						}
+						// Capture reasoning/thinking text if present
+						const hasThinking = streamRecorder.deltas.some(d => d.thinking);
+						if (hasThinking) {
+							const thinkingTexts = streamRecorder.deltas
+								.filter(d => d.thinking && !isEncryptedThinkingDelta(d.thinking) && d.thinking.text)
+								.map(d => {
+									const t = d.thinking!;
+									if ('encrypted' in t) { return ''; }
+									return Array.isArray(t.text) ? t.text.join('') : (t.text ?? '');
+								});
+							const reasoningText = thinkingTexts.join('');
+							otelInferenceSpan.setAttribute(CopilotChatAttr.REASONING_CONTENT, truncateForOTel(reasoningText || '[encrypted]'));
+						}
 					}
 
 					// Emit OTel inference details event BEFORE ending the span