From 6863a33f1adb8f4b6cb51036056d0398b4f77ed3 Mon Sep 17 00:00:00 2001
From: Megan Rogge
Date: Fri, 11 Oct 2024 14:39:13 -0700
Subject: [PATCH] add screenshot capability, add screenshot to attachment context picker (#231131)

---
Review notes (free-text area ignored by `git am`; not part of the commit message):
* This copy was re-laid-out from a whitespace-collapsed extraction. Indentation is
  reconstructed (tabs assumed); per-hunk line counts match the `@@` headers and the
  diffstat. Generic type arguments that the extraction had stripped (`Promise<...>`)
  were restored from the function bodies. The author e-mail was missing and has not
  been guessed.
* app.ts: the display-media handler returns without invoking `callback` when no
  window is focused, and the fallback passes `screens[0]` even when no sources were
  returned. NOTE(review): confirm how Electron settles the request in both cases.
* app.ts: `audio: 'loopback'` is supplied although the renderer asks for
  `audio: false`. NOTE(review): confirm this is intended.
* screenshot.ts: takeScreenshotOfDisplay() calls getActiveWindowBounds() only as a
  guard; its single caller has already performed the same check.

 src/vs/code/electron-main/app.ts              |  64 +++++++-
 .../platform/screenshot/browser/screenshot.ts | 139 ++++++++++++++++++
 .../chat/browser/actions/chatActions.ts       |  17 +--
 .../browser/actions/chatContextActions.ts     |  31 +++-
 .../chat/browser/contrib/screenshot.ts        |  26 ++++
 5 files changed, 263 insertions(+), 14 deletions(-)
 create mode 100644 src/vs/platform/screenshot/browser/screenshot.ts
 create mode 100644 src/vs/workbench/contrib/chat/browser/contrib/screenshot.ts

diff --git a/src/vs/code/electron-main/app.ts b/src/vs/code/electron-main/app.ts
index ebf401ded8f35..6c4507175bbaf 100644
--- a/src/vs/code/electron-main/app.ts
+++ b/src/vs/code/electron-main/app.ts
@@ -3,7 +3,7 @@
  *  Licensed under the MIT License. See License.txt in the project root for license information.
  *--------------------------------------------------------------------------------------------*/
 
-import { app, BrowserWindow, protocol, session, Session, systemPreferences, WebFrameMain } from 'electron';
+import { app, BrowserWindow, desktopCapturer, protocol, session, Session, systemPreferences, screen, WebFrameMain } from 'electron';
 import { addUNCHostToAllowlist, disableUNCAccessRestrictions } from '../../base/node/unc.js';
 import { validatedIpcMain } from '../../base/parts/ipc/electron-main/ipcMain.js';
 import { hostname, release } from 'os';
@@ -163,6 +163,7 @@ export class CodeApplication extends Disposable {
 		// !!! DO NOT CHANGE without consulting the documentation !!!
 		//
 
+		const isUrlFromWindow = (requestingUrl?: string | undefined) => requestingUrl?.startsWith(`${Schemas.vscodeFileResource}://${VSCODE_AUTHORITY}`);
 		const isUrlFromWebview = (requestingUrl: string | undefined) => requestingUrl?.startsWith(`${Schemas.vscodeWebview}://`);
 
 		const allowedPermissionsInWebview = new Set([
@@ -170,11 +171,19 @@ export class CodeApplication extends Disposable {
 			'clipboard-sanitized-write',
 		]);
 
+		const allowedPermissionsInCore = new Set([
+			'media'
+		]);
+
 		session.defaultSession.setPermissionRequestHandler((_webContents, permission, callback, details) => {
 			if (isUrlFromWebview(details.requestingUrl)) {
 				return callback(allowedPermissionsInWebview.has(permission));
 			}
 
+			if (isUrlFromWindow(details.requestingUrl)) {
+				return callback(allowedPermissionsInCore.has(permission));
+			}
+
 			return callback(false);
 		});
 
@@ -182,9 +191,60 @@ export class CodeApplication extends Disposable {
 			if (isUrlFromWebview(details.requestingUrl)) {
 				return allowedPermissionsInWebview.has(permission);
 			}
-
+			if (isUrlFromWindow(details.requestingUrl)) {
+				return allowedPermissionsInCore.has(permission);
+			}
 			return false;
 		});
+		session.defaultSession.setDisplayMediaRequestHandler(async (request, callback) => {
+
+			// Get the currently focused window
+			const focusedWindow = BrowserWindow.getFocusedWindow();
+
+			if (!focusedWindow) {
+				return;
+			}
+
+			// Get the bounds (position and size) of the focused window
+			const windowBounds = focusedWindow.getBounds();
+
+			// Get all the screen sources
+			const screens = await desktopCapturer.getSources({ types: ['screen'] });
+
+			// Get the display that contains the focused window
+			const displays = screen.getAllDisplays();
+
+			// Find the screen that contains the focused window
+			for (const display of displays) {
+				const displayBounds = display.bounds;
+
+				// Check if the window is within the display's bounds. The center of the window is
+				// used since maximizing actually causes the window to go beyond the screen. There
+				// is also the case where a window could be spread across multiple screens.
+				const windowCenter = {
+					x: windowBounds.x + windowBounds.width / 2,
+					y: windowBounds.y + windowBounds.height / 2,
+				};
+				if (
+					windowCenter.x >= displayBounds.x &&
+					windowCenter.x <= displayBounds.x + displayBounds.width &&
+					windowCenter.y >= displayBounds.y &&
+					windowCenter.y <= displayBounds.y + displayBounds.height
+				) {
+					// Match the display to the screen source
+					for (const source of screens) {
+						if (source.display_id === display.id.toString()) {
+							// Found the screen containing the focused window
+							callback({ video: source, audio: 'loopback' });
+							return;
+						}
+					}
+				}
+			}
+
+			// Fallback: if no matching screen is found, return the first screen
+			callback({ video: screens[0], audio: 'loopback' });
+		});
 
 		//#endregion
 
diff --git a/src/vs/platform/screenshot/browser/screenshot.ts b/src/vs/platform/screenshot/browser/screenshot.ts
new file mode 100644
index 0000000000000..74997e64eb022
--- /dev/null
+++ b/src/vs/platform/screenshot/browser/screenshot.ts
@@ -0,0 +1,139 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { addDisposableListener, getActiveWindow } from '../../../base/browser/dom.js';
+import { DisposableStore, toDisposable } from '../../../base/common/lifecycle.js';
+
+interface IBoundingBox {
+	x: number;
+	y: number;
+	width: number;
+	height: number;
+
+	left: number;
+	top: number;
+	right: number;
+	bottom: number;
+}
+
+class BoundingBox implements IBoundingBox {
+	constructor(
+		public readonly x: number,
+		public readonly y: number,
+		public readonly width: number,
+		public readonly height: number,
+	) { }
+
+	get left() { return this.x; }
+	get top() { return this.y; }
+	get right() { return this.x + this.width; }
+	get bottom() { return this.y + this.height; }
+}
+
+export async function generateFocusedWindowScreenshot(): Promise<ArrayBuffer | undefined> {
+	try {
+		const windowBounds = getActiveWindowBounds();
+		if (!windowBounds) {
+			return;
+		}
+		return takeScreenshotOfDisplay(windowBounds);
+	} catch (err) {
+		console.error('Error taking screenshot:', err);
+		return undefined;
+	}
+}
+
+async function takeScreenshotOfDisplay(cropDimensions?: IBoundingBox): Promise<ArrayBuffer | undefined> {
+	const windowBounds = getActiveWindowBounds();
+	if (!windowBounds) {
+		return undefined;
+	}
+	const store = new DisposableStore();
+
+	// Create a video element to play the captured screen source
+	const video = document.createElement('video');
+	store.add(toDisposable(() => video.remove()));
+	let stream: MediaStream | undefined;
+	try {
+		// TODO: This needs to get the stream for the actual window when strictly taking a
+		// screenshot of the window, so as to not leak windows in the foreground (e.g. an
+		// always-on-top video)
+		// Create a stream from the screen source (capture screen without audio)
+		stream = await navigator.mediaDevices.getDisplayMedia({
+			audio: false,
+			video: true
+		});
+
+		// Set the stream as the source of the video element
+		video.srcObject = stream;
+		video.play();
+
+		// Wait for the video to load properly before capturing the screenshot
+		await Promise.all([
+			new Promise<void>(r => store.add(addDisposableListener(video, 'loadedmetadata', () => r()))),
+			new Promise<void>(r => store.add(addDisposableListener(video, 'canplaythrough', () => r())))
+		]);
+
+		// Create a canvas element with the size of the cropped region
+		if (!cropDimensions) {
+			cropDimensions = new BoundingBox(0, 0, video.videoWidth, video.videoHeight);
+		}
+		const canvas = document.createElement('canvas');
+		canvas.width = cropDimensions.width;
+		canvas.height = cropDimensions.height;
+
+		const ctx = canvas.getContext('2d');
+		if (!ctx) {
+			return undefined;
+		}
+
+		// Draw the portion of the video (x, y) with the specified width and height
+		ctx.drawImage(video,
+			// Source
+			cropDimensions.x, cropDimensions.y, cropDimensions.width, cropDimensions.height,
+			// Dest
+			0, 0, cropDimensions.width, cropDimensions.height,
+		);
+
+		// Convert the canvas to a Blob (JPEG format), use .95 for quality
+		const blob: Blob | null = await new Promise((resolve) => canvas.toBlob((blob) => resolve(blob), 'image/jpeg', 0.95));
+		if (!blob) {
+			throw new Error('Failed to create blob from canvas');
+		}
+
+		// Convert the Blob to an ArrayBuffer
+		return blob.arrayBuffer();
+
+	} catch (error) {
+		console.error('Error taking screenshot:', error);
+		return undefined;
+	} finally {
+		store.dispose();
+		if (stream) {
+			for (const track of stream.getTracks()) {
+				track.stop();
+			}
+		}
+	}
+}
+
+
+function getActiveWindowBounds(): IBoundingBox | undefined {
+	const window = getActiveWindow();
+	if (!window) {
+		return;
+	}
+	const displayOffsetX = 'availLeft' in window.screen && typeof window.screen.availLeft === 'number' ? window.screen.availLeft : 0;
+	const displayOffsetY = 'availTop' in window.screen && typeof window.screen.availTop === 'number' ? window.screen.availTop : 0;
+	// This handling of dimensions is flaky, if the active window is on the first monitor and
+	// DPRs differ this may not work properly.
+	return new BoundingBox(
+		Math.round((window.screenX - displayOffsetX) * window.devicePixelRatio),
+		Math.round((window.screenY - displayOffsetY) * window.devicePixelRatio),
+		Math.round(window.innerWidth * window.devicePixelRatio),
+		Math.round(window.innerHeight * window.devicePixelRatio),
+	);
+}
+
diff --git a/src/vs/workbench/contrib/chat/browser/actions/chatActions.ts b/src/vs/workbench/contrib/chat/browser/actions/chatActions.ts
index e272d68126fa7..4f13f5d51aabd 100644
--- a/src/vs/workbench/contrib/chat/browser/actions/chatActions.ts
+++ b/src/vs/workbench/contrib/chat/browser/actions/chatActions.ts
@@ -38,6 +38,7 @@ import { CHAT_VIEW_ID, IChatWidget, IChatWidgetService, showChatView } from '../
 import { IChatEditorOptions } from '../chatEditor.js';
 import { ChatEditorInput } from '../chatEditorInput.js';
 import { ChatViewPane } from '../chatViewPane.js';
+import { getScreenshotAsVariable } from '../contrib/screenshot.js';
 import { clearChatEditor } from './chatClear.js';
 
 export const CHAT_CATEGORY = localize2('chat.category', 'Chat');
@@ -58,9 +59,9 @@ export interface IChatViewOpenOptions {
 	previousRequests?: IChatViewOpenRequestEntry[];
 
 	/**
-	 * The image(s) to include in the request
+	 * Whether a screenshot of the focused window should be taken and attached
 	 */
-	images?: IChatImageAttachment[];
+	attachScreenshot?: boolean;
 }
 
 export interface IChatImageAttachment {
@@ -113,14 +114,10 @@ class OpenChatGlobalAction extends Action2 {
 				chatService.addCompleteRequest(chatWidget.viewModel.sessionId, request, undefined, 0, { message: response });
 			}
 		}
-		if (opts?.images) {
-			chatWidget.attachmentModel.clear();
-			for (const image of opts.images) {
-				chatWidget.attachmentModel.addContext({
-					...image,
-					isDynamic: true,
-					isImage: true
-				});
+		if (opts?.attachScreenshot) {
+			const screenshot = await getScreenshotAsVariable();
+			if (screenshot) {
+				chatWidget.attachmentModel.addContext(screenshot);
 			}
 		}
 		if (opts?.query) {
diff --git a/src/vs/workbench/contrib/chat/browser/actions/chatContextActions.ts b/src/vs/workbench/contrib/chat/browser/actions/chatContextActions.ts
index ab4664ef5e5f8..c540bd29d33be 100644
--- a/src/vs/workbench/contrib/chat/browser/actions/chatContextActions.ts
+++ b/src/vs/workbench/contrib/chat/browser/actions/chatContextActions.ts
@@ -44,6 +44,7 @@ import { IChatWidget, IChatWidgetService, IQuickChatService, showChatView } from
 import { isQuickChat } from '../chatWidget.js';
 import { CHAT_CATEGORY } from './chatActions.js';
 import { SearchView } from '../../../search/browser/searchView.js';
+import { getScreenshotAsVariable, ScreenshotVariableId } from '../contrib/screenshot.js';
 
 export function registerChatContextActions() {
 	registerAction2(AttachContextAction);
@@ -54,7 +55,7 @@ export function registerChatContextActions() {
 /**
  * We fill the quickpick with these types, and enable some quick access providers
  */
-type IAttachmentQuickPickItem = ICommandVariableQuickPickItem | IQuickAccessQuickPickItem | IToolQuickPickItem | IImageQuickPickItem | IVariableQuickPickItem | IOpenEditorsQuickPickItem | ISearchResultsQuickPickItem;
+type IAttachmentQuickPickItem = ICommandVariableQuickPickItem | IQuickAccessQuickPickItem | IToolQuickPickItem | IImageQuickPickItem | IVariableQuickPickItem | IOpenEditorsQuickPickItem | ISearchResultsQuickPickItem | IScreenShotQuickPickItem;
 
 /**
  * These are the types that we can get out of the quick pick
@@ -95,6 +96,12 @@ function isISearchResultsQuickPickItem(obj: unknown): obj is ISearchResultsQuick
 		&& (obj as ISearchResultsQuickPickItem).kind === 'search-results');
 }
 
+function isScreenshotQuickPickItem(obj: unknown): obj is IScreenShotQuickPickItem {
+	return (
+		typeof obj === 'object'
+		&& (obj as IScreenShotQuickPickItem).kind === 'screenshot');
+}
+
 interface IImageQuickPickItem extends IQuickPickItem {
 	kind: 'image';
 	id: string;
@@ -141,6 +148,12 @@ interface ISearchResultsQuickPickItem extends IQuickPickItem {
 	icon?: ThemeIcon;
 }
 
+interface IScreenShotQuickPickItem extends IQuickPickItem {
+	kind: 'screenshot';
+	id: string;
+	icon?: ThemeIcon;
+}
+
 class AttachFileAction extends Action2 {
 
 	static readonly ID = 'workbench.action.chat.attachFile';
@@ -326,6 +339,12 @@ export class AttachContextAction extends Action2 {
 					});
 					chatEditingService?.addFileToWorkingSet(result.resource);
 				}
+			} else if (isScreenshotQuickPickItem(pick)) {
+				const variable = await getScreenshotAsVariable();
+				if (!variable) {
+					return;
+				}
+				toAttach.push(variable);
 			} else {
 				// Anything else is an attachment
 				const attachmentPick = pick as IAttachmentQuickPickItem;
@@ -481,7 +500,15 @@ export class AttachContextAction extends Action2 {
 			prefix: SymbolsQuickAccessProvider.PREFIX,
 			id: 'symbol'
 		});
-
+		if (configurationService.getValue('chat.experimental.imageAttachments')) {
+			quickPickItems.push({
+				kind: 'screenshot',
+				id: ScreenshotVariableId,
+				icon: ThemeIcon.fromId(Codicon.deviceCamera.id),
+				iconClass: ThemeIcon.asClassName(Codicon.deviceCamera),
+				label: localize('chatContext.attachScreenshot.label', 'Image of the Current VS Code Window'),
+			});
+		}
 		if (widget.location === ChatAgentLocation.Notebook) {
 			quickPickItems.push({
 				kind: 'command',
diff --git a/src/vs/workbench/contrib/chat/browser/contrib/screenshot.ts b/src/vs/workbench/contrib/chat/browser/contrib/screenshot.ts
new file mode 100644
index 0000000000000..9840cbc1c349c
--- /dev/null
+++ b/src/vs/workbench/contrib/chat/browser/contrib/screenshot.ts
@@ -0,0 +1,26 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { localize } from '../../../../../nls.js';
+import { generateFocusedWindowScreenshot } from '../../../../../platform/screenshot/browser/screenshot.js';
+import { IChatRequestVariableEntry } from '../../common/chatModel.js';
+
+export const ScreenshotVariableId = 'screenshot-focused-window';
+
+export async function getScreenshotAsVariable(): Promise<IChatRequestVariableEntry | undefined> {
+	const screenshot = await generateFocusedWindowScreenshot();
+	if (!screenshot) {
+		return;
+	}
+
+	return {
+		id: ScreenshotVariableId,
+		name: localize('screenshot', 'Screenshot'),
+		value: new Uint8Array(screenshot),
+		isImage: true,
+		isDynamic: true
+	};
+}
+