From 0559ba71488aeddcb522273947536c666bf9382d Mon Sep 17 00:00:00 2001 From: w3c-validate-repos-bot <> Date: Tue, 21 Jan 2025 00:22:13 +0000 Subject: [PATCH] Update report.json, rec-track-repos.json, hr-repos.json --- report.json | 48 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/report.json b/report.json index 5f6699b..75f8766 100644 --- a/report.json +++ b/report.json @@ -9845,7 +9845,7 @@ } ] }, - "timestamp": "2025-01-20T00:22:29.565Z", + "timestamp": "2025-01-21T00:22:13.378Z", "repos": [ { "id": "MDEwOlJlcG9zaXRvcnk4MTAyMTg2MA==", @@ -20394,6 +20394,10 @@ "name": "REVIEW REQUESTED", "color": "6ff265" }, + { + "name": "s:input-events", + "color": "6bc5c6" + }, { "name": "s:test-1", "color": "6bc5c6" @@ -37099,6 +37103,14 @@ "name": "Commenter Timeout (assuming satisfied)", "color": "fef2c0" }, + { + "name": "Compact", + "color": "0B26B8" + }, + { + "name": "Core", + "color": "f9d0c4" + }, { "name": "Editorial (would not change implementations)", "color": "006b75" @@ -37115,6 +37127,10 @@ "name": "i18n-tracker", "color": "F9C9FF" }, + { + "name": "Inferencing", + "color": "306216" + }, { "name": "Needs Group Input/Decision", "color": "0052cc" @@ -37143,6 +37159,10 @@ "name": "privacy-tracker", "color": "d4af37" }, + { + "name": "Profiles", + "color": "FDFBE6" + }, { "name": "security-needs-resolution", "color": "ffff00" @@ -37155,6 +37175,10 @@ "name": "SHACL 1.1", "color": "bfdadc" }, + { + "name": "SPARQL", + "color": "5845A2" + }, { "name": "tag-needs-resolution", "color": "7fff00" @@ -37163,6 +37187,14 @@ "name": "tag-tracker", "color": "7fff00" }, + { + "name": "UI", + "color": "FBA944" + }, + { + "name": "Vocabularies", + "color": "fef2c0" + }, { "name": "Waiting for Commenter", "color": "fbca04" @@ -38453,6 +38485,10 @@ "name": "did method", "color": "9fcef9" }, + { + "name": "did-resolution", + "color": "1d76db" + }, { "name": "Discuss", "color": "8DEC03" @@ -38477,6 +38513,10 @@ "name": "enhancement", "color": "a2eeef" }, + { + "name": "good first issue", + "color": "AA88EF" + }, { "name": "i18n-needs-resolution", "color": "F9C9FF" @@ -38567,7 +38607,7 @@ "text": "{\n \"src_file\": \"index.html\",\n \"type\": \"respec\"\n}\n" }, "autoPublish": { - "text": "# .github/workflows/pr-push.yml\nname: Echidna Auto-publish did-extensions\non:\n push:\n branches: [main]\njobs:\n main:\n name: Build, Validate and Deploy\n runs-on: ubuntu-latest\n steps:\n - name: Begin spec-prod...\n uses: actions/checkout@v4\n - name: Setup Node 22...\n uses: actions/setup-node@v4\n with:\n node-version: 22.x\n - name: Install did-extensions tooling...\n working-directory: tooling\n run: npm i\n - name: Validate did-extensions DID method entries\n working-directory: tooling\n run: npm run validate\n - name: Generate did-extensions registry index\n working-directory: tooling\n run: npm run generate\n - uses: w3c/spec-prod@v2\n with:\n SOURCE: index.html\n TOOLCHAIN: respec\n W3C_ECHIDNA_TOKEN: ${{ secrets.ECHIDNA_TOKEN_EXTENSIONS }}\n W3C_WG_DECISION_URL: https://www.w3.org/2024/09/24-did-minutes.html#r02\n W3C_BUILD_OVERRIDE: |\n shortName: did-extensions\n specStatus: NOTE\n" + "text": "# .github/workflows/pr-push.yml\nname: Echidna Auto-publish did-extensions\non:\n push:\n branches: [main]\n paths: [\"index.html\"]\n\njobs:\n main:\n name: Build, Validate and Deploy\n runs-on: ubuntu-latest\n steps:\n - name: Begin spec-prod...\n uses: actions/checkout@v4\n - name: Setup Node 22...\n uses: actions/setup-node@v4\n with:\n node-version: 
22.x\n - name: Install did-extensions tooling...\n working-directory: tooling\n run: npm i\n - name: Validate did-extensions DID method entries\n working-directory: tooling\n run: npm run validate\n - name: Generate did-extensions registry index\n working-directory: tooling\n run: npm run generate\n - uses: w3c/spec-prod@v2\n with:\n SOURCE: index.html\n TOOLCHAIN: respec\n W3C_ECHIDNA_TOKEN: ${{ secrets.ECHIDNA_TOKEN_EXTENSIONS }}\n W3C_WG_DECISION_URL: https://www.w3.org/2024/09/24-did-minutes.html#r02\n W3C_BUILD_OVERRIDE: |\n shortName: did-extensions\n specStatus: NOTE\n" }, "travis": null, "contributing": { @@ -102084,7 +102124,7 @@ "license": null, "codeOfConduct": null, "readme": { - "text": "# SHACL\nSHACL Community Group\n\nSHACL Documents:\n\n* [SHACL Core](https://w3c.github.io/shacl/shacl-core)\n* [SHACL SPARQL](https://w3c.github.io/shacl/shacl-sparql)\n* [SHACL Advanced features](https://w3c.github.io/shacl/shacl-af)\n* [SHACL Compact Syntax](https://w3c.github.io/shacl/shacl-compact-syntax)\n* [SHACL JS](https://w3c.github.io/shacl/shacl-js)\n" + "text": "# SHACL\nSHACL Community Group (Closed as of January 2025)\n\nThis Community Group and its GitHub repo are now closed and should no longer be used.\n\nOngoing SHACL work towards version 1.2 happens in https://github.com/w3c/data-shapes\n\nSHACL Document Drafts from the Community Group:\n\n* [SHACL Core](https://w3c.github.io/shacl/shacl-core)\n* [SHACL SPARQL](https://w3c.github.io/shacl/shacl-sparql)\n* [SHACL Advanced features](https://w3c.github.io/shacl/shacl-af)\n* [SHACL Compact Syntax](https://w3c.github.io/shacl/shacl-compact-syntax)\n* [SHACL JS](https://w3c.github.io/shacl/shacl-js)\n" }, "w3c": { "group": 73865, @@ -175026,7 +175066,7 @@ }, "codeOfConduct": null, "readme": { - "text": "# Explainer for the Prompt API\n\n_This proposal is an early design sketch by the Chrome built-in AI team to describe the problem below and solicit feedback on the proposed solution. It has not been approved to ship in Chrome._\n\nBrowsers and operating systems are increasingly expected to gain access to a language model. ([Example](https://developer.chrome.com/docs/ai/built-in), [example](https://blogs.windows.com/windowsdeveloper/2024/05/21/unlock-a-new-era-of-innovation-with-windows-copilot-runtime-and-copilot-pcs/), [example](https://www.apple.com/apple-intelligence/).) Language models are known for their versatility. With enough creative [prompting](https://developers.google.com/machine-learning/resources/prompt-eng), they can help accomplish tasks as diverse as:\n\n* Classification, tagging, and keyword extraction of arbitrary text;\n* Helping users compose text, such as blog posts, reviews, or biographies;\n* Summarizing, e.g. of articles, user reviews, or chat logs;\n* Generating titles or headlines from article contents\n* Answering questions based on the unstructured contents of a web page\n* Translation between languages\n* Proofreading\n\nAlthough the Chrome built-in AI team is exploring purpose-built APIs for some of these use cases (e.g. [translation](https://github.com/webmachinelearning/translation-api), and perhaps in the future summarization and compose), we are also exploring a general-purpose \"prompt API\" which allows web developers to prompt a language model directly. 
This gives web developers access to many more capabilities, at the cost of requiring them to do their own prompt engineering.\n\nCurrently, web developers wishing to use language models must either call out to cloud APIs, or bring their own and run them using technologies like WebAssembly and WebGPU. By providing access to the browser or operating system's existing language model, we can provide the following benefits compared to cloud APIs:\n\n* Local processing of sensitive data, e.g. allowing websites to combine AI features with end-to-end encryption.\n* Potentially faster results, since there is no server round-trip involved.\n* Offline usage.\n* Lower API costs for web developers.\n* Allowing hybrid approaches, e.g. free users of a website use on-device AI whereas paid users use a more powerful API-based model.\n\nSimilarly, compared to bring-your-own-AI approaches, using a built-in language model can save the user's bandwidth, likely benefit from more optimizations, and have a lower barrier to entry for web developers.\n\n**Even more so than many other behind-a-flag APIs, the prompt API is an experiment, designed to help us understand web developers' use cases to inform a roadmap of purpose-built APIs.** However, we want to publish an explainer to provide documentation and a public discussion place for the experiment while it is ongoing.\n\n## Goals\n\nOur goals are to:\n\n* Provide web developers a uniform JavaScript API for accessing browser-provided language models.\n* Abstract away specific details of the language model in question as much as possible, e.g. tokenization, system messages, or control tokens.\n* Guide web developers to gracefully handle failure cases, e.g. no browser-provided model being available.\n* Allow a variety of implementation strategies, including on-device or cloud-based models, while keeping these details abstracted from developers.\n\nThe following are explicit non-goals:\n\n* We do not intend to force every browser to ship or expose a language model; in particular, not all devices will be capable of storing or running one. It would be conforming to implement this API by always signaling that no language model is available, or to implement this API entirely by using cloud services instead of on-device models.\n* We do not intend to provide guarantees of language model quality, stability, or interoperability between browsers. In particular, we cannot guarantee that the models exposed by these APIs are particularly good at any given use case. These are left as quality-of-implementation issues, similar to the [shape detection API](https://wicg.github.io/shape-detection-api/). (See also a [discussion of interop](https://www.w3.org/reports/ai-web-impact/#interop) in the W3C \"AI & the Web\" document.)\n\nThe following are potential goals we are not yet certain of:\n\n* Allow web developers to know, or control, whether language model interactions are done on-device or using cloud services. This would allow them to guarantee that any user data they feed into this API does not leave the device, which can be important for privacy purposes. Similarly, we might want to allow developers to request on-device-only language models, in case a browser offers both varieties.\n* Allow web developers to know some identifier for the language model in use, separate from the browser version. 
This would allow them to allowlist or blocklist specific models to maintain a desired level of quality, or restrict certain use cases to a specific model.\n\nBoth of these potential goals could pose challenges to interoperability, so we want to investigate more how important such functionality is to developers to find the right tradeoff.\n\n## Examples\n\n### Zero-shot prompting\n\nIn this example, a single string is used to prompt the API, which is assumed to come from the user. The returned response is from the language model.\n\n```js\nconst session = await ai.languageModel.create();\n\n// Prompt the model and wait for the whole result to come back.\nconst result = await session.prompt(\"Write me a poem.\");\nconsole.log(result);\n\n// Prompt the model and stream the result:\nconst stream = await session.promptStreaming(\"Write me an extra-long poem.\");\nfor await (const chunk of stream) {\n console.log(chunk);\n}\n```\n\n### System prompts\n\nThe language model can be configured with a special \"system prompt\" which gives it the context for future interactions:\n\n```js\nconst session = await ai.languageModel.create({\n systemPrompt: \"Pretend to be an eloquent hamster.\"\n});\n\nconsole.log(await session.prompt(\"What is your favorite food?\"));\n```\n\nThe system prompt is special, in that the language model will not respond to it, and it will be preserved even if the context window otherwise overflows due to too many calls to `prompt()`.\n\nIf the system prompt is too large (see [below](#tokenization-context-window-length-limits-and-overflow)), then the promise will be rejected with a `\"QuotaExceededError\"` `DOMException`.\n\n### N-shot prompting\n\nIf developers want to provide examples of the user/assistant interaction, they can use the `initialPrompts` array. This aligns with the common \"chat completions API\" format of `{ role, content }` pairs, including a `\"system\"` role which can be used instead of the `systemPrompt` option shown above.\n\n```js\nconst session = await ai.languageModel.create({\n initialPrompts: [\n { role: \"system\", content: \"Predict up to 5 emojis as a response to a comment. Output emojis, comma-separated.\" },\n { role: \"user\", content: \"This is amazing!\" },\n { role: \"assistant\", content: \"❤️, ➕\" },\n { role: \"user\", content: \"LGTM\" },\n { role: \"assistant\", content: \"👍, 🚢\" }\n ]\n});\n\n// Clone an existing session for efficiency, instead of recreating one each time.\nasync function predictEmoji(comment) {\n const freshSession = await session.clone();\n return await freshSession.prompt(comment);\n}\n\nconst result1 = await predictEmoji(\"Back to the drawing board\");\n\nconst result2 = await predictEmoji(\"This code is so good you should get promoted\");\n```\n\n(Note that merely creating a session does not cause any new responses from the language model. 
We need to call `prompt()` or `promptStreaming()` to get a response.)\n\nSome details on error cases:\n\n* Using both `systemPrompt` and a `{ role: \"system\" }` prompt in `initialPrompts`, or using multiple `{ role: \"system\" }` prompts, or placing the `{ role: \"system\" }` prompt anywhere besides at the 0th position in `initialPrompts`, will reject with a `TypeError`.\n* If the combined token length of all the initial prompts (including the separate `systemPrompt`, if provided) is too large, then the promise will be rejected with a `\"QuotaExceededError\"` `DOMException`.\n\n### Customizing the role per prompt\n\nOur examples so far have provided `prompt()` and `promptStreaming()` with a single string. Such cases assume messages will come from the user role. These methods can also take in objects in the `{ role, content }` format, or arrays of such objects, in case you want to provide multiple user or assistant messages before getting another assistant message:\n\n```js\nconst multiUserSession = await ai.languageModel.create({\n systemPrompt: \"You are a mediator in a discussion between two departments.\"\n});\n\nconst result = await multiUserSession.prompt([\n { role: \"user\", content: \"Marketing: We need more budget for advertising campaigns.\" },\n { role: \"user\", content: \"Finance: We need to cut costs and advertising is on the list.\" },\n { role: \"assistant\", content: \"Let's explore a compromise that satisfies both departments.\" }\n]);\n\n// `result` will contain a compromise proposal from the assistant.\n```\n\nBecause of their special behavior of being preserved on context window overflow, system prompts cannot be provided this way.\n\n### Emulating tool use or function-calling via assistant-role prompts\n\nA special case of the above is using the assistant role to emulate tool use or function-calling, by marking a response as coming from the assistant side of the conversation:\n\n```js\nconst session = await ai.languageModel.create({\n systemPrompt: `\n You are a helpful assistant. You have access to the following tools:\n - calculator: A calculator. To use it, write \"CALCULATOR: <expression>\" where <expression> is a valid mathematical expression.\n `\n});\n\nasync function promptWithCalculator(prompt) {\n const result = await session.prompt(prompt);\n\n // Check if the assistant wants to use the calculator tool.\n const match = /^CALCULATOR: (.*)$/.exec(result);\n if (match) {\n const expression = match[1];\n const mathResult = evaluateMathExpression(expression);\n\n // Add the result to the session so it's in context going forward.\n await session.prompt({ role: \"assistant\", content: mathResult });\n\n // Return it as if that's what the assistant said to the user.\n return mathResult;\n }\n\n // The assistant didn't want to use the calculator. Just return its response.\n return result;\n}\n\nconsole.log(await promptWithCalculator(\"What is 2 + 2?\"));\n```\n\nWe'll likely explore more specific APIs for tool- and function-calling in the future; follow along in [issue #7](https://github.com/webmachinelearning/prompt-api/issues/7).\n\n### Configuration of per-session parameters\n\nIn addition to the `systemPrompt` and `initialPrompts` options shown above, the currently-configurable model parameters are [temperature](https://huggingface.co/blog/how-to-generate#sampling) and [top-K](https://huggingface.co/blog/how-to-generate#top-k-sampling). 
The `params()` API gives the default, minimum, and maximum values for these parameters.\n\n_However, see [issue #42](https://github.com/webmachinelearning/prompt-api/issues/42): sampling hyperparameters are not universal among models._\n\n```js\nconst customSession = await ai.languageModel.create({\n temperature: 0.8,\n topK: 10\n});\n\nconst params = await ai.languageModel.params();\nconst slightlyHighTemperatureSession = await ai.languageModel.create({\n temperature: Math.max(\n params.defaultTemperature * 1.2,\n params.maxTemperature\n ),\n topK: 10\n});\n\n// params also contains defaultTopK and maxTopK.\n```\n\nIf the language model is not available at all in this browser, `params()` will fulfill with `null`.\n\n### Session persistence and cloning\n\nEach language model session consists of a persistent series of interactions with the model:\n\n```js\nconst session = await ai.languageModel.create({\n systemPrompt: \"You are a friendly, helpful assistant specialized in clothing choices.\"\n});\n\nconst result = await session.prompt(`\n What should I wear today? It's sunny and I'm unsure between a t-shirt and a polo.\n`);\n\nconsole.log(result);\n\nconst result2 = await session.prompt(`\n That sounds great, but oh no, it's actually going to rain! New advice??\n`);\n```\n\nMultiple unrelated continuations of the same prompt can be set up by creating a session and then cloning it:\n\n```js\nconst session = await ai.languageModel.create({\n systemPrompt: \"You are a friendly, helpful assistant specialized in clothing choices.\"\n});\n\nconst session2 = await session.clone();\n```\n\nThe clone operation can be aborted using an `AbortSignal`:\n\n```js\nconst controller = new AbortController();\nconst session2 = await session.clone({ signal: controller.signal });\n```\n\n### Session destruction\n\nA language model session can be destroyed, either by using an `AbortSignal` passed to the `create()` method call:\n\n```js\nconst controller = new AbortController();\nstopButton.onclick = () => controller.abort();\n\nconst session = await ai.languageModel.create({ signal: controller.signal });\n```\n\nor by calling `destroy()` on the session:\n\n```js\nstopButton.onclick = () => session.destroy();\n```\n\nDestroying a session will have the following effects:\n\n* If done before the promise returned by `create()` is settled:\n\n * Stop signaling any ongoing download progress for the language model. (The browser may also abort the download, or may continue it. 
Either way, no further `downloadprogress` events will fire.)\n\n * Reject the `create()` promise.\n\n* Otherwise:\n\n * Reject any ongoing calls to `prompt()`.\n\n * Error any `ReadableStream`s returned by `promptStreaming()`.\n\n* Most importantly, destroying the session allows the user agent to unload the language model from memory, if no other APIs or sessions are using it.\n\nIn all cases the exception used for rejecting promises or erroring `ReadableStream`s will be an `\"AbortError\"` `DOMException`, or the given abort reason.\n\nThe ability to manually destroy a session allows applications to free up memory without waiting for garbage collection, which can be useful since language models can be quite large.\n\n### Aborting a specific prompt\n\nSpecific calls to `prompt()` or `promptStreaming()` can be aborted by passing an `AbortSignal` to them:\n\n```js\nconst controller = new AbortController();\nstopButton.onclick = () => controller.abort();\n\nconst result = await session.prompt(\"Write me a poem\", { signal: controller.signal });\n```\n\nNote that because sessions are stateful, and prompts can be queued, aborting a specific prompt is slightly complicated:\n\n* If the prompt is still queued behind other prompts in the session, then it will be removed from the queue.\n* If the prompt is being currently processed by the model, then it will be aborted, and the prompt/response pair will be removed from the conversation history.\n* If the prompt has already been fully processed by the model, then attempting to abort the prompt will do nothing.\n\n### Tokenization, context window length limits, and overflow\n\nA given language model session will have a maximum number of tokens it can process. Developers can check their current usage and progress toward that limit by using the following properties on the session object:\n\n```js\nconsole.log(`${session.tokensSoFar}/${session.maxTokens} (${session.tokensLeft} left)`);\n```\n\nTo know how many tokens a string will consume, without actually processing it, developers can use the `countPromptTokens()` method:\n\n```js\nconst numTokens = await session.countPromptTokens(promptString);\n```\n\nSome notes on this API:\n\n* We do not expose the actual tokenization to developers since that would make it too easy to depend on model-specific details.\n* Implementations must include in their count any control tokens that will be necessary to process the prompt, e.g. ones indicating the start or end of the input.\n* The counting process can be aborted by passing an `AbortSignal`, i.e. `session.countPromptTokens(promptString, { signal })`.\n\nIt's possible to send a prompt that causes the context window to overflow. That is, consider a case where `session.countPromptTokens(promptString) > session.tokensLeft` before calling `session.prompt(promptString)`, and then the web developer calls `session.prompt(promptString)` anyway. In such cases, the initial portions of the conversation with the language model will be removed, one prompt/response pair at a time, until enough tokens are available to process the new prompt. The exception is the [system prompt](#system-prompts), which is never removed. 
If it's not possible to remove enough tokens from the conversation history to process the new prompt, then the `prompt()` or `promptStreaming()` call will fail with an `\"QuotaExceededError\"` `DOMException` and nothing will be removed.\n\nSuch overflows can be detected by listening for the `\"contextoverflow\"` event on the session:\n\n```js\nsession.addEventListener(\"contextoverflow\", () => {\n console.log(\"Context overflow!\");\n});\n```\n\n### Multilingual content and expected languages\n\nThe default behavior for a language model session assumes that the input languages are unknown. In this case, implementations will use whatever \"base\" capabilities they have available for the language model, and might throw `\"NotSupportedError\"` `DOMException`s if they encounter languages they don't support.\n\nIt's better practice, if possible, to supply the `create()` method with information about the expected input languages. This allows the implementation to download any necessary supporting material, such as fine-tunings or safety-checking models, and to immediately reject the promise returned by `create()` if the web developer needs to use languages that the browser is not capable of supporting:\n\n```js\nconst session = await ai.languageModel.create({\n systemPrompt: `\n You are a foreign-language tutor for Japanese. The user is Korean. If necessary, either you or\n the user might \"break character\" and ask for or give clarification in Korean. But by default,\n prefer speaking in Japanese, and return to the Japanese conversation once any sidebars are\n concluded.\n `,\n expectedInputLanguages: [\"en\" /* for the system prompt */, \"ja\", \"kr\"]\n});\n```\n\nNote that there is no way of specifying output languages, since these are governed by the language model's own decisions. Similarly, the expected input languages do not affect the context or prompt the language model sees; they only impact the process of setting up the session and performing appropriate downloads.\n\n### Testing available options before creation\n\nIn the simple case, web developers should call `ai.languageModel.create()`, and handle failures gracefully.\n\nHowever, if the web developer wants to provide a differentiated user experience, which lets users know ahead of time that the feature will not be possible or might require a download, they can use the promise-returning `ai.languageModel.availability()` method. This method lets developers know, before calling `create()`, what is possible with the implementation.\n\nThe method will return a promise that fulfills with one of the following availability values:\n\n* \"`no`\" means that the implementation does not support the requested options, or does not support prompting a language model at all.\n* \"`after-download`\" means that the implementation supports the requested options, but it will have to download something (e.g. 
the language model itself, or a fine-tuning) before it can create a session using those options.\n* \"`readily`\" means that the implementation supports the requested options without requiring any new downloads.\n\nAn example usage is the following:\n\n```js\nconst options = { expectedInputLanguages: [\"en\", \"es\"], temperature: 2 };\n\nconst supportsOurUseCase = await ai.languageModel.availability(options);\n\nif (supportsOurUseCase !== \"no\") {\n if (supportsOurUseCase === \"after-download\") {\n console.log(\"Sit tight, we need to do some downloading...\");\n }\n\n const session = await ai.languageModel.create({ ...options, systemPrompt: \"...\" });\n // ... Use session ...\n} else {\n // Either the API overall, or the expected languages and temperature setting, is not available.\n console.error(\"No language model for us :(\");\n}\n```\n\n### Download progress\n\nIn cases where the model needs to be downloaded as part of creation, you can monitor the download progress (e.g. in order to show your users a progress bar) using code such as the following:\n\n```js\nconst session = await ai.languageModel.create({\n monitor(m) {\n m.addEventListener(\"downloadprogress\", e => {\n console.log(`Downloaded ${e.loaded} of ${e.total} bytes.`);\n });\n }\n});\n```\n\nIf the download fails, then `downloadprogress` events will stop being emitted, and the promise returned by `create()` will be rejected with a \"`NetworkError`\" `DOMException`.\n\n
<details>\n<summary>What's up with this pattern?</summary>\n\nThis pattern is a little involved. Several alternatives have been considered. However, asking around the web standards community it seemed like this one was best, as it allows using standard event handlers and `ProgressEvent`s, and also ensures that once the promise is settled, the session object is completely ready to use.\n\nIt is also nicely future-extensible by adding more events and properties to the `m` object.\n\nFinally, note that there is a sort of precedent in the (never-shipped) [`FetchObserver` design](https://github.com/whatwg/fetch/issues/447#issuecomment-281731850).\n\n</details>
\n\n## Detailed design\n\n### Full API surface in Web IDL\n\n```webidl\n// Shared self.ai APIs\n\npartial interface WindowOrWorkerGlobalScope {\n [Replaceable, SecureContext] readonly attribute AI ai;\n};\n\n[Exposed=(Window,Worker), SecureContext]\ninterface AI {\n readonly attribute AILanguageModelFactory languageModel;\n};\n\n[Exposed=(Window,Worker), SecureContext]\ninterface AICreateMonitor : EventTarget {\n attribute EventHandler ondownloadprogress;\n\n // Might get more stuff in the future, e.g. for\n // https://github.com/webmachinelearning/prompt-api/issues/4\n};\n\ncallback AICreateMonitorCallback = undefined (AICreateMonitor monitor);\n\nenum AICapabilityAvailability { \"readily\", \"after-download\", \"no\" };\n```\n\n```webidl\n// Language Model\n\n[Exposed=(Window,Worker), SecureContext]\ninterface AILanguageModelFactory {\n Promise<AILanguageModel> create(optional AILanguageModelCreateOptions options = {});\n Promise<AICapabilityAvailability> availability(optional AILanguageModelCreateCoreOptions options = {});\n Promise<AILanguageModelParams?> params();\n};\n\n[Exposed=(Window,Worker), SecureContext]\ninterface AILanguageModel : EventTarget {\n Promise<DOMString> prompt(AILanguageModelPromptInput input, optional AILanguageModelPromptOptions options = {});\n ReadableStream promptStreaming(AILanguageModelPromptInput input, optional AILanguageModelPromptOptions options = {});\n\n Promise<unsigned long long> countPromptTokens(AILanguageModelPromptInput input, optional AILanguageModelPromptOptions options = {});\n readonly attribute unsigned long long maxTokens;\n readonly attribute unsigned long long tokensSoFar;\n readonly attribute unsigned long long tokensLeft;\n\n readonly attribute unsigned long topK;\n readonly attribute float temperature;\n readonly attribute FrozenArray<DOMString>? expectedInputLanguages;\n\n attribute EventHandler oncontextoverflow;\n\n Promise<AILanguageModel> clone(optional AILanguageModelCloneOptions options = {});\n undefined destroy();\n};\n\n[Exposed=(Window,Worker), SecureContext]\ninterface AILanguageModelParams {\n readonly attribute unsigned long defaultTopK;\n readonly attribute unsigned long maxTopK;\n readonly attribute float defaultTemperature;\n readonly attribute float maxTemperature;\n};\n\ndictionary AILanguageModelCreateCoreOptions {\n [EnforceRange] unsigned long topK;\n float temperature;\n sequence<DOMString> expectedInputLanguages;\n}\n\ndictionary AILanguageModelCreateOptions : AILanguageModelCreateCoreOptions {\n AbortSignal signal;\n AICreateMonitorCallback monitor;\n\n DOMString systemPrompt;\n sequence<AILanguageModelInitialPrompt> initialPrompts;\n};\n\ndictionary AILanguageModelInitialPrompt {\n required AILanguageModelInitialPromptRole role;\n required DOMString content;\n};\n\ndictionary AILanguageModelPrompt {\n required AILanguageModelPromptRole role;\n required DOMString content;\n};\n\ndictionary AILanguageModelPromptOptions {\n AbortSignal signal;\n};\n\ndictionary AILanguageModelCloneOptions {\n AbortSignal signal;\n};\n\ntypedef (DOMString or AILanguageModelPrompt or sequence<AILanguageModelPrompt>) AILanguageModelPromptInput;\n\nenum AILanguageModelInitialPromptRole { \"system\", \"user\", \"assistant\" };\nenum AILanguageModelPromptRole { \"user\", \"assistant\" };\n```\n\n### Instruction-tuned versus base models\n\nWe intend for this API to expose instruction-tuned models. 
Although we cannot mandate any particular level of quality or instruction-following capability, we think setting this base expectation can help ensure that what browsers ship is aligned with what web developers expect.\n\nTo illustrate the difference and how it impacts web developer expectations:\n\n* In a base model, a prompt like \"Write a poem about trees.\" might get completed with \"... Write about the animal you would like to be. Write about a conflict between a brother and a sister.\" (etc.) It is directly completing plausible next tokens in the text sequence.\n* Whereas, in an instruction-tuned model, the model will generally _follow_ instructions like \"Write a poem about trees.\", and respond with a poem about trees.\n\nTo ensure the API can be used by web developers across multiple implementations, all browsers should be sure their models behave like instruction-tuned models.\n\n## Alternatives considered and under consideration\n\n### How many stages to reach a response?\n\nTo actually get a response back from the model given a prompt, the following possible stages are involved:\n\n1. Download the model, if necessary.\n2. Establish a session, including configuring per-session options and parameters.\n3. Add an initial prompt to establish context. (This will not generate a response.)\n4. Execute a prompt and receive a response.\n\nWe've chosen to manifest these 3-4 stages into the API as two methods, `ai.languageModel.create()` and `session.prompt()`/`session.promptStreaming()`, with some additional facilities for dealing with the fact that `ai.languageModel.create()` can include a download step. Some APIs simplify this into a single method, and some split it up into three (usually not four).\n\n### Stateless or session-based\n\nOur design here uses [sessions](#session-persistence-and-cloning). An alternate design, seen in some APIs, is to require the developer to feed in the entire conversation history to the model each time, keeping track of the results.\n\nThis can be slightly more flexible; for example, it allows manually correcting the model's responses before feeding them back into the context window.\n\nHowever, our understanding is that the session-based model can be more efficiently implemented, at least for browsers with on-device models. (Implementing it for a cloud-based model would likely be more work.) And, developers can always achieve a stateless model by using a new session for each interaction.\n\n## Privacy considerations\n\nIf cloud-based language models are exposed through this API, then there are potential privacy issues with exposing user or website data to the relevant cloud and model providers. This is not a concern specific to this API, as websites can already choose to expose user or website data to other origins using APIs such as `fetch()`. However, it's worth keeping in mind, and in particular as discussed in our [Goals](#goals), perhaps we should make it easier for web developers to know whether a cloud-based model is in use, or which one.\n\nIf on-device language models are updated separately from browser and operating system versions, this API could enhance the web's fingerprinting service by providing extra identifying bits. Mandating that older browser versions not receive updates or be able to download models from too far into the future might be a possible remediation for this.\n\nFinally, we intend to prohibit (in the specification) any use of user-specific information that is not directly supplied through the API. 
For example, it would not be permissible to fine-tune the language model based on information the user has entered into the browser in the past.\n\n## Stakeholder feedback\n\n* W3C TAG: not yet requested\n* Browser engines and browsers:\n * Chromium: prototyping behind a flag\n * Gecko: not yet requested\n * WebKit: not yet requested\n * Edge: not yet requested\n* Web developers: positive ([example](https://x.com/mortenjust/status/1805190952358650251), [example](https://tyingshoelaces.com/blog/chrome-ai-prompt-api), [example](https://labs.thinktecture.com/local-small-language-models-in-the-browser-a-first-glance-at-chromes-built-in-ai-and-prompt-api-with-gemini-nano/))\n" + "text": "# Explainer for the Prompt API\n\n_This proposal is an early design sketch by the Chrome built-in AI team to describe the problem below and solicit feedback on the proposed solution. It has not been approved to ship in Chrome._\n\nBrowsers and operating systems are increasingly expected to gain access to a language model. ([Example](https://developer.chrome.com/docs/ai/built-in), [example](https://blogs.windows.com/windowsdeveloper/2024/05/21/unlock-a-new-era-of-innovation-with-windows-copilot-runtime-and-copilot-pcs/), [example](https://www.apple.com/apple-intelligence/).) Language models are known for their versatility. With enough creative [prompting](https://developers.google.com/machine-learning/resources/prompt-eng), they can help accomplish tasks as diverse as:\n\n* Classification, tagging, and keyword extraction of arbitrary text;\n* Helping users compose text, such as blog posts, reviews, or biographies;\n* Summarizing, e.g. of articles, user reviews, or chat logs;\n* Generating titles or headlines from article contents\n* Answering questions based on the unstructured contents of a web page\n* Translation between languages\n* Proofreading\n\nAlthough the Chrome built-in AI team is exploring purpose-built APIs for some of these use cases (e.g. [translation](https://github.com/webmachinelearning/translation-api), and perhaps in the future summarization and compose), we are also exploring a general-purpose \"prompt API\" which allows web developers to prompt a language model directly. This gives web developers access to many more capabilities, at the cost of requiring them to do their own prompt engineering.\n\nCurrently, web developers wishing to use language models must either call out to cloud APIs, or bring their own and run them using technologies like WebAssembly and WebGPU. By providing access to the browser or operating system's existing language model, we can provide the following benefits compared to cloud APIs:\n\n* Local processing of sensitive data, e.g. allowing websites to combine AI features with end-to-end encryption.\n* Potentially faster results, since there is no server round-trip involved.\n* Offline usage.\n* Lower API costs for web developers.\n* Allowing hybrid approaches, e.g. 
free users of a website use on-device AI whereas paid users use a more powerful API-based model.\n\nSimilarly, compared to bring-your-own-AI approaches, using a built-in language model can save the user's bandwidth, likely benefit from more optimizations, and have a lower barrier to entry for web developers.\n\n**Even more so than many other behind-a-flag APIs, the prompt API is an experiment, designed to help us understand web developers' use cases to inform a roadmap of purpose-built APIs.** However, we want to publish an explainer to provide documentation and a public discussion place for the experiment while it is ongoing.\n\n## Goals\n\nOur goals are to:\n\n* Provide web developers a uniform JavaScript API for accessing browser-provided language models.\n* Abstract away specific details of the language model in question as much as possible, e.g. tokenization, system messages, or control tokens.\n* Guide web developers to gracefully handle failure cases, e.g. no browser-provided model being available.\n* Allow a variety of implementation strategies, including on-device or cloud-based models, while keeping these details abstracted from developers.\n\nThe following are explicit non-goals:\n\n* We do not intend to force every browser to ship or expose a language model; in particular, not all devices will be capable of storing or running one. It would be conforming to implement this API by always signaling that no language model is available, or to implement this API entirely by using cloud services instead of on-device models.\n* We do not intend to provide guarantees of language model quality, stability, or interoperability between browsers. In particular, we cannot guarantee that the models exposed by these APIs are particularly good at any given use case. These are left as quality-of-implementation issues, similar to the [shape detection API](https://wicg.github.io/shape-detection-api/). (See also a [discussion of interop](https://www.w3.org/reports/ai-web-impact/#interop) in the W3C \"AI & the Web\" document.)\n\nThe following are potential goals we are not yet certain of:\n\n* Allow web developers to know, or control, whether language model interactions are done on-device or using cloud services. This would allow them to guarantee that any user data they feed into this API does not leave the device, which can be important for privacy purposes. Similarly, we might want to allow developers to request on-device-only language models, in case a browser offers both varieties.\n* Allow web developers to know some identifier for the language model in use, separate from the browser version. This would allow them to allowlist or blocklist specific models to maintain a desired level of quality, or restrict certain use cases to a specific model.\n\nBoth of these potential goals could pose challenges to interoperability, so we want to investigate more how important such functionality is to developers to find the right tradeoff.\n\n## Examples\n\n### Zero-shot prompting\n\nIn this example, a single string is used to prompt the API, which is assumed to come from the user. 
The returned response is from the language model.\n\n```js\nconst session = await ai.languageModel.create();\n\n// Prompt the model and wait for the whole result to come back.\nconst result = await session.prompt(\"Write me a poem.\");\nconsole.log(result);\n\n// Prompt the model and stream the result:\nconst stream = await session.promptStreaming(\"Write me an extra-long poem.\");\nfor await (const chunk of stream) {\n console.log(chunk);\n}\n```\n\n### System prompts\n\nThe language model can be configured with a special \"system prompt\" which gives it the context for future interactions:\n\n```js\nconst session = await ai.languageModel.create({\n systemPrompt: \"Pretend to be an eloquent hamster.\"\n});\n\nconsole.log(await session.prompt(\"What is your favorite food?\"));\n```\n\nThe system prompt is special, in that the language model will not respond to it, and it will be preserved even if the context window otherwise overflows due to too many calls to `prompt()`.\n\nIf the system prompt is too large (see [below](#tokenization-context-window-length-limits-and-overflow)), then the promise will be rejected with a `\"QuotaExceededError\"` `DOMException`.\n\n### N-shot prompting\n\nIf developers want to provide examples of the user/assistant interaction, they can use the `initialPrompts` array. This aligns with the common \"chat completions API\" format of `{ role, content }` pairs, including a `\"system\"` role which can be used instead of the `systemPrompt` option shown above.\n\n```js\nconst session = await ai.languageModel.create({\n initialPrompts: [\n { role: \"system\", content: \"Predict up to 5 emojis as a response to a comment. Output emojis, comma-separated.\" },\n { role: \"user\", content: \"This is amazing!\" },\n { role: \"assistant\", content: \"❤️, ➕\" },\n { role: \"user\", content: \"LGTM\" },\n { role: \"assistant\", content: \"👍, 🚢\" }\n ]\n});\n\n// Clone an existing session for efficiency, instead of recreating one each time.\nasync function predictEmoji(comment) {\n const freshSession = await session.clone();\n return await freshSession.prompt(comment);\n}\n\nconst result1 = await predictEmoji(\"Back to the drawing board\");\n\nconst result2 = await predictEmoji(\"This code is so good you should get promoted\");\n```\n\n(Note that merely creating a session does not cause any new responses from the language model. We need to call `prompt()` or `promptStreaming()` to get a response.)\n\nSome details on error cases:\n\n* Using both `systemPrompt` and a `{ role: \"system\" }` prompt in `initialPrompts`, or using multiple `{ role: \"system\" }` prompts, or placing the `{ role: \"system\" }` prompt anywhere besides at the 0th position in `initialPrompts`, will reject with a `TypeError`.\n* If the combined token length of all the initial prompts (including the separate `systemPrompt`, if provided) is too large, then the promise will be rejected with a `\"QuotaExceededError\"` `DOMException`.\n\n### Customizing the role per prompt\n\nOur examples so far have provided `prompt()` and `promptStreaming()` with a single string. Such cases assume messages will come from the user role. 
These methods can also take in objects in the `{ role, content }` format, or arrays of such objects, in case you want to provide multiple user or assistant messages before getting another assistant message:\n\n```js\nconst multiUserSession = await ai.languageModel.create({\n systemPrompt: \"You are a mediator in a discussion between two departments.\"\n});\n\nconst result = await multiUserSession.prompt([\n { role: \"user\", content: \"Marketing: We need more budget for advertising campaigns.\" },\n { role: \"user\", content: \"Finance: We need to cut costs and advertising is on the list.\" },\n { role: \"assistant\", content: \"Let's explore a compromise that satisfies both departments.\" }\n]);\n\n// `result` will contain a compromise proposal from the assistant.\n```\n\nBecause of their special behavior of being preserved on context window overflow, system prompts cannot be provided this way.\n\n### Emulating tool use or function-calling via assistant-role prompts\n\nA special case of the above is using the assistant role to emulate tool use or function-calling, by marking a response as coming from the assistant side of the conversation:\n\n```js\nconst session = await ai.languageModel.create({\n systemPrompt: `\n You are a helpful assistant. You have access to the following tools:\n - calculator: A calculator. To use it, write \"CALCULATOR: <expression>\" where <expression> is a valid mathematical expression.\n `\n});\n\nasync function promptWithCalculator(prompt) {\n const result = await session.prompt(prompt);\n\n // Check if the assistant wants to use the calculator tool.\n const match = /^CALCULATOR: (.*)$/.exec(result);\n if (match) {\n const expression = match[1];\n const mathResult = evaluateMathExpression(expression);\n\n // Add the result to the session so it's in context going forward.\n await session.prompt({ role: \"assistant\", content: mathResult });\n\n // Return it as if that's what the assistant said to the user.\n return mathResult;\n }\n\n // The assistant didn't want to use the calculator. Just return its response.\n return result;\n}\n\nconsole.log(await promptWithCalculator(\"What is 2 + 2?\"));\n```\n\nWe'll likely explore more specific APIs for tool- and function-calling in the future; follow along in [issue #7](https://github.com/webmachinelearning/prompt-api/issues/7).\n\n### Configuration of per-session parameters\n\nIn addition to the `systemPrompt` and `initialPrompts` options shown above, the currently-configurable model parameters are [temperature](https://huggingface.co/blog/how-to-generate#sampling) and [top-K](https://huggingface.co/blog/how-to-generate#top-k-sampling). 
The `params()` API gives the default, minimum, and maximum values for these parameters.\n\n_However, see [issue #42](https://github.com/webmachinelearning/prompt-api/issues/42): sampling hyperparameters are not universal among models._\n\n```js\nconst customSession = await ai.languageModel.create({\n temperature: 0.8,\n topK: 10\n});\n\nconst params = await ai.languageModel.params();\nconst slightlyHighTemperatureSession = await ai.languageModel.create({\n temperature: Math.max(\n params.defaultTemperature * 1.2,\n params.maxTemperature\n ),\n topK: 10\n});\n\n// params also contains defaultTopK and maxTopK.\n```\n\nIf the language model is not available at all in this browser, `params()` will fulfill with `null`.\n\n### Session persistence and cloning\n\nEach language model session consists of a persistent series of interactions with the model:\n\n```js\nconst session = await ai.languageModel.create({\n systemPrompt: \"You are a friendly, helpful assistant specialized in clothing choices.\"\n});\n\nconst result = await session.prompt(`\n What should I wear today? It's sunny and I'm unsure between a t-shirt and a polo.\n`);\n\nconsole.log(result);\n\nconst result2 = await session.prompt(`\n That sounds great, but oh no, it's actually going to rain! New advice??\n`);\n```\n\nMultiple unrelated continuations of the same prompt can be set up by creating a session and then cloning it:\n\n```js\nconst session = await ai.languageModel.create({\n systemPrompt: \"You are a friendly, helpful assistant specialized in clothing choices.\"\n});\n\nconst session2 = await session.clone();\n```\n\nThe clone operation can be aborted using an `AbortSignal`:\n\n```js\nconst controller = new AbortController();\nconst session2 = await session.clone({ signal: controller.signal });\n```\n\n### Session destruction\n\nA language model session can be destroyed, either by using an `AbortSignal` passed to the `create()` method call:\n\n```js\nconst controller = new AbortController();\nstopButton.onclick = () => controller.abort();\n\nconst session = await ai.languageModel.create({ signal: controller.signal });\n```\n\nor by calling `destroy()` on the session:\n\n```js\nstopButton.onclick = () => session.destroy();\n```\n\nDestroying a session will have the following effects:\n\n* If done before the promise returned by `create()` is settled:\n\n * Stop signaling any ongoing download progress for the language model. (The browser may also abort the download, or may continue it. 
Either way, no further `downloadprogress` events will fire.)\n\n * Reject the `create()` promise.\n\n* Otherwise:\n\n * Reject any ongoing calls to `prompt()`.\n\n * Error any `ReadableStream`s returned by `promptStreaming()`.\n\n* Most importantly, destroying the session allows the user agent to unload the language model from memory, if no other APIs or sessions are using it.\n\nIn all cases the exception used for rejecting promises or erroring `ReadableStream`s will be an `\"AbortError\"` `DOMException`, or the given abort reason.\n\nThe ability to manually destroy a session allows applications to free up memory without waiting for garbage collection, which can be useful since language models can be quite large.\n\n### Aborting a specific prompt\n\nSpecific calls to `prompt()` or `promptStreaming()` can be aborted by passing an `AbortSignal` to them:\n\n```js\nconst controller = new AbortController();\nstopButton.onclick = () => controller.abort();\n\nconst result = await session.prompt(\"Write me a poem\", { signal: controller.signal });\n```\n\nNote that because sessions are stateful, and prompts can be queued, aborting a specific prompt is slightly complicated:\n\n* If the prompt is still queued behind other prompts in the session, then it will be removed from the queue.\n* If the prompt is being currently processed by the model, then it will be aborted, and the prompt/response pair will be removed from the conversation history.\n* If the prompt has already been fully processed by the model, then attempting to abort the prompt will do nothing.\n\n### Tokenization, context window length limits, and overflow\n\nA given language model session will have a maximum number of tokens it can process. Developers can check their current usage and progress toward that limit by using the following properties on the session object:\n\n```js\nconsole.log(`${session.tokensSoFar}/${session.maxTokens} (${session.tokensLeft} left)`);\n```\n\nTo know how many tokens a string will consume, without actually processing it, developers can use the `countPromptTokens()` method:\n\n```js\nconst numTokens = await session.countPromptTokens(promptString);\n```\n\nSome notes on this API:\n\n* We do not expose the actual tokenization to developers since that would make it too easy to depend on model-specific details.\n* Implementations must include in their count any control tokens that will be necessary to process the prompt, e.g. ones indicating the start or end of the input.\n* The counting process can be aborted by passing an `AbortSignal`, i.e. `session.countPromptTokens(promptString, { signal })`.\n\nIt's possible to send a prompt that causes the context window to overflow. That is, consider a case where `session.countPromptTokens(promptString) > session.tokensLeft` before calling `session.prompt(promptString)`, and then the web developer calls `session.prompt(promptString)` anyway. In such cases, the initial portions of the conversation with the language model will be removed, one prompt/response pair at a time, until enough tokens are available to process the new prompt. The exception is the [system prompt](#system-prompts), which is never removed. 
If it's not possible to remove enough tokens from the conversation history to process the new prompt, then the `prompt()` or `promptStreaming()` call will fail with an `\"QuotaExceededError\"` `DOMException` and nothing will be removed.\n\nSuch overflows can be detected by listening for the `\"contextoverflow\"` event on the session:\n\n```js\nsession.addEventListener(\"contextoverflow\", () => {\n console.log(\"Context overflow!\");\n});\n```\n\n### Multilingual content and expected languages\n\nThe default behavior for a language model session assumes that the input languages are unknown. In this case, implementations will use whatever \"base\" capabilities they have available for the language model, and might throw `\"NotSupportedError\"` `DOMException`s if they encounter languages they don't support.\n\nIt's better practice, if possible, to supply the `create()` method with information about the expected input languages. This allows the implementation to download any necessary supporting material, such as fine-tunings or safety-checking models, and to immediately reject the promise returned by `create()` if the web developer needs to use languages that the browser is not capable of supporting:\n\n```js\nconst session = await ai.languageModel.create({\n systemPrompt: `\n You are a foreign-language tutor for Japanese. The user is Korean. If necessary, either you or\n the user might \"break character\" and ask for or give clarification in Korean. But by default,\n prefer speaking in Japanese, and return to the Japanese conversation once any sidebars are\n concluded.\n `,\n expectedInputLanguages: [\"en\" /* for the system prompt */, \"ja\", \"kr\"]\n});\n```\n\nNote that there is no way of specifying output languages, since these are governed by the language model's own decisions. Similarly, the expected input languages do not affect the context or prompt the language model sees; they only impact the process of setting up the session and performing appropriate downloads.\n\n### Testing available options before creation\n\nIn the simple case, web developers should call `ai.languageModel.create()`, and handle failures gracefully.\n\nHowever, if the web developer wants to provide a differentiated user experience, which lets users know ahead of time that the feature will not be possible or might require a download, they can use the promise-returning `ai.languageModel.availability()` method. This method lets developers know, before calling `create()`, what is possible with the implementation.\n\nThe method will return a promise that fulfills with one of the following availability values:\n\n* \"`no`\" means that the implementation does not support the requested options, or does not support prompting a language model at all.\n* \"`after-download`\" means that the implementation supports the requested options, but it will have to download something (e.g. 
the language model itself, or a fine-tuning) before it can create a session using those options.\n* \"`readily`\" means that the implementation supports the requested options without requiring any new downloads.\n\nAn example usage is the following:\n\n```js\nconst options = { expectedInputLanguages: [\"en\", \"es\"], temperature: 2 };\n\nconst supportsOurUseCase = await ai.languageModel.availability(options);\n\nif (supportsOurUseCase !== \"no\") {\n if (supportsOurUseCase === \"after-download\") {\n console.log(\"Sit tight, we need to do some downloading...\");\n }\n\n const session = await ai.languageModel.create({ ...options, systemPrompt: \"...\" });\n // ... Use session ...\n} else {\n // Either the API overall, or the expected languages and temperature setting, is not available.\n console.error(\"No language model for us :(\");\n}\n```\n\n### Download progress\n\nIn cases where the model needs to be downloaded as part of creation, you can monitor the download progress (e.g. in order to show your users a progress bar) using code such as the following:\n\n```js\nconst session = await ai.languageModel.create({\n monitor(m) {\n m.addEventListener(\"downloadprogress\", e => {\n console.log(`Downloaded ${e.loaded} of ${e.total} bytes.`);\n });\n }\n});\n```\n\nIf the download fails, then `downloadprogress` events will stop being emitted, and the promise returned by `create()` will be rejected with a \"`NetworkError`\" `DOMException`.\n\n
<details>\n<summary>What's up with this pattern?</summary>\n\nThis pattern is a little involved. Several alternatives have been considered. However, asking around the web standards community it seemed like this one was best, as it allows using standard event handlers and `ProgressEvent`s, and also ensures that once the promise is settled, the session object is completely ready to use.\n\nIt is also nicely future-extensible by adding more events and properties to the `m` object.\n\nFinally, note that there is a sort of precedent in the (never-shipped) [`FetchObserver` design](https://github.com/whatwg/fetch/issues/447#issuecomment-281731850).\n\n</details>
\n\n## Detailed design\n\n### Full API surface in Web IDL\n\n```webidl\n// Shared self.ai APIs\n\npartial interface WindowOrWorkerGlobalScope {\n [Replaceable, SecureContext] readonly attribute AI ai;\n};\n\n[Exposed=(Window,Worker), SecureContext]\ninterface AI {\n readonly attribute AILanguageModelFactory languageModel;\n};\n\n[Exposed=(Window,Worker), SecureContext]\ninterface AICreateMonitor : EventTarget {\n attribute EventHandler ondownloadprogress;\n\n // Might get more stuff in the future, e.g. for\n // https://github.com/webmachinelearning/prompt-api/issues/4\n};\n\ncallback AICreateMonitorCallback = undefined (AICreateMonitor monitor);\n\nenum AICapabilityAvailability { \"readily\", \"after-download\", \"no\" };\n```\n\n```webidl\n// Language Model\n\n[Exposed=(Window,Worker), SecureContext]\ninterface AILanguageModelFactory {\n Promise<AILanguageModel> create(optional AILanguageModelCreateOptions options = {});\n Promise<AICapabilityAvailability> availability(optional AILanguageModelCreateCoreOptions options = {});\n Promise<AILanguageModelParams?> params();\n};\n\n[Exposed=(Window,Worker), SecureContext]\ninterface AILanguageModel : EventTarget {\n Promise<DOMString> prompt(AILanguageModelPromptInput input, optional AILanguageModelPromptOptions options = {});\n ReadableStream promptStreaming(AILanguageModelPromptInput input, optional AILanguageModelPromptOptions options = {});\n\n Promise<unsigned long long> countPromptTokens(AILanguageModelPromptInput input, optional AILanguageModelPromptOptions options = {});\n readonly attribute unsigned long long maxTokens;\n readonly attribute unsigned long long tokensSoFar;\n readonly attribute unsigned long long tokensLeft;\n\n readonly attribute unsigned long topK;\n readonly attribute float temperature;\n readonly attribute FrozenArray<DOMString>? expectedInputLanguages;\n\n attribute EventHandler oncontextoverflow;\n\n Promise<AILanguageModel> clone(optional AILanguageModelCloneOptions options = {});\n undefined destroy();\n};\n\n[Exposed=(Window,Worker), SecureContext]\ninterface AILanguageModelParams {\n readonly attribute unsigned long defaultTopK;\n readonly attribute unsigned long maxTopK;\n readonly attribute float defaultTemperature;\n readonly attribute float maxTemperature;\n};\n\ndictionary AILanguageModelCreateCoreOptions {\n [EnforceRange] unsigned long topK;\n float temperature;\n sequence<DOMString> expectedInputLanguages;\n}\n\ndictionary AILanguageModelCreateOptions : AILanguageModelCreateCoreOptions {\n AbortSignal signal;\n AICreateMonitorCallback monitor;\n\n DOMString systemPrompt;\n sequence<AILanguageModelInitialPrompt> initialPrompts;\n};\n\ndictionary AILanguageModelInitialPrompt {\n required AILanguageModelInitialPromptRole role;\n required DOMString content;\n};\n\ndictionary AILanguageModelPrompt {\n required AILanguageModelPromptRole role;\n required DOMString content;\n};\n\ndictionary AILanguageModelPromptOptions {\n AbortSignal signal;\n};\n\ndictionary AILanguageModelCloneOptions {\n AbortSignal signal;\n};\n\ntypedef (DOMString or AILanguageModelPrompt or sequence<AILanguageModelPrompt>) AILanguageModelPromptInput;\n\nenum AILanguageModelInitialPromptRole { \"system\", \"user\", \"assistant\" };\nenum AILanguageModelPromptRole { \"user\", \"assistant\" };\n```\n\n### Instruction-tuned versus base models\n\nWe intend for this API to expose instruction-tuned models. 
Although we cannot mandate any particular level of quality or instruction-following capability, we think setting this base expectation can help ensure that what browsers ship is aligned with what web developers expect.\n\nTo illustrate the difference and how it impacts web developer expectations:\n\n* In a base model, a prompt like \"Write a poem about trees.\" might get completed with \"... Write about the animal you would like to be. Write about a conflict between a brother and a sister.\" (etc.) It is directly completing plausible next tokens in the text sequence.\n* Whereas, in an instruction-tuned model, the model will generally _follow_ instructions like \"Write a poem about trees.\", and respond with a poem about trees.\n\nTo ensure the API can be used by web developers across multiple implementations, all browsers should be sure their models behave like instruction-tuned models.\n\n## Alternatives considered and under consideration\n\n### How many stages to reach a response?\n\nTo actually get a response back from the model given a prompt, the following possible stages are involved:\n\n1. Download the model, if necessary.\n2. Establish a session, including configuring per-session options and parameters.\n3. Add an initial prompt to establish context. (This will not generate a response.)\n4. Execute a prompt and receive a response.\n\nWe've chosen to manifest these 3-4 stages into the API as two methods, `ai.languageModel.create()` and `session.prompt()`/`session.promptStreaming()`, with some additional facilities for dealing with the fact that `ai.languageModel.create()` can include a download step. Some APIs simplify this into a single method, and some split it up into three (usually not four).\n\n### Stateless or session-based\n\nOur design here uses [sessions](#session-persistence-and-cloning). An alternate design, seen in some APIs, is to require the developer to feed in the entire conversation history to the model each time, keeping track of the results.\n\nThis can be slightly more flexible; for example, it allows manually correcting the model's responses before feeding them back into the context window.\n\nHowever, our understanding is that the session-based model can be more efficiently implemented, at least for browsers with on-device models. (Implementing it for a cloud-based model would likely be more work.) And, developers can always achieve a stateless model by using a new session for each interaction.\n\n## Privacy considerations\n\nIf cloud-based language models are exposed through this API, then there are potential privacy issues with exposing user or website data to the relevant cloud and model providers. This is not a concern specific to this API, as websites can already choose to expose user or website data to other origins using APIs such as `fetch()`. However, it's worth keeping in mind, and in particular as discussed in our [Goals](#goals), perhaps we should make it easier for web developers to know whether a cloud-based model is in use, or which one.\n\nIf on-device language models are updated separately from browser and operating system versions, this API could enhance the web's fingerprinting service by providing extra identifying bits. Mandating that older browser versions not receive updates or be able to download models from too far into the future might be a possible remediation for this.\n\nFinally, we intend to prohibit (in the specification) any use of user-specific information that is not directly supplied through the API. 
For example, it would not be permissible to fine-tune the language model based on information the user has entered into the browser in the past.\n\n## Stakeholder feedback\n\n* W3C TAG: not yet requested\n* Browser engines and browsers:\n * Chromium: prototyping behind a flag\n * Gecko: not yet requested\n * WebKit: not yet requested\n * Edge: not yet requested\n* Web developers: positive ([example](https://x.com/mortenjust/status/1805190952358650251), [example](https://tyingshoelaces.com/blog/chrome-ai-prompt-api), [example](https://labs.thinktecture.com/local-small-language-models-in-the-browser-a-first-glance-at-chromes-built-in-ai-and-prompt-api-with-gemini-nano/))\n" }, "w3c": { "group": [