Skip to content

Commit

Permalink
core[minor]: Add XML output parser (#4258)
Browse files Browse the repository at this point in the history
* core[minor]: Add XML output parser

* cr

* docs

* chore: lint files

* cr

* streaming & docs

* cr

* chore: lint files
  • Loading branch information
bracesproul committed Feb 5, 2024
1 parent 8ae8fe3 commit cbd3e96
Show file tree
Hide file tree
Showing 9 changed files with 511 additions and 2 deletions.
29 changes: 29 additions & 0 deletions docs/core_docs/docs/modules/model_io/output_parsers/types/xml.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# XML output parser

The `XMLOutputParser` takes language model output which contains XML and parses it into a JSON object.

The output parser also supports streaming outputs.

Currently, the XML parser does not support self-closing tags or attributes on tags.

## Usage

import CodeBlock from "@theme/CodeBlock";

import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx";

<IntegrationInstallTooltip></IntegrationInstallTooltip>

```bash npm2yarn
npm install @langchain/core
```

import XMLExample from "@examples/prompts/xml_output_parser.ts";

<CodeBlock language="typescript">{XMLExample}</CodeBlock>

## Streaming

import XMLStreamingExample from "@examples/prompts/xml_output_parser_streaming.ts";

<CodeBlock language="typescript">{XMLStreamingExample}</CodeBlock>
67 changes: 67 additions & 0 deletions examples/src/prompts/xml_output_parser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import { XMLOutputParser } from "@langchain/core/output_parsers";

// Sample XML document used as the parser input for this example.
const XML_EXAMPLE = `<?xml version="1.0" encoding="UTF-8"?>
<userProfile>
<userID>12345</userID>
<name>John Doe</name>
<email>john.doe@example.com</email>
<roles>
<role>Admin</role>
<role>User</role>
</roles>
<preferences>
<theme>Dark</theme>
<notifications>
<email>true</email>
<sms>false</sms>
</notifications>
</preferences>
</userProfile>`;

// Parse the whole document in a single call.
const xmlParser = new XMLOutputParser();
const parsed = await xmlParser.invoke(XML_EXAMPLE);

// Pretty-print the parsed structure with two-space indentation.
const prettyJson = JSON.stringify(parsed, null, 2);
console.log(prettyJson);
/*
{
"userProfile": [
{
"userID": "12345"
},
{
"name": "John Doe"
},
{
"email": "john.doe@example.com"
},
{
"roles": [
{
"role": "Admin"
},
{
"role": "User"
}
]
},
{
"preferences": [
{
"theme": "Dark"
},
{
"notifications": [
{
"email": "true"
},
{
"sms": "false"
}
]
}
]
}
]
}
*/
118 changes: 118 additions & 0 deletions examples/src/prompts/xml_output_parser_streaming.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import { XMLOutputParser } from "@langchain/core/output_parsers";
import { FakeStreamingLLM } from "@langchain/core/utils/testing";

// Sample XML document; it is used both as the fake model's response and as the prompt.
const XML_EXAMPLE = `<?xml version="1.0" encoding="UTF-8"?>
<userProfile>
<userID>12345</userID>
<roles>
<role>Admin</role>
<role>User</role>
</roles>
</userProfile>`;

const xmlParser = new XMLOutputParser();

// Define your LLM. This example uses a FakeStreamingLLM configured with the
// XML document as its only response, in place of a real model.
const fakeModel = new FakeStreamingLLM({ responses: [XML_EXAMPLE] });

// Pipe the model's output into the parser.
const chain = fakeModel.pipe(xmlParser);

// Print every chunk the chain streams back, pretty-printed as JSON.
const chunks = await chain.stream(XML_EXAMPLE);
for await (const partial of chunks) {
  console.log(JSON.stringify(partial, null, 2));
}
/*
{}
{
"userProfile": ""
}
{
"userProfile": "\n"
}
{
"userProfile": [
{
"userID": ""
}
]
}
{
"userProfile": [
{
"userID": "123"
}
]
}
{
"userProfile": [
{
"userID": "12345"
},
{
"roles": ""
}
]
}
{
"userProfile": [
{
"userID": "12345"
},
{
"roles": [
{
"role": "A"
}
]
}
]
}
{
"userProfile": [
{
"userID": "12345"
},
{
"roles": [
{
"role": "Admi"
}
]
}
]
}
{
"userProfile": [
{
"userID": "12345"
},
{
"roles": [
{
"role": "Admin"
},
{
"role": "U"
}
]
}
]
}
{
"userProfile": [
{
"userID": "12345"
},
{
"roles": [
{
"role": "Admin"
},
{
"role": "User"
}
]
}
]
}
*/
2 changes: 2 additions & 0 deletions langchain-core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
"ml-distance": "^4.0.0",
"p-queue": "^6.6.2",
"p-retry": "4",
"sax": "^1.3.0",
"uuid": "^9.0.0",
"zod": "^3.22.4",
"zod-to-json-schema": "^3.22.3"
Expand All @@ -55,6 +56,7 @@
"@langchain/scripts": "~0.0",
"@swc/core": "^1.3.90",
"@swc/jest": "^0.2.29",
"@types/sax": "^1",
"dpdm": "^3.12.0",
"eslint": "^8.33.0",
"eslint-config-airbnb-base": "^15.0.0",
Expand Down
1 change: 1 addition & 0 deletions langchain-core/src/output_parsers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ export * from "./list.js";
export * from "./string.js";
export * from "./transform.js";
export * from "./json.js";
export * from "./xml.js";
2 changes: 1 addition & 1 deletion langchain-core/src/output_parsers/json.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ export function parseJsonMarkdown(s: string, parser = parsePartialJson) {
}
}

// Adapted from https://github.com/KillianLucas/open-interpreter/blob/main/interpreter/utils/parse_partial_json.py
// Adapted from https://github.com/KillianLucas/open-interpreter/blob/main/interpreter/core/llm/utils/parse_partial_json.py
// MIT License
export function parsePartialJson(s: string) {
// If the input is undefined, return null to indicate failure.
Expand Down
93 changes: 93 additions & 0 deletions langchain-core/src/output_parsers/tests/xml.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import { FakeStreamingLLM } from "../../utils/testing/index.js";
import { XMLOutputParser } from "../xml.js";

// XML fixture shared by the tests in this file.
const XML_EXAMPLE = `<?xml version="1.0" encoding="UTF-8"?>
<userProfile>
<userID>12345</userID>
<email>john.doe@example.com</email>
<roles>
<role>Admin</role>
<role>User</role>
</roles>
<preferences>
<theme>Dark</theme>
<notifications>
<email>true</email>
</notifications>
</preferences>
</userProfile>`;

// The same document wrapped in a Markdown "xml" code fence.
const BACKTICK_WRAPPED_XML = ["```xml", XML_EXAMPLE, "```"].join("\n");

// Structure the tests expect the parser to produce for XML_EXAMPLE:
// each element is a single-key object, and nested elements are arrays of such objects.
const expectedResult = {
  userProfile: [
    { userID: "12345" },
    { email: "john.doe@example.com" },
    { roles: [{ role: "Admin" }, { role: "User" }] },
    {
      preferences: [
        { theme: "Dark" },
        { notifications: [{ email: "true" }] },
      ],
    },
  ],
};

// A plain XML document parses into the expected nested structure.
test("Can parse XML", async () => {
  const xmlParser = new XMLOutputParser();

  await expect(xmlParser.invoke(XML_EXAMPLE)).resolves.toStrictEqual(
    expectedResult
  );
});

// XML inside a Markdown code fence yields the same result as the bare document.
test("Can parse backtick wrapped XML", async () => {
  const parsed = await new XMLOutputParser().invoke(BACKTICK_WRAPPED_XML);

  expect(parsed).toStrictEqual(expectedResult);
});

// Tags passed to the constructor appear, comma-separated, in the format instructions.
test("Can format instructions with passed tags.", async () => {
  const xmlParser = new XMLOutputParser({ tags: ["tag1", "tag2", "tag3"] });

  expect(xmlParser.getFormatInstructions()).toContain("tag1, tag2, tag3");
});

// Streaming through the parser: the last chunk emitted must equal the fully
// parsed document.
test("Can parse streams", async () => {
  const parser = new XMLOutputParser();
  const streamingLlm = new FakeStreamingLLM({
    responses: [XML_EXAMPLE],
  }).pipe(parser);

  const result = await streamingLlm.stream(XML_EXAMPLE);
  // Keep only the most recent chunk; earlier chunks are partial results.
  let finalResult = {};
  for await (const chunk of result) {
    finalResult = chunk;
  }
  expect(finalResult).toStrictEqual(expectedResult);
});
Loading

0 comments on commit cbd3e96

Please sign in to comment.