Skip to content

Commit

Permalink
Merge pull request #23 from route06inc/websearch
Browse files Browse the repository at this point in the history
Search and Scrape Webpages
  • Loading branch information
shige authored Oct 18, 2024
2 parents 691edd5 + 6e2876a commit 9b60fc9
Show file tree
Hide file tree
Showing 15 changed files with 1,936 additions and 91 deletions.
9 changes: 9 additions & 0 deletions app/(playground)/p/[agentId]/beta-proto/giselle-node/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,15 @@ export type GiselleNodeArtifactElement = {
properties: Record<string, unknown>;
};

/**
 * Element representation of a Giselle node tagged as a web-search element.
 *
 * The `object` field is fixed to the literal "node.webSearchElement", which
 * distinguishes this shape from other node element types.
 * NOTE(review): presumably this is the type of `WebSearch.generatorNode` —
 * confirm against web-search/types.
 */
export type GiselleNodeWebSearchElement = {
// Identifier of the node.
id: GiselleNodeId;
// Literal tag identifying this element kind.
object: "node.webSearchElement";
// Name of the node.
name: string;
// Category of the node.
category: GiselleNodeCategory;
// Archetype of the node, as a plain string.
archetype: string;
// Free-form property bag; values are untyped (`unknown`).
properties: Record<string, unknown>;
};

export type InferGiselleNodeObject<T extends GiselleNodeBlueprint> = {
id: GiselleNodeId;
object: "node";
Expand Down
39 changes: 31 additions & 8 deletions app/(playground)/p/[agentId]/beta-proto/graph/actions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import {
import { giselleNodeToGiselleNodeArtifactElement } from "../giselle-node/utils";
import type { TextContent, TextContentReference } from "../text-content/types";
import { generateWebSearchStream } from "../web-search/server-action";
import type { WebSearch } from "../web-search/types";
import type { ThunkAction } from "./context";
import {
generateArtifactStream,
Expand Down Expand Up @@ -536,6 +537,13 @@ export const generateText =
}
}

const node = state.graph.nodes.find(
(node) => node.id === args.textGeneratorNode.id,
);
if (node === undefined) {
/** @todo error handling */
throw new Error("Node not found");
}
switch (instructionConnector.targetNodeArcheType) {
case giselleNodeArchetypes.textGenerator: {
const systemPrompt =
Expand Down Expand Up @@ -596,13 +604,6 @@ ${instructionSources.map((source) => `<Source title="${source.title}" type="${so
const artifact = state.graph.artifacts.find(
(artifact) => artifact.generatorNode.id === args.textGeneratorNode.id,
);
const node = state.graph.nodes.find(
(node) => node.id === args.textGeneratorNode.id,
);
if (node === undefined) {
/** @todo error handling */
throw new Error("Node not found");
}

dispatch(
addOrReplaceArtifact({
Expand Down Expand Up @@ -657,6 +658,7 @@ ${instructionSources.map((source) => `<Source title="${source.title}" type="${so
const { object } = await generateWebSearchStream({
userPrompt: instructionNode.output as string,
systemPrompt,
node,
});
let content: PartialGeneratedObject = {};
for await (const streamContent of readStreamableValue(object)) {
Expand Down Expand Up @@ -708,6 +710,12 @@ ${instructionSources.map((source) => `<Source title="${source.title}" type="${so
},
}),
);
dispatch(
upsertWebSearch({
// biome-ignore lint: lint/suspicious/noExplicitAny be typesafe earlier
webSearch: content as any,
}),
);
break;
}
}
Expand Down Expand Up @@ -1085,6 +1093,20 @@ export function removeSelectedNodesOrFeedback(): ThunkAction {
};
}

/**
 * Graph action carrying a web search to upsert; identified by the
 * "upsertWebSearch" literal in `type`.
 */
interface UpsertWebSearchAction {
type: "upsertWebSearch";
// Payload holding the web search to store.
inputs: UpsertWebSearchInputs;
}
/** Payload of an "upsertWebSearch" action. */
interface UpsertWebSearchInputs {
// The web search to store.
webSearch: WebSearch;
}
/**
 * Action creator for the "upsertWebSearch" graph action.
 *
 * @param inputs - Payload containing the web search to store.
 * @returns An action object whose `type` is "upsertWebSearch" and whose
 *   `inputs` is the very object passed in (no copy is made).
 */
function upsertWebSearch(inputs: UpsertWebSearchInputs): UpsertWebSearchAction {
  const action: UpsertWebSearchAction = {
    type: "upsertWebSearch",
    inputs: inputs,
  };
  return action;
}

export type GraphAction =
| AddNodeAction
| RemoveNodeAction
Expand All @@ -1100,4 +1122,5 @@ export type GraphAction =
| AddOrReplaceArtifactAction
| RemoveArtifactAction
| AddParameterToNodeAction
| RemoveParameterFromNodeAction;
| RemoveParameterFromNodeAction
| UpsertWebSearchAction;
21 changes: 21 additions & 0 deletions app/(playground)/p/[agentId]/beta-proto/graph/reducer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,27 @@ export const graphReducer = (
),
},
};
case "upsertWebSearch": {
  // Web searches are matched by the id of their generator node. The
  // comparison stays inside a callback so the incoming payload is only
  // dereferenced when there is at least one stored web search to compare.
  const isSameGenerator = (stored: (typeof state.graph.webSearches)[number]) =>
    stored.generatorNode.id === action.inputs.webSearch.generatorNode.id;

  const alreadyStored = state.graph.webSearches.some(isSameGenerator);

  let nextWebSearches: typeof state.graph.webSearches;
  if (alreadyStored) {
    // Replace every stored entry that shares the generator node.
    nextWebSearches = state.graph.webSearches.map((stored) =>
      isSameGenerator(stored) ? action.inputs.webSearch : stored,
    );
  } else {
    // No entry for this generator node yet: append.
    nextWebSearches = [...state.graph.webSearches, action.inputs.webSearch];
  }

  return {
    ...state,
    graph: {
      ...state.graph,
      webSearches: nextWebSearches,
    },
  };
}
default:
return state;
}
Expand Down
2 changes: 2 additions & 0 deletions app/(playground)/p/[agentId]/beta-proto/graph/types.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import type { Artifact } from "../artifact/types";
import type { ConnectorObject } from "../connector/types";
import type { GiselleNode } from "../giselle-node/types";
import type { WebSearch } from "../web-search/types";

/**
 * The collections that make up a graph: nodes, connectors, artifacts and
 * web searches.
 */
export type Graph = {
nodes: GiselleNode[];
connectors: ConnectorObject[];
artifacts: Artifact[];
// Web searches stored on the graph.
webSearches: WebSearch[];
};

export type GraphState = {
Expand Down
44 changes: 25 additions & 19 deletions app/(playground)/p/[agentId]/beta-proto/web-search/schema.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,31 @@
import { jsonSchema } from "ai";

/**
 * JSON schema describing the object generated for a web search request.
 *
 * The generated object has three required fields:
 * - `plan`: a description of how the request will be fulfilled,
 * - `name`: the name of the web search,
 * - `keywords`: the search queries to run.
 */
export const webSearchSchema = jsonSchema<{
  plan: string;
  keywords: string[];
  name: string;
}>({
  $schema: "https://json-schema.org/draft/2020-12/schema",
  title: "keyword schema",
  type: "object",
  properties: {
    plan: {
      type: "string",
      // Fixed typo: the original said "archive" where "achieve" was meant.
      description: "Describe the plan for how you will achieve the user request",
    },
    name: {
      type: "string",
      description: "The name of the web search",
    },
    keywords: {
      type: "array",
      items: {
        type: "string",
        description:
          "Suggest appropriate search queries with relevant keywords at least 3-5 words long",
      },
      description: "The keywords to search for user request",
    },
  },
  required: ["plan", "name", "keywords"],
});
130 changes: 70 additions & 60 deletions app/(playground)/p/[agentId]/beta-proto/web-search/server-action.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,22 @@

import { getUserSubscriptionId, isRoute06User } from "@/app/(auth)/lib";
import { openai } from "@ai-sdk/openai";
import FirecrawlApp from "@mendable/firecrawl-js";
import { metrics } from "@opentelemetry/api";
import { createId } from "@paralleldrive/cuid2";
import { put } from "@vercel/blob";
import { streamObject } from "ai";
import { createStreamableValue } from "ai/rsc";
import Langfuse from "langfuse";
import type { GiselleNode } from "../giselle-node/types";
import { webSearchSchema } from "./schema";

const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
import { search } from "./tavily";
import { type WebSearch, webSearchItemStatus, webSearchStatus } from "./types";

/** Inputs accepted by `generateWebSearchStream`. */
interface GenerateWebSearchStreamInputs {
// Prompt text supplied by the user.
userPrompt: string;
// Optional system prompt.
systemPrompt?: string;
// Node the web search is generated for; its fields are spread into the
// resulting web search's `generatorNode`.
node: GiselleNode;
}
export async function generateWebSearchStream(
inputs: GenerateWebSearchStreamInputs,
Expand Down Expand Up @@ -57,74 +62,79 @@ export async function generateWebSearchStream(

const result = await object;

await sleep(500);
stream.update({
...result,
webSearch: {
name: "Why Deno is the best choice for biginner",
},
});
const searchResults = await Promise.all(
result.keywords.map((keyword) => search(keyword)),
).then((results) => [...new Set(results.flat())]);

await sleep(1000);
stream.update({
...result,
webSearch: {
name: "Why Deno is the best choice for biginner",
items: [
{
id: "wbs.cnt_1",
title: "Deno vs Node.js: A Detailed Comparison",
url: "https://www.freecodecamp.org/news/deno-vs-node-js/",
status: "pending",
},
],
const webSearch: WebSearch = {
id: `wbs_${createId()}`,
generatorNode: {
...inputs.node,
object: "node.webSearchElement",
},
});
object: "webSearch",
name: result.name,
status: "pending",
items: searchResults.map((searchResult) => ({
id: `wbs.cnt_${createId()}`,
object: "webSearch.item.reference",
title: searchResult.title,
url: searchResult.url,
status: "pending",
})),
};

await sleep(1000);
stream.update({
...result,
webSearch: {
name: "Why Deno is the best choice for biginner",
items: [
{
id: "wbs.cnt_1",
title: "Deno Beginner",
url: "https://denobeginner.com/",
status: "completed",
},
{
id: "wbs.cnt_2",
title: "Intro to Deno – Guide for Beginners",
url: "https://www.freecodecamp.org/news/intro-to-deno/",
status: "processing",
},
],
},
webSearch,
});
await sleep(1000);

if (process.env.FIRECRAWL_API_KEY === undefined) {
throw new Error("FIRECRAWL_API_KEY is not set");
}
const app = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY });
let mutableItems = webSearch.items;
await Promise.all(
webSearch.items.map(async (webSearchItem) => {
const scrapeResponse = await app.scrapeUrl(webSearchItem.url, {
formats: ["markdown"],
});
if (scrapeResponse.success) {
const blob = await put(
`webSearch/${webSearchItem.id}.md`,
scrapeResponse.markdown ?? "",
{
access: "public",
contentType: "text/markdown",
},
);
mutableItems = mutableItems.map((item) => {
if (item.id !== webSearchItem.id) {
return item;
}
return {
...webSearchItem,
contentBlobUrl: blob.url,
status: webSearchItemStatus.completed,
};
});
stream.update({
...result,
webSearch: {
...webSearch,
items: mutableItems,
},
});
}
}),
);
stream.update({
...result,
webSearch: {
name: "Why Deno is the best choice for biginner",
items: [
{
id: "wbs.cnt_1",
title: "Deno Beginner",
url: "https://denobeginner.com/",
status: "completed",
},
{
id: "wbs.cnt_2",
title: "Intro to Deno – Guide for Beginners",
url: "https://www.freecodecamp.org/news/intro-to-deno/",
status: "completed",
},
],
status: "completed",
...webSearch,
status: webSearchStatus.completed,
items: mutableItems,
},
description:
"Deno is a runtime for JavaScript and TypeScript that is based on the V8 JavaScript engine and the Rust programming language. It was created by Ryan Dahl, the original creator of Node.js, and was designed to address some of the shortcomings of Node.js. Deno is designed to be secure by default, with no file, network, or environment access unless explicitly enabled. It also has built-in support for TypeScript, which makes it easier to write and maintain large codebases. Deno is still relatively new compared to Node.js, but it has been gaining popularity among developers who are looking for a more secure and modern alternative to Node.js.",
});

stream.done();
Expand Down
Loading

0 comments on commit 9b60fc9

Please sign in to comment.