Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Search and Scrape Webpages #23

Merged
merged 4 commits into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,15 @@ export type GiselleNodeArtifactElement = {
properties: Record<string, unknown>;
};

/**
 * Node-shaped record tagged as a web-search element.
 *
 * The `object` literal `"node.webSearchElement"` is the discriminator that
 * tells this record apart from the other node-like shapes in this module.
 * NOTE(review): the remaining fields appear to mirror the sibling
 * `GiselleNode*Element` types — confirm they should stay in sync.
 */
export type GiselleNodeWebSearchElement = {
id: GiselleNodeId;
// Literal tag identifying this record as a web-search element.
object: "node.webSearchElement";
name: string;
category: GiselleNodeCategory;
archetype: string;
// Free-form bag of values; the type imposes no schema on it.
properties: Record<string, unknown>;
};

export type InferGiselleNodeObject<T extends GiselleNodeBlueprint> = {
id: GiselleNodeId;
object: "node";
Expand Down
39 changes: 31 additions & 8 deletions app/(playground)/p/[agentId]/beta-proto/graph/actions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import {
import { giselleNodeToGiselleNodeArtifactElement } from "../giselle-node/utils";
import type { TextContent, TextContentReference } from "../text-content/types";
import { generateWebSearchStream } from "../web-search/server-action";
import type { WebSearch } from "../web-search/types";
import type { ThunkAction } from "./context";
import {
generateArtifactStream,
Expand Down Expand Up @@ -536,6 +537,13 @@ export const generateText =
}
}

const node = state.graph.nodes.find(
(node) => node.id === args.textGeneratorNode.id,
);
if (node === undefined) {
/** @todo error handling */
throw new Error("Node not found");
}
switch (instructionConnector.targetNodeArcheType) {
case giselleNodeArchetypes.textGenerator: {
const systemPrompt =
Expand Down Expand Up @@ -596,13 +604,6 @@ ${instructionSources.map((source) => `<Source title="${source.title}" type="${so
const artifact = state.graph.artifacts.find(
(artifact) => artifact.generatorNode.id === args.textGeneratorNode.id,
);
const node = state.graph.nodes.find(
(node) => node.id === args.textGeneratorNode.id,
);
if (node === undefined) {
/** @todo error handling */
throw new Error("Node not found");
}

dispatch(
addOrReplaceArtifact({
Expand Down Expand Up @@ -657,6 +658,7 @@ ${instructionSources.map((source) => `<Source title="${source.title}" type="${so
const { object } = await generateWebSearchStream({
userPrompt: instructionNode.output as string,
systemPrompt,
node,
});
let content: PartialGeneratedObject = {};
for await (const streamContent of readStreamableValue(object)) {
Expand Down Expand Up @@ -708,6 +710,12 @@ ${instructionSources.map((source) => `<Source title="${source.title}" type="${so
},
}),
);
dispatch(
upsertWebSearch({
// biome-ignore lint: lint/suspicious/noExplicitAny be typesafe earlier
webSearch: content as any,
}),
);
break;
}
}
Expand Down Expand Up @@ -1085,6 +1093,20 @@ export function removeSelectedNodesOrFeedback(): ThunkAction {
};
}

/** Payload carried by an `upsertWebSearch` action. */
interface UpsertWebSearchInputs {
  webSearch: WebSearch;
}

/** Graph action asking the reducer to insert or replace a web search. */
interface UpsertWebSearchAction {
  type: "upsertWebSearch";
  inputs: UpsertWebSearchInputs;
}

/**
 * Action creator for `upsertWebSearch`.
 *
 * @param inputs - Wrapper holding the web search to store; passed through
 *   to the action unchanged (same object reference).
 * @returns An action object with `type` set to `"upsertWebSearch"`.
 */
function upsertWebSearch(inputs: UpsertWebSearchInputs): UpsertWebSearchAction {
  const action: UpsertWebSearchAction = { type: "upsertWebSearch", inputs };
  return action;
}

export type GraphAction =
| AddNodeAction
| RemoveNodeAction
Expand All @@ -1100,4 +1122,5 @@ export type GraphAction =
| AddOrReplaceArtifactAction
| RemoveArtifactAction
| AddParameterToNodeAction
| RemoveParameterFromNodeAction;
| RemoveParameterFromNodeAction
| UpsertWebSearchAction;
21 changes: 21 additions & 0 deletions app/(playground)/p/[agentId]/beta-proto/graph/reducer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,27 @@ export const graphReducer = (
),
},
};
case "upsertWebSearch": {
  // Insert the incoming web search, or replace the stored entry that was
  // produced by the same generator node.
  const incoming = action.inputs.webSearch;
  const generatorNodeId = incoming.generatorNode.id;
  const stored = state.graph.webSearches;

  const alreadyStored = stored.some(
    (candidate) => candidate.generatorNode.id === generatorNodeId,
  );

  let webSearches: typeof stored;
  if (alreadyStored) {
    // Swap matching entries in place so list order is preserved.
    webSearches = stored.map((candidate) =>
      candidate.generatorNode.id === generatorNodeId ? incoming : candidate,
    );
  } else {
    webSearches = [...stored, incoming];
  }

  return {
    ...state,
    graph: {
      ...state.graph,
      webSearches,
    },
  };
}
default:
return state;
}
Expand Down
2 changes: 2 additions & 0 deletions app/(playground)/p/[agentId]/beta-proto/graph/types.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import type { Artifact } from "../artifact/types";
import type { ConnectorObject } from "../connector/types";
import type { GiselleNode } from "../giselle-node/types";
import type { WebSearch } from "../web-search/types";

/**
 * Collections that make up a graph: its nodes, the connectors between them,
 * and the artifacts and web searches associated with it.
 */
export type Graph = {
nodes: GiselleNode[];
connectors: ConnectorObject[];
artifacts: Artifact[];
// Web search records stored alongside the artifacts.
webSearches: WebSearch[];
};

export type GraphState = {
Expand Down
44 changes: 25 additions & 19 deletions app/(playground)/p/[agentId]/beta-proto/web-search/schema.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,31 @@
import { jsonSchema } from "ai";

export const webSearchSchema = jsonSchema<{ plan: string; keywords: string[] }>(
{
$schema: "https://json-schema.org/draft/2020-12/schema",
title: "keyword schema",
type: "object",
properties: {
plan: {
export const webSearchSchema = jsonSchema<{
plan: string;
keywords: string[];
name: string;
}>({
$schema: "https://json-schema.org/draft/2020-12/schema",
title: "keyword schema",
type: "object",
properties: {
plan: {
type: "string",
description: "Describe the plan that you will archive user request",
},
name: {
type: "string",
description: "The name of the web search",
},
keywords: {
type: "array",
items: {
type: "string",
description: "Describe the plan that you will archive user request",
},
keywords: {
type: "array",
items: {
type: "string",
description:
"Suggest appropriate search queries with relevant keywords at least 3-5 words long",
},
description: "The keywords to search for user request",
description:
"Suggest appropriate search queries with relevant keywords at least 3-5 words long",
},
description: "The keywords to search for user request",
},
required: ["plan", "keywords"],
},
);
required: ["plan", "name", "keywords"],
});
130 changes: 70 additions & 60 deletions app/(playground)/p/[agentId]/beta-proto/web-search/server-action.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,22 @@

import { getUserSubscriptionId, isRoute06User } from "@/app/(auth)/lib";
import { openai } from "@ai-sdk/openai";
import FirecrawlApp from "@mendable/firecrawl-js";
import { metrics } from "@opentelemetry/api";
import { createId } from "@paralleldrive/cuid2";
import { put } from "@vercel/blob";
import { streamObject } from "ai";
import { createStreamableValue } from "ai/rsc";
import Langfuse from "langfuse";
import type { GiselleNode } from "../giselle-node/types";
import { webSearchSchema } from "./schema";

const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
import { search } from "./tavily";
import { type WebSearch, webSearchItemStatus, webSearchStatus } from "./types";

interface GenerateWebSearchStreamInputs {
userPrompt: string;
systemPrompt?: string;
node: GiselleNode;
}
export async function generateWebSearchStream(
inputs: GenerateWebSearchStreamInputs,
Expand Down Expand Up @@ -57,74 +62,79 @@ export async function generateWebSearchStream(

const result = await object;

await sleep(500);
stream.update({
...result,
webSearch: {
name: "Why Deno is the best choice for biginner",
},
});
const searchResults = await Promise.all(
result.keywords.map((keyword) => search(keyword)),
).then((results) => [...new Set(results.flat())]);

await sleep(1000);
stream.update({
...result,
webSearch: {
name: "Why Deno is the best choice for biginner",
items: [
{
id: "wbs.cnt_1",
title: "Deno vs Node.js: A Detailed Comparison",
url: "https://www.freecodecamp.org/news/deno-vs-node-js/",
status: "pending",
},
],
const webSearch: WebSearch = {
id: `wbs_${createId()}`,
generatorNode: {
...inputs.node,
object: "node.webSearchElement",
},
});
object: "webSearch",
name: result.name,
status: "pending",
items: searchResults.map((searchResult) => ({
id: `wbs.cnt_${createId()}`,
object: "webSearch.item.reference",
title: searchResult.title,
url: searchResult.url,
status: "pending",
})),
};

await sleep(1000);
stream.update({
...result,
webSearch: {
name: "Why Deno is the best choice for biginner",
items: [
{
id: "wbs.cnt_1",
title: "Deno Beginner",
url: "https://denobeginner.com/",
status: "completed",
},
{
id: "wbs.cnt_2",
title: "Intro to Deno – Guide for Beginners",
url: "https://www.freecodecamp.org/news/intro-to-deno/",
status: "processing",
},
],
},
webSearch,
});
await sleep(1000);

if (process.env.FIRECRAWL_API_KEY === undefined) {
throw new Error("FIRECRAWL_API_KEY is not set");
}
const app = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY });
let mutableItems = webSearch.items;
await Promise.all(
webSearch.items.map(async (webSearchItem) => {
const scrapeResponse = await app.scrapeUrl(webSearchItem.url, {
formats: ["markdown"],
});
if (scrapeResponse.success) {
const blob = await put(
`webSearch/${webSearchItem.id}.md`,
scrapeResponse.markdown ?? "",
{
access: "public",
contentType: "text/markdown",
},
);
mutableItems = mutableItems.map((item) => {
if (item.id !== webSearchItem.id) {
return item;
}
return {
...webSearchItem,
contentBlobUrl: blob.url,
status: webSearchItemStatus.completed,
};
});
stream.update({
...result,
webSearch: {
...webSearch,
items: mutableItems,
},
});
}
}),
);
stream.update({
...result,
webSearch: {
name: "Why Deno is the best choice for biginner",
items: [
{
id: "wbs.cnt_1",
title: "Deno Beginner",
url: "https://denobeginner.com/",
status: "completed",
},
{
id: "wbs.cnt_2",
title: "Intro to Deno – Guide for Beginners",
url: "https://www.freecodecamp.org/news/intro-to-deno/",
status: "completed",
},
],
status: "completed",
...webSearch,
status: webSearchStatus.completed,
items: mutableItems,
},
description:
"Deno is a runtime for JavaScript and TypeScript that is based on the V8 JavaScript engine and the Rust programming language. It was created by Ryan Dahl, the original creator of Node.js, and was designed to address some of the shortcomings of Node.js. Deno is designed to be secure by default, with no file, network, or environment access unless explicitly enabled. It also has built-in support for TypeScript, which makes it easier to write and maintain large codebases. Deno is still relatively new compared to Node.js, but it has been gaining popularity among developers who are looking for a more secure and modern alternative to Node.js.",
});

stream.done();
Expand Down
Loading