-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
google[minor]: Move Vertex embeddings to integration package #6
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,202 @@ | ||
import { Embeddings, EmbeddingsParams } from "@langchain/core/embeddings"; | ||
import { | ||
AsyncCaller, | ||
AsyncCallerCallOptions, | ||
} from "@langchain/core/utils/async_caller"; | ||
import { chunkArray } from "@langchain/core/utils/chunk_array"; | ||
import { GoogleAIConnection } from "./connection.js"; | ||
import { ApiKeyGoogleAuth, GoogleAbstractedClient } from "./auth.js"; | ||
import { GoogleAIModelRequestParams, GoogleConnectionParams } from "./types.js"; | ||
import { getEnvironmentVariable } from "@langchain/core/utils/env"; | ||
|
||
/** | ||
* Connection used for embeddings requests. It targets the "predict" URL | ||
* method and sends the instances together with the request parameters. | ||
*/ | ||
class EmbeddingsConnection< | ||
CallOptions extends AsyncCallerCallOptions, | ||
AuthOptions | ||
> extends GoogleAIConnection< | ||
CallOptions, | ||
GoogleEmbeddingsInstance[], | ||
AuthOptions | ||
> { | ||
/** | ||
* Passes every argument straight through to GoogleAIConnection. | ||
* @param fields Connection parameters, if any. | ||
* @param caller AsyncCaller handed to the base connection. | ||
* @param client Client handed to the base connection. | ||
* @param streaming Streaming flag handed to the base connection. | ||
*/ | ||
constructor( | ||
fields: GoogleConnectionParams<AuthOptions> | undefined, | ||
caller: AsyncCaller, | ||
client: GoogleAbstractedClient, | ||
streaming: boolean | ||
) { | ||
super(fields, caller, client, streaming); | ||
} | ||
|
||
/** | ||
* @returns The URL method name for embeddings requests, always "predict". | ||
*/ | ||
async buildUrlMethod(): Promise<string> { | ||
return "predict"; | ||
} | ||
|
||
/** | ||
* Builds the request body. | ||
* @param input Instances to embed. | ||
* @param parameters Request parameters, forwarded unchanged. | ||
* @returns An object of the form `{ instances, parameters }`. | ||
*/ | ||
formatData( | ||
input: GoogleEmbeddingsInstance[], | ||
parameters: GoogleAIModelRequestParams | ||
): unknown { | ||
return { | ||
instances: input, | ||
parameters, | ||
}; | ||
} | ||
} | ||
|
||
/** | ||
* Defines the parameters required to initialize a | ||
* GoogleEmbeddings instance. It extends EmbeddingsParams and | ||
* GoogleConnectionParams. | ||
* | ||
* `model` is required; the BaseGoogleEmbeddings constructor copies it onto | ||
* the instance and passes it on to the connection. | ||
*/ | ||
export interface BaseGoogleEmbeddingsParams<AuthOptions> | ||
extends EmbeddingsParams, | ||
GoogleConnectionParams<AuthOptions> { | ||
model: string; | ||
} | ||
|
||
/** | ||
* Call options for embeddings requests. It adds no fields of its own to | ||
* AsyncCallerCallOptions; BaseGoogleEmbeddings uses it as the call-options | ||
* type parameter of its connection. | ||
*/ | ||
export interface BaseGoogleEmbeddingsOptions extends AsyncCallerCallOptions {} | ||
|
||
/** | ||
* Represents an instance for generating embeddings using the Google | ||
* Vertex AI API. `content` holds the text to be embedded; embedDocuments | ||
* builds one instance per input document. | ||
*/ | ||
export interface GoogleEmbeddingsInstance { | ||
content: string; | ||
} | ||
|
||
/** | ||
* Defines the structure of a single embeddings result returned by the | ||
* Google Vertex AI API. It is a standalone interface (it extends nothing) | ||
* and matches the shape embedDocuments reads from each prediction: | ||
* `embeddings.values` is the vector returned for a document. | ||
* | ||
* The `statistics` fields are not read anywhere in this file; presumably | ||
* `token_count` is the number of input tokens and `truncated` reports | ||
* whether the input was shortened (TODO confirm against the Vertex AI docs). | ||
*/ | ||
export interface BaseGoogleEmbeddingsResults { | ||
embeddings: { | ||
statistics: { | ||
token_count: number; | ||
truncated: boolean; | ||
}; | ||
values: number[]; | ||
}; | ||
} | ||
|
||
/** | ||
* Enables calls to the Google Cloud's Vertex AI API to access | ||
* the embeddings generated by Large Language Models. | ||
* | ||
* To use, you will need to have one of the following authentication | ||
* methods in place: | ||
* - You are logged into an account permitted to the Google Cloud project | ||
* using Vertex AI. | ||
* - You are running this on a machine using a service account permitted to | ||
* the Google Cloud project using Vertex AI. | ||
* - The `GOOGLE_APPLICATION_CREDENTIALS` environment variable is set to the | ||
* path of a credentials file for a service account permitted to the | ||
* Google Cloud project using Vertex AI. | ||
* | ||
* If an API key is available (the `apiKey` field or the `GOOGLE_API_KEY` | ||
* environment variable), an API-key client is used instead. | ||
* @example | ||
* ```typescript | ||
* const model = new GoogleEmbeddings({ model: "textembedding-gecko" }); | ||
* const res = await model.embedQuery( | ||
* "What would be a good company name for a company that makes colorful socks?" | ||
* ); | ||
* console.log({ res }); | ||
* ``` | ||
*/ | ||
export abstract class BaseGoogleEmbeddings<AuthOptions> | ||
extends Embeddings | ||
implements BaseGoogleEmbeddingsParams<AuthOptions> | ||
{ | ||
/** Model identifier taken from the constructor fields. */ | ||
model: string; | ||
|
||
/** Connection that performs the embeddings requests. */ | ||
private connection: GoogleAIConnection< | ||
BaseGoogleEmbeddingsOptions, | ||
GoogleEmbeddingsInstance[], | ||
GoogleConnectionParams<AuthOptions> | ||
>; | ||
|
||
/** | ||
* Stores the model name and creates the connection with streaming | ||
* disabled, using the client chosen by buildClient. | ||
* @param fields Embeddings and connection parameters; `model` is required. | ||
*/ | ||
constructor(fields: BaseGoogleEmbeddingsParams<AuthOptions>) { | ||
super(fields); | ||
|
||
this.model = fields.model; | ||
this.connection = new EmbeddingsConnection( | ||
{ ...fields, ...this }, | ||
this.caller, | ||
this.buildClient(fields), | ||
false | ||
); | ||
} | ||
|
||
/** | ||
* Creates the client used when no API key is available. Implemented by | ||
* subclasses. | ||
* @param fields Connection parameters, if any. | ||
*/ | ||
abstract buildAbstractedClient( | ||
fields?: GoogleConnectionParams<AuthOptions> | ||
): GoogleAbstractedClient; | ||
|
||
/** | ||
* Creates an API-key based client. | ||
* @param apiKey The API key to authenticate with. | ||
*/ | ||
buildApiKeyClient(apiKey: string): GoogleAbstractedClient { | ||
return new ApiKeyGoogleAuth(apiKey); | ||
} | ||
|
||
/** | ||
* Resolves the API key: `fields.apiKey` when set, otherwise the | ||
* `GOOGLE_API_KEY` environment variable. | ||
* @param fields Connection parameters, if any. | ||
* @returns The API key, or undefined when neither source provides one. | ||
*/ | ||
buildApiKey( | ||
fields?: GoogleConnectionParams<AuthOptions> | ||
): string | undefined { | ||
return fields?.apiKey ?? getEnvironmentVariable("GOOGLE_API_KEY"); | ||
} | ||
|
||
/** | ||
* Chooses the client: an API-key client when buildApiKey yields a | ||
* non-empty key, otherwise the subclass's abstracted client. | ||
* @param fields Connection parameters, if any. | ||
*/ | ||
buildClient( | ||
fields?: GoogleConnectionParams<AuthOptions> | ||
): GoogleAbstractedClient { | ||
const apiKey = this.buildApiKey(fields); | ||
if (apiKey) { | ||
return this.buildApiKeyClient(apiKey); | ||
} else { | ||
return this.buildAbstractedClient(fields); | ||
} | ||
} | ||
|
||
/** | ||
* Takes an array of documents as input and returns a promise that | ||
* resolves to a 2D array of embeddings for each document. It splits the | ||
* documents into chunks and makes requests to the Google Vertex AI API to | ||
* generate embeddings. | ||
* | ||
* A response without `predictions` contributes no embeddings, so the | ||
* result can be shorter than `documents` in that case. | ||
* @param documents An array of documents to be embedded. | ||
* @returns A promise that resolves to a 2D array of embeddings for each document. | ||
*/ | ||
async embedDocuments(documents: string[]): Promise<number[][]> { | ||
// Vertex AI accepts max 5 instances per prediction | ||
const maxInstancesPerRequest = 5; | ||
const instanceChunks: GoogleEmbeddingsInstance[][] = chunkArray( | ||
documents.map((document) => ({ | ||
content: document, | ||
})), | ||
maxInstancesPerRequest | ||
); | ||
const parameters = {}; | ||
const options = {}; | ||
// All chunks are requested concurrently; Promise.all keeps chunk order. | ||
const responses = await Promise.all( | ||
instanceChunks.map((instances) => | ||
this.connection.request(instances, parameters, options) | ||
) | ||
); | ||
// `data` is treated as opaque and viewed through the result type declared | ||
// in this file; nothing is validated at runtime. | ||
const toVectors = (data: unknown): number[][] => { | ||
const payload = data as { predictions?: BaseGoogleEmbeddingsResults[] } | null | undefined; | ||
return ( | ||
payload?.predictions?.map((prediction) => prediction.embeddings.values) ?? | ||
[] | ||
); | ||
}; | ||
return responses.flatMap((response) => toVectors(response?.data)); | ||
} | ||
|
||
/** | ||
* Takes a document as input and returns a promise that resolves to an | ||
* embedding for the document. It calls the embedDocuments method with the | ||
* document as the input. | ||
* | ||
* NOTE(review): the result is the first element of embedDocuments' output, | ||
* which is undefined when no embedding came back. | ||
* @param document A document to be embedded. | ||
* @returns A promise that resolves to an embedding for the document. | ||
*/ | ||
async embedQuery(document: string): Promise<number[]> { | ||
const data = await this.embedDocuments([document]); | ||
return data[0]; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import { | ||
GoogleAbstractedClient, | ||
GoogleConnectionParams, | ||
BaseGoogleEmbeddings, | ||
BaseGoogleEmbeddingsParams, | ||
} from "@langchain/google-common"; | ||
import { GoogleAuthOptions } from "google-auth-library"; | ||
import { GAuthClient } from "./auth.js"; | ||
|
||
/** | ||
* Input to the GoogleEmbeddings class. | ||
*/ | ||
export interface GoogleEmbeddingsInput | ||
extends BaseGoogleEmbeddingsParams<GoogleAuthOptions> {} | ||
|
||
/** | ||
* Embeddings integration built on BaseGoogleEmbeddings, parameterized with | ||
* google-auth-library's GoogleAuthOptions. Its abstracted client is a | ||
* GAuthClient. | ||
*/ | ||
export class GoogleEmbeddings | ||
extends BaseGoogleEmbeddings<GoogleAuthOptions> | ||
implements GoogleEmbeddingsInput | ||
{ | ||
lc_serializable = true; | ||
|
||
// Name reported for this class (used for tracing). | ||
static lc_name() { | ||
return "GoogleEmbeddings"; | ||
} | ||
|
||
/** | ||
* @param fields Embeddings input, forwarded unchanged to the base class. | ||
*/ | ||
constructor(fields: GoogleEmbeddingsInput) { | ||
super(fields); | ||
} | ||
|
||
/** | ||
* Creates the GAuthClient for the given connection parameters. | ||
* @param fields Connection parameters, if any. | ||
* @returns The newly created client. | ||
*/ | ||
buildAbstractedClient( | ||
fields?: GoogleConnectionParams<GoogleAuthOptions> | ||
): GoogleAbstractedClient { | ||
const authClient = new GAuthClient(fields); | ||
return authClient; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
export * from "./chat_models.js"; | ||
export * from "./llms.js"; | ||
export * from "./embeddings.js"; | ||
Comment on lines
1
to
+3
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It would be helpful to add inline comments explaining the purpose and functionality of the exported modules. This will make it easier for other developers to understand the code and contribute to it. For example, you could add a comment like
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import { | ||
type GoogleEmbeddingsInput, | ||
GoogleEmbeddings, | ||
} from "@langchain/google-webauth"; | ||
|
||
/** | ||
* Input to the GoogleVertexAIEmbeddings class. | ||
*/ | ||
export interface GoogleVertexAIEmbeddingsInput extends GoogleEmbeddingsInput {} | ||
|
||
/** | ||
* Integration with Google Vertex AI for generating embeddings. | ||
*/ | ||
export class GoogleVertexAIEmbeddings extends GoogleEmbeddings { | ||
Comment on lines
+11
to
+14
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The class comment for GoogleVertexAIEmbeddings is incorrect. It currently states 'Integration with a chat model', but this class is actually for generating embeddings using Google Vertex AI. Please update the comment to accurately reflect the purpose of this class, such as 'Integration with Google Vertex AI for generating embeddings'.
|
||
// Returns the name reported for this class, "GoogleVertexAIEmbeddings". | ||
static lc_name() { | ||
return "GoogleVertexAIEmbeddings"; | ||
} | ||
|
||
// Forwards `fields` to GoogleEmbeddings with `platformType` set to "gcp". | ||
// The property comes after the spread, so it overrides any `platformType` | ||
// already present in `fields`. | ||
constructor(fields: GoogleVertexAIEmbeddingsInput) { | ||
super({ | ||
...fields, | ||
platformType: "gcp", | ||
}); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
export * from "./chat_models.js"; | ||
export * from "./llms.js"; | ||
export * from "./embeddings.js"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I noticed that a console.log statement has been added which prints out the full URL, including sensitive information like the project ID, endpoint, and model name. This could potentially lead to security vulnerabilities if the logs are exposed. Consider removing this console.log statement or replacing it with a more secure logging method that doesn't expose sensitive information in production environments.