Skip to content

Commit dc80022

Browse files
committed
✨ feat: add vertex ai model runtime
1 parent df082e5 commit dc80022

File tree

9 files changed

+295
-3
lines changed

9 files changed

+295
-3
lines changed

package.json

+1
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@
119119
"@codesandbox/sandpack-react": "^2.19.10",
120120
"@cyntler/react-doc-viewer": "^1.17.0",
121121
"@electric-sql/pglite": "0.2.13",
122+
"@google-cloud/vertexai": "^1.9.0",
122123
"@google/generative-ai": "^0.21.0",
123124
"@huggingface/inference": "^2.8.1",
124125
"@icons-pack/react-simple-icons": "9.6.0",
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import { readFileSync } from 'node:fs';
2+
import { resolve } from 'node:path';
3+
4+
import { getLLMConfig } from '@/config/llm';
5+
import { AgentRuntime, ModelProvider } from '@/libs/agent-runtime';
6+
import { LobeVertexAI } from '@/libs/agent-runtime/vertexai';
7+
import { safeParseJSON } from '@/utils/safeParseJSON';
8+
9+
import { POST as UniverseRoute } from '../[provider]/route';
10+
11+
/**
 * POST handler for the Vertex AI chat route.
 *
 * Delegates to the shared `[provider]` route handler with the provider pinned
 * to `ModelProvider.VertexAI`, and supplies a runtime factory that builds the
 * Vertex AI client from the values returned by `getLLMConfig()`.
 */
export const POST = async (req: Request) => {
  /** Builds an `AgentRuntime` wrapping a Vertex AI backed runtime instance. */
  const createRuntime = () => {
    const {
      VERTEXAI_PROJECT: project,
      VERTEXAI_LOCATION: location,
      VERTEXAI_CREDENTIALS: inlineCredentials,
      VERTEXAI_CREDENTIALS_PATH: credentialsPath,
    } = getLLMConfig();

    // Inline credentials win; the credentials file is only read when the inline
    // value is null/undefined and a path is configured. The path is resolved
    // against the current working directory.
    const loadCredentials = () => {
      if (inlineCredentials !== undefined && inlineCredentials !== null) return inlineCredentials;
      if (!credentialsPath) return undefined;

      return readFileSync(resolve(process.cwd(), credentialsPath), 'utf8');
    };

    const credentialsContent = loadCredentials();

    // An empty/missing credentials string skips parsing entirely.
    const googleAuthOptions = credentialsContent ? safeParseJSON(credentialsContent) : undefined;

    const instance = LobeVertexAI.initFromVertexAI({ googleAuthOptions, location, project });

    return new AgentRuntime(instance);
  };

  return UniverseRoute(req, {
    createRuntime,
    params: { provider: ModelProvider.VertexAI },
  });
};

src/config/modelProviders/index.ts

+3
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import StepfunProvider from './stepfun';
3333
import TaichuProvider from './taichu';
3434
import TogetherAIProvider from './togetherai';
3535
import UpstageProvider from './upstage';
36+
import VertexAIProvider from './vertexai';
3637
import WenxinProvider from './wenxin';
3738
import XAIProvider from './xai';
3839
import ZeroOneProvider from './zeroone';
@@ -87,6 +88,7 @@ export const DEFAULT_MODEL_PROVIDER_LIST = [
8788
AnthropicProvider,
8889
BedrockProvider,
8990
GoogleProvider,
91+
VertexAIProvider,
9092
DeepSeekProvider,
9193
HuggingFaceProvider,
9294
OpenRouterProvider,
@@ -162,6 +164,7 @@ export { default as StepfunProviderCard } from './stepfun';
162164
export { default as TaichuProviderCard } from './taichu';
163165
export { default as TogetherAIProviderCard } from './togetherai';
164166
export { default as UpstageProviderCard } from './upstage';
167+
export { default as VertexAIProviderCard } from './vertexai';
165168
export { default as WenxinProviderCard } from './wenxin';
166169
export { default as XAIProviderCard } from './xai';
167170
export { default as ZeroOneProviderCard } from './zeroone';

src/config/modelProviders/vertexai.ts

+217
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
import { ModelProviderCard } from '@/types/llm';
2+
3+
// ref: https://ai.google.dev/gemini-api/docs/models/gemini

// `pricing` values shared by every Gemini 1.5 Flash entry below.
const FLASH_PRICING = {
  cachedInput: 0.018_75,
  input: 0.075,
  output: 0.3,
};

// `pricing` values shared by the Gemini 1.5 Pro entries other than Pro 002.
const PRO_PRICING = {
  cachedInput: 0.875,
  input: 3.5,
  output: 10.5,
};

// `pricing` values shared by the Gemini 1.0 Pro entries.
const GEMINI_1_0_PRICING = {
  input: 0.5,
  output: 1.5,
};

// `tokens` values shared per model family.
const FLASH_TOKENS = 1_000_000 + 8192;
const PRO_TOKENS = 2_000_000 + 8192;
const GEMINI_1_0_TOKENS = 30_720 + 2048;

/**
 * Provider card for Vertex AI: the list of selectable chat models plus the
 * provider-level display settings (name, links, checker and API key toggles,
 * smoothing options).
 *
 * Each model gets its own copy of the shared pricing object (via spread), so
 * no two entries share a `pricing` reference.
 */
const VertexAI: ModelProviderCard = {
  chatModels: [
    {
      description:
        'Gemini 1.5 Flash 是Google最新的多模态AI模型,具备快速处理能力,支持文本、图像和视频输入,适用于多种任务的高效扩展。',
      displayName: 'Gemini 1.5 Flash',
      enabled: true,
      functionCall: true,
      id: 'gemini-1.5-flash-latest',
      maxOutput: 8192,
      pricing: { ...FLASH_PRICING },
      tokens: FLASH_TOKENS,
      vision: true,
    },
    {
      description: 'Gemini 1.5 Flash 002 是一款高效的多模态模型,支持广泛应用的扩展。',
      displayName: 'Gemini 1.5 Flash 002',
      enabled: true,
      functionCall: true,
      id: 'gemini-1.5-flash-002',
      maxOutput: 8192,
      pricing: { ...FLASH_PRICING },
      releasedAt: '2024-09-25',
      tokens: FLASH_TOKENS,
      vision: true,
    },
    {
      description: 'Gemini 1.5 Flash 001 是一款高效的多模态模型,支持广泛应用的扩展。',
      displayName: 'Gemini 1.5 Flash 001',
      functionCall: true,
      id: 'gemini-1.5-flash-001',
      maxOutput: 8192,
      pricing: { ...FLASH_PRICING },
      tokens: FLASH_TOKENS,
      vision: true,
    },
    {
      description: 'Gemini 1.5 Flash 0827 提供了优化后的多模态处理能力,适用多种复杂任务场景。',
      displayName: 'Gemini 1.5 Flash 0827',
      functionCall: true,
      id: 'gemini-1.5-flash-exp-0827',
      maxOutput: 8192,
      pricing: { ...FLASH_PRICING },
      releasedAt: '2024-08-27',
      tokens: FLASH_TOKENS,
      vision: true,
    },

    {
      description:
        'Gemini 1.5 Flash 8B 0924 是最新的实验性模型,在文本和多模态用例中都有显著的性能提升。',
      displayName: 'Gemini 1.5 Flash 8B 0924',
      functionCall: true,
      id: 'gemini-1.5-flash-8b-exp-0924',
      maxOutput: 8192,
      pricing: { ...FLASH_PRICING },
      releasedAt: '2024-09-24',
      tokens: FLASH_TOKENS,
      vision: true,
    },
    {
      description:
        'Gemini 1.5 Pro 支持高达200万个tokens,是中型多模态模型的理想选择,适用于复杂任务的多方面支持。',
      displayName: 'Gemini 1.5 Pro',
      enabled: true,
      functionCall: true,
      id: 'gemini-1.5-pro-latest',
      maxOutput: 8192,
      pricing: { ...PRO_PRICING },
      releasedAt: '2024-02-15',
      tokens: PRO_TOKENS,
      vision: true,
    },
    {
      description:
        'Gemini 1.5 Pro 002 是最新的生产就绪模型,提供更高质量的输出,特别在数学、长上下文和视觉任务方面有显著提升。',
      displayName: 'Gemini 1.5 Pro 002',
      enabled: true,
      functionCall: true,
      id: 'gemini-1.5-pro-002',
      maxOutput: 8192,
      // Pro 002 is the only 1.5 Pro entry with its own pricing values.
      pricing: {
        cachedInput: 0.315,
        input: 1.25,
        output: 2.5,
      },
      releasedAt: '2024-09-24',
      tokens: PRO_TOKENS,
      vision: true,
    },
    {
      description: 'Gemini 1.5 Pro 001 是可扩展的多模态AI解决方案,支持广泛的复杂任务。',
      displayName: 'Gemini 1.5 Pro 001',
      functionCall: true,
      id: 'gemini-1.5-pro-001',
      maxOutput: 8192,
      pricing: { ...PRO_PRICING },
      releasedAt: '2024-02-15',
      tokens: PRO_TOKENS,
      vision: true,
    },
    {
      description: 'Gemini 1.5 Pro 0827 结合最新优化技术,带来更高效的多模态数据处理能力。',
      displayName: 'Gemini 1.5 Pro 0827',
      functionCall: true,
      id: 'gemini-1.5-pro-exp-0827',
      maxOutput: 8192,
      pricing: { ...PRO_PRICING },
      releasedAt: '2024-08-27',
      tokens: PRO_TOKENS,
      vision: true,
    },
    {
      description: 'Gemini 1.5 Pro 0801 提供出色的多模态处理能力,为应用开发带来更大灵活性。',
      displayName: 'Gemini 1.5 Pro 0801',
      functionCall: true,
      id: 'gemini-1.5-pro-exp-0801',
      maxOutput: 8192,
      pricing: { ...PRO_PRICING },
      releasedAt: '2024-08-01',
      tokens: PRO_TOKENS,
      vision: true,
    },
    {
      description: 'Gemini 1.0 Pro 是Google的高性能AI模型,专为广泛任务扩展而设计。',
      displayName: 'Gemini 1.0 Pro',
      id: 'gemini-1.0-pro-latest',
      maxOutput: 2048,
      pricing: { ...GEMINI_1_0_PRICING },
      releasedAt: '2023-12-06',
      tokens: GEMINI_1_0_TOKENS,
    },
    {
      description:
        'Gemini 1.0 Pro 001 (Tuning) 提供稳定并可调优的性能,是复杂任务解决方案的理想选择。',
      displayName: 'Gemini 1.0 Pro 001 (Tuning)',
      functionCall: true,
      id: 'gemini-1.0-pro-001',
      maxOutput: 2048,
      pricing: { ...GEMINI_1_0_PRICING },
      releasedAt: '2023-12-06',
      tokens: GEMINI_1_0_TOKENS,
    },
    {
      description: 'Gemini 1.0 Pro 002 (Tuning) 提供出色的多模态支持,专注于复杂任务的有效解决。',
      displayName: 'Gemini 1.0 Pro 002 (Tuning)',
      id: 'gemini-1.0-pro-002',
      maxOutput: 2048,
      pricing: { ...GEMINI_1_0_PRICING },
      releasedAt: '2023-12-06',
      tokens: GEMINI_1_0_TOKENS,
    },
  ],
  checkModel: 'gemini-1.5-flash-latest',
  description:
    'Google 的 Gemini 系列是其最先进、通用的 AI模型,由 Google DeepMind 打造,专为多模态设计,支持文本、代码、图像、音频和视频的无缝理解与处理。适用于从数据中心到移动设备的多种环境,极大提升了AI模型的效率与应用广泛性。',
  id: 'vertexai',
  modelsUrl: 'https://cloud.google.com/vertex-ai/generative-ai/docs/learn/model-versioning',
  name: 'VertexAI',
  showApiKey: false,
  showChecker: false,
  smoothing: {
    speed: 2,
    text: true,
  },
  url: 'https://cloud.google.com/vertex-ai',
};

export default VertexAI;

src/libs/agent-runtime/error.ts

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ export const AgentRuntimeErrorType = {
1313
OllamaBizError: 'OllamaBizError',
1414

1515
InvalidBedrockCredentials: 'InvalidBedrockCredentials',
16+
InvalidVertexCredentials: 'InvalidVertexCredentials',
1617
StreamChunkError: 'StreamChunkError',
1718

1819
InvalidGithubToken: 'InvalidGithubToken',

src/libs/agent-runtime/google/index.ts

+10-3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import type { VertexAI } from '@google-cloud/vertexai';
12
import {
23
Content,
34
FunctionCallPart,
@@ -37,17 +38,23 @@ enum HarmCategory {
3738
enum HarmBlockThreshold {
3839
BLOCK_NONE = 'BLOCK_NONE',
3940
OFF = 'OFF', // https://discuss.ai.google.dev/t/59352
41+
}
42+
43+
/**
 * Constructor options for `LobeGoogleAI`.
 */
interface LobeGoogleAIParams {
44+
/** API key; the constructor throws `InvalidProviderAPIKey` when it is missing, and uses it to build a `GoogleGenerativeAI` client when no `client` is supplied. */
apiKey?: string;
45+
/** Base URL stored on the runtime instance; the constructor discards it when a `client` is supplied. */
baseURL?: string;
46+
/** Pre-built client; when supplied, the constructor uses it instead of creating a `GoogleGenerativeAI` client from `apiKey`. */
client?: GoogleGenerativeAI | VertexAI;
4047
}
4148

4249
export class LobeGoogleAI implements LobeRuntimeAI {
4350
private client: GoogleGenerativeAI;
4451
baseURL?: string;
4552

46-
constructor({ apiKey, baseURL }: { apiKey?: string; baseURL?: string } = {}) {
53+
/**
 * Creates the Google runtime from either an injected client or an API key.
 *
 * @param params.apiKey - Key used to build a `GoogleGenerativeAI` client when no `client` is given.
 * @param params.baseURL - Stored on the instance only when the runtime builds its own client.
 * @param params.client - Pre-built client; used as-is, in which case no API key is required
 *   and `baseURL` is left undefined.
 * @throws AgentRuntimeError of type `InvalidProviderAPIKey` when neither `client` nor
 *   `apiKey` is provided.
 */
constructor({ apiKey, baseURL, client }: LobeGoogleAIParams = {}) {
  if (client) {
    // An injected client carries its own authentication, so the API key guard
    // below must not run for it (callers previously had to pass a dummy key).
    // NOTE(review): the cast assumes a VertexAI client is call-compatible with the
    // GoogleGenerativeAI methods this class uses — confirm against the rest of the class.
    this.client = client as GoogleGenerativeAI;
    this.baseURL = undefined;
    return;
  }

  if (!apiKey) throw AgentRuntimeError.createError(AgentRuntimeErrorType.InvalidProviderAPIKey);

  this.client = new GoogleGenerativeAI(apiKey);
  this.baseURL = baseURL;
}
5259

5360
async chat(rawPayload: ChatStreamPayload, options?: ChatCompetitionOptions) {

src/libs/agent-runtime/types/type.ts

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ export enum ModelProvider {
5555
Taichu = 'taichu',
5656
TogetherAI = 'togetherai',
5757
Upstage = 'upstage',
58+
VertexAI = 'vertexai',
5859
Wenxin = 'wenxin',
5960
XAI = 'xai',
6061
ZeroOne = 'zeroone',
+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import { VertexAI, VertexInit } from '@google-cloud/vertexai';
2+
3+
import { AgentRuntimeError, AgentRuntimeErrorType, LobeGoogleAI } from '@/libs/agent-runtime';
4+
5+
/**
 * Vertex AI flavour of the Google runtime. It adds a factory that builds the
 * runtime around a `VertexAI` client instead of an API-key based client.
 */
export class LobeVertexAI extends LobeGoogleAI {
  /**
   * Builds a Google runtime backed by a `VertexAI` client.
   *
   * @param params - Options forwarded (as a shallow copy) to the `VertexAI` constructor.
   * @returns A `LobeGoogleAI` instance wrapping the created client.
   * @throws AgentRuntimeError of type `InvalidVertexCredentials` when client creation
   *   throws an `Error` named `IllegalArgumentError`; any other thrown value is
   *   re-thrown unchanged.
   */
  static initFromVertexAI(params?: VertexInit) {
    try {
      const client = new VertexAI({ ...params });

      // The placeholder key is not used to build a client because one is injected;
      // it only satisfies the constructor's `apiKey` parameter.
      return new LobeGoogleAI({ apiKey: 'avoid-error', client });
    } catch (e) {
      // Narrow instead of casting: a non-Error throw (e.g. null) must be re-thrown
      // as-is rather than crash here with a TypeError on `.name`.
      if (e instanceof Error && e.name === 'IllegalArgumentError') {
        throw AgentRuntimeError.createError(AgentRuntimeErrorType.InvalidVertexCredentials, {
          message: e.message,
        });
      }

      throw e;
    }
  }
}

src/types/user/settings/keyVaults.ts

+1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ export interface UserKeyVaults {
6262
taichu?: OpenAICompatibleKeyVault;
6363
togetherai?: OpenAICompatibleKeyVault;
6464
upstage?: OpenAICompatibleKeyVault;
65+
vertexai?: undefined;
6566
wenxin?: WenxinKeyVault;
6667
xai?: OpenAICompatibleKeyVault;
6768
zeroone?: OpenAICompatibleKeyVault;

0 commit comments

Comments
 (0)