Commit bfd9a10

💄 style: show token generate performance
1 parent 59cafa0 commit bfd9a10

8 files changed: +124 insertions, -18 deletions

src/features/Conversation/Extras/Usage/UsageDetail/index.tsx (+20 -4)

@@ -9,27 +9,27 @@ import { Center, Flexbox } from 'react-layout-kit';
 import { aiModelSelectors, useAiInfraStore } from '@/store/aiInfra';
 import { useGlobalStore } from '@/store/global';
 import { systemStatusSelectors } from '@/store/global/selectors';
-import { ModelTokensUsage } from '@/types/message';
+import { MessageMetadata } from '@/types/message';
 import { formatNumber } from '@/utils/format';

 import ModelCard from './ModelCard';
 import TokenProgress, { TokenProgressItem } from './TokenProgress';
 import { getDetailsToken } from './tokens';

 interface TokenDetailProps {
+  meta: MessageMetadata;
   model: string;
   provider: string;
-  usage: ModelTokensUsage;
 }

-const TokenDetail = memo<TokenDetailProps>(({ usage, model, provider }) => {
+const TokenDetail = memo<TokenDetailProps>(({ meta, model, provider }) => {
   const { t } = useTranslation('chat');
   const theme = useTheme();

   const modelCard = useAiInfraStore(aiModelSelectors.getModelCard(model, provider));
   const isShowCredit = useGlobalStore(systemStatusSelectors.isShowCredit) && !!modelCard?.pricing;

-  const detailTokens = getDetailsToken(usage, modelCard);
+  const detailTokens = getDetailsToken(meta, modelCard);
   const inputDetails = [
     !!detailTokens.inputAudio && {
       color: theme.cyan9,
@@ -113,6 +113,10 @@ const TokenDetail = memo<TokenDetailProps>(({ usage, model, provider }) => {
     detailTokens.totalTokens!.credit / detailTokens.totalTokens!.token,
     2,
   );
+
+  const tps = meta?.tps ? formatNumber(meta.tps, 2) : undefined;
+  const ttft = meta?.ttft ? formatNumber(meta.ttft / 1000, 2) : undefined;
+
   return (
     <Popover
       arrow={false}
@@ -170,6 +174,18 @@ const TokenDetail = memo<TokenDetailProps>(({ usage, model, provider }) => {
             <div style={{ fontWeight: 500 }}>{averagePricing}</div>
           </Flexbox>
         )}
+        {tps && (
+          <Flexbox align={'center'} gap={4} horizontal justify={'space-between'}>
+            <div style={{ color: theme.colorTextSecondary }}>{'Token Speed (t/s)'}</div>
+            <div style={{ fontWeight: 500 }}>{tps}</div>
+          </Flexbox>
+        )}
+        {ttft && (
+          <Flexbox align={'center'} gap={4} horizontal justify={'space-between'}>
+            <div style={{ color: theme.colorTextSecondary }}>{'Delay (s)'}</div>
+            <div style={{ fontWeight: 500 }}>{ttft}</div>
+          </Flexbox>
+        )}
       </Flexbox>
     </Flexbox>
   </Flexbox>
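
A quick sanity check on how the two new lines format values. This is a minimal standalone sketch: formatNumber is stubbed out here (the real helper lives in '@/utils/format' and is assumed to round to the given number of decimals), and the sample metadata values are invented.

// Stand-in for formatNumber from '@/utils/format' (assumption: rounds to N decimals).
const formatNumber = (value: number, decimals: number): string => value.toFixed(decimals);

// Hypothetical metadata, shaped like MessageMetadata after a 'speed' chunk is merged in.
const meta = { tps: 42.518, ttft: 1280 }; // ttft arrives in milliseconds

const tps = meta.tps ? formatNumber(meta.tps, 2) : undefined;           // '42.52' (t/s)
const ttft = meta.ttft ? formatNumber(meta.ttft / 1000, 2) : undefined; // '1.28' (s)

console.log(tps, ttft);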

src/features/Conversation/Extras/Usage/index.tsx (+1 -1)

@@ -31,7 +31,7 @@ const Usage = memo<UsageProps>(({ model, metadata, provider }) => {
       </Center>

       {!!metadata.totalTokens && (
-        <TokenDetail model={model as string} provider={provider} usage={metadata} />
+        <TokenDetail meta={metadata} model={model as string} provider={provider} />
       )}
     </Flexbox>
   );

src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts (+17 -8)

@@ -201,6 +201,7 @@ export const LobeOpenAICompatibleFactory = <T extends Record<string, any> = any>

   async chat({ responseMode, ...payload }: ChatStreamPayload, options?: ChatCompetitionOptions) {
     try {
+      const inputStartAt = Date.now();
       const postPayload = chatCompletion?.handlePayload
         ? chatCompletion.handlePayload(payload, this._options)
         : ({
@@ -253,10 +254,14 @@
         debugStream(useForDebugStream).catch(console.error);
       }

-      const streamHandler = chatCompletion?.handleStream || OpenAIStream;
-      return StreamingResponse(streamHandler(prod, streamOptions), {
-        headers: options?.headers,
-      });
+      return StreamingResponse(
+        chatCompletion?.handleStream
+          ? chatCompletion.handleStream(prod, streamOptions.callbacks)
+          : OpenAIStream(prod, streamOptions, { inputStartAt }),
+        {
+          headers: options?.headers,
+        },
+      );
     }

     if (debug?.chatCompletion?.()) {
@@ -269,10 +274,14 @@
         chatCompletion?.handleTransformResponseToStream || transformResponseToStream;
       const stream = transformHandler(response as unknown as OpenAI.ChatCompletion);

-      const streamHandler = chatCompletion?.handleStream || OpenAIStream;
-      return StreamingResponse(streamHandler(stream, streamOptions), {
-        headers: options?.headers,
-      });
+      return StreamingResponse(
+        chatCompletion?.handleStream
+          ? chatCompletion.handleStream(stream, streamOptions.callbacks)
+          : OpenAIStream(stream, streamOptions, { inputStartAt }),
+        {
+          headers: options?.headers,
+        },
+      );
     } catch (error) {
       throw this.handleError(error);
     }
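
The key point of this change: inputStartAt is captured before the upstream request is fired, and OpenAIStream grows an optional third TraceOptions argument, so existing two-argument callers keep working. A minimal sketch of the same pattern (the endpoint and payload here are invented, not the factory's real ones):

// Capture wall-clock time before the request leaves, so the stream pipeline
// can later compute time-to-first-token relative to this instant.
const inputStartAt = Date.now();

const response = await fetch('https://api.example.com/v1/chat/completions', {
  body: JSON.stringify({ messages: [{ content: 'hi', role: 'user' }], stream: true }),
  headers: { 'Content-Type': 'application/json' },
  method: 'POST',
});
console.log(response.status, Date.now() - inputStartAt, 'ms until response headers');

// The timestamp then rides along as the optional third argument:
// OpenAIStream(stream, streamOptions, { inputStartAt })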

src/libs/agent-runtime/utils/streams/openai.ts (+13 -1)

@@ -16,6 +16,7 @@ import {
   createCallbacksTransformer,
   createFirstErrorHandleTransformer,
   createSSEProtocolTransformer,
+  createTokenSpeedCalculator,
   generateToolCallId,
 } from './protocol';

@@ -39,6 +40,11 @@
   }

   try {
+    // performance monitor return
+    if (chunk.id === 'speed') {
+      return { data: chunk.object, id: chunk.id, type: 'speed' };
+    }
+
     // maybe need another structure to add support for multiple choices
     const item = chunk.choices[0];
     if (!item) {
@@ -221,9 +227,14 @@ export interface OpenAIStreamOptions {
   provider?: string;
 }

+export interface TraceOptions {
+  inputStartAt?: number;
+}
+
 export const OpenAIStream = (
   stream: Stream<OpenAI.ChatCompletionChunk> | ReadableStream,
   { callbacks, provider, bizErrorTypeTransformer }: OpenAIStreamOptions = {},
+  { inputStartAt }: TraceOptions = {},
 ) => {
   const streamStack: StreamContext = { id: '' };

@@ -236,7 +247,8 @@
     // provider like huggingface or minimax will return error in the stream,
     // so in the first Transformer, we need to handle the error
     .pipeThrough(createFirstErrorHandleTransformer(bizErrorTypeTransformer, provider))
-    .pipeThrough(createSSEProtocolTransformer(transformOpenAIStream, streamStack))
+    .pipeThrough(createTokenSpeedCalculator(transformOpenAIStream, { inputStartAt, streamStack }))
+    .pipeThrough(createSSEProtocolTransformer((c) => c, streamStack))
     .pipeThrough(createCallbacksTransformer(callbacks))
   );
 };
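
Note the ordering in the last hunk: createTokenSpeedCalculator now applies transformOpenAIStream itself and sits before createSSEProtocolTransformer, which is reduced to an identity pass ((c) => c) that only serializes. The speed chunk has to be injected while the data is still typed protocol chunks, before SSE serialization. A toy pipeline illustrating that constraint (stage names and payloads invented):

// Stage 1: turn raw chunks into typed protocol objects; extras can be injected here.
const toProtocol = new TransformStream<string, { data: string; type: string }>({
  transform: (raw, ctrl) => ctrl.enqueue({ data: raw, type: 'text' }),
});

// Stage 2: serialize protocol objects to SSE text; too late to inspect token counts.
const toSSE = new TransformStream<{ data: string; type: string }, string>({
  transform: (chunk, ctrl) =>
    ctrl.enqueue(`event: ${chunk.type}\ndata: ${JSON.stringify(chunk.data)}\n\n`),
});

const source = new ReadableStream<string>({
  start(ctrl) {
    ctrl.enqueue('hello');
    ctrl.close();
  },
});

// Transform first, serialize second, mirroring the new pipeThrough order.
const reader = source.pipeThrough(toProtocol).pipeThrough(toSSE).getReader();
console.log(await reader.read()); // { value: 'event: text\ndata: "hello"\n\n', done: false }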

src/libs/agent-runtime/utils/streams/protocol.ts (+44 -1)

@@ -1,5 +1,5 @@
 import { ChatStreamCallbacks } from '@/libs/agent-runtime';
-import { ModelTokensUsage } from '@/types/message';
+import { ModelSpeed, ModelTokensUsage } from '@/types/message';

 import { AgentRuntimeErrorType } from '../../error';

@@ -50,6 +50,8 @@
     | 'error'
     // token usage
     | 'usage'
+    // performance monitor
+    | 'speed'
     // unknown data result
     | 'data';
 }
@@ -244,3 +246,44 @@ export const createSSEDataExtractor = () =>
     }
   },
 });
+
+/**
+ * Create a middleware to calculate the token generation speed
+ * @requires createSSEProtocolTransformer
+ */
+export const createTokenSpeedCalculator = (
+  transformer: (chunk: any, stack: StreamContext) => StreamProtocolChunk | StreamProtocolChunk[],
+  { streamStack, inputStartAt }: { inputStartAt?: number; streamStack?: StreamContext } = {},
+) => {
+  let outputStartAt: number | undefined;
+
+  const process = (chunk: StreamProtocolChunk) => {
+    let result = [chunk];
+    // if the chunk is the first text chunk, set as output start
+    if (!outputStartAt && chunk.type === 'text') outputStartAt = Date.now();
+    // if the chunk is the usage chunk, compute and emit the speed chunk
+    if (inputStartAt && outputStartAt && chunk.type === 'usage') {
+      const outputTokens = chunk.data?.totalOutputTokens || chunk.data?.outputTextTokens;
+      result.push({
+        data: {
+          tps: (outputTokens / (Date.now() - outputStartAt)) * 1000,
+          ttft: outputStartAt - inputStartAt,
+        } as ModelSpeed,
+        id: 'output_speed',
+        type: 'speed',
+      });
+    }
+    return result;
+  };
+
+  return new TransformStream({
+    transform(chunk, controller) {
+      let result = transformer(chunk, streamStack || { id: '' });
+      if (!Array.isArray(result)) result = [result];
+      result.forEach((r) => {
+        const processed = process(r);
+        if (processed) processed.forEach((p) => controller.enqueue(p));
+      });
+    },
+  });
+};
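
A worked example of the two formulas (all numbers invented): ttft is the gap between sending the request and seeing the first text chunk, while tps divides the output tokens by the generation window and multiplies by 1000 to convert milliseconds to seconds.

// Timeline in ms (invented): request sent -> first token -> usage chunk arrives.
const inputStartAt = 10_000;
const outputStartAt = 10_600;  // first 'text' chunk observed
const usageArrivedAt = 15_600; // 'usage' chunk observed
const outputTokens = 250;

const ttft = outputStartAt - inputStartAt;                            // 600 ms
const tps = (outputTokens / (usageArrivedAt - outputStartAt)) * 1000; // 50 tokens/s

console.log({ tps, ttft });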

src/store/chat/slices/aiChat/actions/generateAIChat.ts (+4 -2)

@@ -562,7 +562,7 @@ export const generateAIChat: StateCreator<
     },
     onFinish: async (
       content,
-      { traceId, observationId, toolCalls, reasoning, grounding, usage },
+      { traceId, observationId, toolCalls, reasoning, grounding, usage, speed },
     ) => {
       // if there is traceId, update it
       if (traceId) {
@@ -592,13 +592,15 @@
         internal_toggleToolCallingStreaming(messageId, undefined);
       }

+      console.log('Generate Message Finish:', usage, speed);
+
       // update the content after fetch result
       await internal_updateMessageContent(messageId, content, {
         toolCalls,
         reasoning: !!reasoning ? { ...reasoning, duration } : undefined,
         search: !!grounding?.citations ? grounding : undefined,
-        metadata: usage,
         imageList: finalImages.length > 0 ? finalImages : undefined,
+        metadata: speed ? { ...usage, ...speed } : usage,
       });
     },
     onMessageHandle: async (chunk) => {
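
The new metadata line is a plain object spread, so the speed fields simply extend the usage record when a speed chunk was seen. A small sketch with invented values:

// Invented values: what onFinish might receive once a 'speed' chunk arrived.
const usage = { totalInputTokens: 120, totalOutputTokens: 250, totalTokens: 370 };
const speed = { tps: 50, ttft: 600 };

// Same expression as in the diff: fall back to plain usage when no speed arrived.
const metadata = speed ? { ...usage, ...speed } : usage;
// -> { totalInputTokens: 120, totalOutputTokens: 250, totalTokens: 370, tps: 50, ttft: 600 }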

src/types/message/base.ts (+8)

@@ -44,8 +44,16 @@
   totalTokens?: number;
 }

+export interface ModelSpeed {
+  // tokens per second
+  tps?: number;
+  // time to first token
+  ttft?: number;
+}
+
 export interface MessageMetadata extends ModelTokensUsage {
   tps?: number;
+  ttft?: number;
 }

 export type MessageRoleType = 'user' | 'system' | 'assistant' | 'tool';
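
With these types in place, a populated MessageMetadata could look like the following inside the repo (sample values invented; note ttft is stored in milliseconds, and the usage panel divides by 1000 for display):

import type { MessageMetadata } from '@/types/message';

const metadata: MessageMetadata = {
  totalTokens: 370,
  tps: 50,   // tokens per second
  ttft: 600, // time to first token, in milliseconds
};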

src/utils/fetch/fetchSSE.ts (+17 -1)

@@ -10,6 +10,7 @@ import {
   MessageToolCallChunk,
   MessageToolCallSchema,
   ModelReasoning,
+  ModelSpeed,
   ModelTokensUsage,
 } from '@/types/message';
 import { ChatImageChunk } from '@/types/message/image';
@@ -29,6 +30,7 @@ export type OnFinishHandler = (
     images?: ChatImageChunk[];
     observationId?: string | null;
     reasoning?: ModelReasoning;
+    speed?: ModelSpeed;
     toolCalls?: MessageToolCall[];
     traceId?: string | null;
     type?: SSEFinishType;
@@ -41,6 +43,11 @@ export interface MessageUsageChunk {
   usage: ModelTokensUsage;
 }

+export interface MessageSpeedChunk {
+  speed: ModelSpeed;
+  type: 'speed';
+}
+
 export interface MessageTextChunk {
   text: string;
   type: 'text';
@@ -82,7 +89,8 @@ export interface FetchSSEOptions {
     | MessageReasoningChunk
     | MessageGroundingChunk
     | MessageUsageChunk
-    | MessageBase64ImageChunk,
+    | MessageBase64ImageChunk
+    | MessageSpeedChunk,
   ) => void;
   smoothing?: SmoothingParams | boolean;
 }
@@ -342,6 +350,7 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptions) => {
   let grounding: GroundingSearch | undefined = undefined;
   let usage: ModelTokensUsage | undefined = undefined;
   let images: ChatImageChunk[] = [];
+  let speed: ModelSpeed | undefined = undefined;

   await fetchEventSource(url, {
     body: options.body,
@@ -433,6 +442,12 @@
         break;
       }

+      case 'speed': {
+        speed = data;
+        options.onMessageHandle?.({ speed: data, type: 'speed' });
+        break;
+      }
+
       case 'grounding': {
         grounding = data;
         options.onMessageHandle?.({ grounding: data, type: 'grounding' });
@@ -517,6 +532,7 @@
       images: images.length > 0 ? images : undefined,
       observationId,
       reasoning: !!thinking ? { content: thinking, signature: thinkingSignature } : undefined,
+      speed,
       toolCalls,
      traceId,
      type: finishedType,
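
On the consuming side, callers can now observe speed both incrementally (per 'speed' chunk) and once at the end (via onFinish). A hypothetical caller sketch: the endpoint and handler bodies are invented, and it assumes onFinish is part of FetchSSEOptions alongside onMessageHandle, as the diff's option shapes suggest.

await fetchSSE('/api/chat', {
  method: 'POST',
  onFinish: async (content, { speed, usage }) => {
    // speed stays undefined unless a 'speed' event arrived during the stream
    if (speed) console.log(`final: ${speed.tps?.toFixed(1)} t/s, ttft ${speed.ttft} ms`);
  },
  onMessageHandle: (chunk) => {
    if (chunk.type === 'speed') console.log('live speed chunk:', chunk.speed);
  },
});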
