feat: show output speed as last response with window average
- speed: replace input/output pair with two output speeds
- last single response speed, window average in parentheses
- format: "↓ 74.3 (120.5) t/s"
This commit is contained in:
+3
-3
@@ -55,9 +55,9 @@ fn git() {
|
||||
/// 用合成数据预览 token 速度的各种情况。
|
||||
fn speed() {
|
||||
let samples = [
|
||||
("正常", Speed { input_per_sec: Some(2.7), output_per_sec: Some(134.4) }),
|
||||
("高速(k)", Speed { input_per_sec: Some(1234.0), output_per_sec: Some(3456.0) }),
|
||||
("缺输入", Speed { input_per_sec: None, output_per_sec: Some(88.5) }),
|
||||
("正常", Speed { last_output: Some(74.3), window_output: Some(120.5) }),
|
||||
("高速(k)", Speed { last_output: Some(1234.0), window_output: Some(3456.0) }),
|
||||
("缺最近", Speed { last_output: None, window_output: Some(88.5) }),
|
||||
("无数据", Speed::EMPTY),
|
||||
];
|
||||
for (label, speed) in &samples {
|
||||
|
||||
+50
-46
@@ -1,6 +1,9 @@
|
||||
//! 从转录 JSONL 计算 token 速度。
|
||||
//! 从转录 JSONL 计算输出 token 速度。
|
||||
//!
|
||||
//! 速度 = 输出 token ÷ 实际生成时长。给两个结果:
|
||||
//! - `last_output`:最近一次响应的速度(末尾单条请求)
|
||||
//! - `window_output`:最近 N 次请求的窗口平均
|
||||
//!
|
||||
//! 速度 = token 数 ÷ 实际生成时长,只统计**最近 N 次请求**(滑动窗口)。
|
||||
//! "实际生成时长" = 每次请求 [上一条 user 时间戳 → 这条 assistant 时间戳] 区间,
|
||||
//! 合并重叠后求和(避免子代理并发时重复计时)。
|
||||
|
||||
@@ -17,16 +20,18 @@ const WINDOW: usize = 8;
|
||||
/// 而我们只要最近 WINDOW 次请求,回读末尾这一截即可——足够覆盖且开销恒定。
|
||||
const TAIL_BYTES: u64 = 1024 * 1024;
|
||||
|
||||
/// Output-token throughput, in tokens per second.
///
/// Each field is `None` when there is no data to compute it from.
pub struct Speed {
    /// Output speed of the most recent single response.
    pub last_output: Option<f64>,
    /// Output speed averaged over the most recent [`WINDOW`] requests.
    pub window_output: Option<f64>,
}

impl Speed {
    /// The "no data" result: both speeds are `None`.
    pub const EMPTY: Speed = Speed {
        last_output: None,
        window_output: None,
    };
}
|
||||
|
||||
@@ -48,32 +53,18 @@ struct Message {
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct Usage {
|
||||
#[serde(default)]
|
||||
input_tokens: u64,
|
||||
#[serde(default)]
|
||||
cache_creation_input_tokens: u64,
|
||||
#[serde(default)]
|
||||
cache_read_input_tokens: u64,
|
||||
#[serde(default)]
|
||||
output_tokens: u64,
|
||||
}
|
||||
|
||||
impl Usage {
|
||||
/// 真实处理的输入 token = 新输入 + 缓存创建 + 缓存读取。
|
||||
fn total_input(&self) -> u64 {
|
||||
self.input_tokens + self.cache_creation_input_tokens + self.cache_read_input_tokens
|
||||
}
|
||||
}
|
||||
|
||||
/// 一次请求(一条带 usage 的 assistant 消息)。
|
||||
struct Request {
|
||||
input: u64,
|
||||
output: u64,
|
||||
/// 生成时间区间 (start_ms, end_ms),缺时间戳时为 None。
|
||||
interval: Option<(i64, i64)>,
|
||||
}
|
||||
|
||||
/// 计算最近 [`WINDOW`] 次请求的输入/输出速度。
|
||||
/// 计算最近一次响应与最近 [`WINDOW`] 次窗口的输出速度。
|
||||
pub fn speed(path: &str) -> Speed {
|
||||
let Some(content) = read_tail(path) else {
|
||||
return Speed::EMPTY;
|
||||
@@ -106,7 +97,6 @@ pub fn speed(path: &str) -> Speed {
|
||||
_ => None,
|
||||
};
|
||||
requests.push(Request {
|
||||
input: usage.total_input(),
|
||||
output: usage.output_tokens,
|
||||
interval,
|
||||
});
|
||||
@@ -115,34 +105,40 @@ pub fn speed(path: &str) -> Speed {
|
||||
}
|
||||
}
|
||||
|
||||
// 取最近 WINDOW 次请求。
|
||||
let start = requests.len().saturating_sub(WINDOW);
|
||||
let window = &requests[start..];
|
||||
if window.is_empty() {
|
||||
if requests.is_empty() {
|
||||
return Speed::EMPTY;
|
||||
}
|
||||
|
||||
let mut input = 0;
|
||||
let last_output = requests.last().and_then(single_speed);
|
||||
|
||||
let start = requests.len().saturating_sub(WINDOW);
|
||||
let window_output = window_speed(&requests[start..]);
|
||||
|
||||
Speed {
|
||||
last_output,
|
||||
window_output,
|
||||
}
|
||||
}
|
||||
|
||||
/// 单次请求的输出速度:输出 ÷ 区间时长。
|
||||
fn single_speed(r: &Request) -> Option<f64> {
|
||||
let (s, e) = r.interval?;
|
||||
let secs = (e - s) as f64 / 1000.0;
|
||||
(secs > 0.0).then(|| r.output as f64 / secs)
|
||||
}
|
||||
|
||||
/// 一组请求的窗口输出速度:输出之和 ÷ 合并后时长。
|
||||
fn window_speed(window: &[Request]) -> Option<f64> {
|
||||
let mut output = 0;
|
||||
let mut intervals: Vec<(i64, i64)> = Vec::new();
|
||||
for r in window {
|
||||
input += r.input;
|
||||
output += r.output;
|
||||
if let Some(iv) = r.interval {
|
||||
intervals.push(iv);
|
||||
}
|
||||
}
|
||||
|
||||
let duration_ms = merged_duration_ms(intervals);
|
||||
if duration_ms == 0 {
|
||||
return Speed::EMPTY;
|
||||
}
|
||||
|
||||
let secs = duration_ms as f64 / 1000.0;
|
||||
Speed {
|
||||
input_per_sec: Some(input as f64 / secs),
|
||||
output_per_sec: Some(output as f64 / secs),
|
||||
}
|
||||
(duration_ms > 0).then(|| output as f64 / (duration_ms as f64 / 1000.0))
|
||||
}
|
||||
|
||||
/// 读取文件末尾最多 [`TAIL_BYTES`] 字节。
|
||||
@@ -217,13 +213,21 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
fn single_speed_uses_interval_duration() {
    // 200 output tokens over a 2 s interval -> 100 t/s.
    let r = Request {
        output: 200,
        interval: Some((0, 2000)),
    };
    assert_eq!(single_speed(&r), Some(100.0));
}
|
||||
|
||||
#[test]
fn single_speed_none_without_interval() {
    // A request with no interval has no measurable duration, so no speed.
    let r = Request {
        output: 200,
        interval: None,
    };
    assert!(single_speed(&r).is_none());
}
|
||||
}
|
||||
|
||||
@@ -1,30 +1,30 @@
|
||||
//! token 速度:`⇅ 输入/输出 t/s`,柔青色。
|
||||
//! 输出 token 速度:`↓ 最近 (窗口) t/s`,柔青色。
|
||||
//! 主值为最近一次响应,括号内为最近若干次的窗口平均。
|
||||
|
||||
use crate::color::{self, palette};
|
||||
use crate::sources::transcript::{self, Speed};
|
||||
use crate::status::Status;
|
||||
|
||||
/// 输入 / 输出箭头。
|
||||
const UP: &str = "↑";
|
||||
/// 输出箭头。
|
||||
const DOWN: &str = "↓";
|
||||
/// 固定配色:柔青。
|
||||
const COLOR: (u8, u8, u8) = palette::CYAN;
|
||||
|
||||
/// 读取转录、算速度并格式化,如 `↑2.7 ↓134.4 t/s`;完全无数据时返回空串(整段隐藏)。
|
||||
/// 读取转录、算速度并格式化,如 `↓ 74.3 (120.5) t/s`;完全无数据时返回空串(整段隐藏)。
|
||||
pub fn render(status: &Status) -> String {
|
||||
let speed = compute(status);
|
||||
if speed.input_per_sec.is_none() && speed.output_per_sec.is_none() {
|
||||
if speed.last_output.is_none() && speed.window_output.is_none() {
|
||||
return String::new();
|
||||
}
|
||||
display(&speed)
|
||||
}
|
||||
|
||||
/// 把速度格式化成 `↑输入 ↓输出 t/s` 并上色。供渲染与 `test token` 预览共用。
|
||||
/// 把速度格式化成 `↓ 最近 (窗口) t/s` 并上色。供渲染与 `test speed` 预览共用。
|
||||
pub fn display(speed: &Speed) -> String {
|
||||
let text = format!(
|
||||
"{UP} {} {DOWN} {} t/s",
|
||||
fmt(speed.input_per_sec),
|
||||
fmt(speed.output_per_sec)
|
||||
"{DOWN} {} ({}) t/s",
|
||||
fmt(speed.last_output),
|
||||
fmt(speed.window_output)
|
||||
);
|
||||
let (r, g, b) = COLOR;
|
||||
color::fg(&text, r, g, b)
|
||||
|
||||
Reference in New Issue
Block a user