Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@ import {
// ── matchesPresetHwFilter ────────────────────────────────────────────

describe('matchesPresetHwFilter', () => {
const dsv4 = Model.DeepSeek_V4_Pro; // mtpEngineExclusion = true
const dsr1 = Model.DeepSeek_R1; // no MTP exclusion
const dsv4 = Model.DeepSeek_V4_Pro; // has an MTP exclusion rule
const dsr1 = Model.DeepSeek_R1; // no exclusion rule

it('matches a bare GPU prefix against any framework variant on that GPU', () => {
expect(matchesPresetHwFilter('b300_sglang', ['b300'], dsv4)).toBe(true);
expect(matchesPresetHwFilter('b300_vllm', ['b300'], dsv4)).toBe(true);
expect(matchesPresetHwFilter('b300_dynamo-vllm', ['b300'], dsv4)).toBe(true);
});

it('skips _mtp keys via a bare GPU prefix only for mtpEngineExclusion models', () => {
// dsv4 has mtpEngineExclusion → MTP keys filtered out under bare prefix
it('skips _mtp keys via a bare GPU prefix only for models with an exclusion rule', () => {
// dsv4 has an MTP exclusion rule → MTP keys filtered out under bare prefix
expect(matchesPresetHwFilter('b300_sglang_mtp', ['b300'], dsv4)).toBe(false);
expect(matchesPresetHwFilter('b300_vllm_mtp', ['b300'], dsv4)).toBe(false);
// dsr1 (and other models) → bare prefix still pulls MTP variants through
Expand Down
19 changes: 8 additions & 11 deletions packages/app/src/components/favorites/favorite-presets.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import type { BenchmarkRow } from '@/lib/api';
import { hasMtpEngineExclusion, Model, Sequence } from '@/lib/data-mappings';
import { getModelExclusion, Model, Sequence } from '@/lib/data-mappings';

export interface FavoritePreset {
id: string;
Expand All @@ -26,23 +26,20 @@ export interface FavoritePreset {
* Match an hwKey against a preset's hwFilter. Exact entries always match
* exactly (so MTP keys like `h100_dynamo-trt_mtp` can be explicitly opted in).
* Bare GPU prefixes (no underscore) match any framework variant on that GPU,
* but for models with an exclusion rule (currently dsv4 MTP) they also skip
* keys matching the rule's suffix (e.g. `_mtp`) — otherwise the preset would
* surface two comparability groups on the same chart (for dsv4: two engine
* families' forced-acceptance MTP numbers), which the legend toggle guard
* already blocks for explicit user actions.
*/
export function matchesPresetHwFilter(
  hwKey: string,
  filter: string[],
  model: Model | string | null | undefined,
): boolean {
  // Exact entries always match, so a preset can explicitly opt in to a key
  // that carries an exclusion suffix (e.g. `h100_dynamo-trt_mtp`).
  if (filter.includes(hwKey)) return true;

  // A key ending in one of the model's exclusion suffixes is never pulled in
  // by a bare GPU prefix; models without exclusion rules yield no suffixes.
  const isExcludedVariant = getModelExclusion(model).some((spec) =>
    hwKey.endsWith(spec.suffix),
  );
  if (isExcludedVariant) return false;

  // Bare GPU prefixes (no underscore) match any `<gpu>_<variant>` key.
  return filter.some((f) => !f.includes('_') && hwKey.startsWith(`${f}_`));
}

Expand Down
76 changes: 42 additions & 34 deletions packages/app/src/components/inference/InferenceContext.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,17 @@ import {
import { useUrlState } from '@/hooks/useUrlState';
import { buildAvailabilityHwKey } from '@/lib/chart-utils';
import { getHardwareConfig, getModelSortIndex, isKnownGpu, TABLEAU_10 } from '@/lib/constants';
import { hasMtpEngineExclusion, MODEL_PREFIX_MAPPING } from '@/lib/data-mappings';
import { getModelExclusion, MODEL_PREFIX_MAPPING } from '@/lib/data-mappings';
import {
MtpEngineConflictToast,
type MtpEngineConflictDetail,
} from '@/components/mtp-engine-conflict-toast';
import { clearAllMtpFamilies, effectiveLegendItems, resolveMtpToggle } from '@/lib/mtp-exclusion';
import {
buildExclusion,
clearAllExclusionGroups,
effectiveLegendItems,
resolveExclusionToggle,
} from '@/lib/exclusion';
import { filterRunsByModel, getDisplayLabel } from '@/lib/utils';

import { useChartData } from './hooks/useChartData';
Expand Down Expand Up @@ -422,8 +427,8 @@ export function InferenceProvider({
const pendingHwFilterRef = useRef(pendingHwFilter);
pendingHwFilterRef.current = pendingHwFilter;
// Read selectedModel via a ref so the callback identity below stays stable —
// matchesPresetHwFilter only consults the model to gate the bare-prefix MTP
// skip (mtpEngineExclusion models), and we want the current value at call time.
// matchesPresetHwFilter only consults the model to gate the bare-prefix
// exclusion-suffix skip, and we want the current value at call time.
const selectedModelRef = useRef(selectedModel);
selectedModelRef.current = selectedModel;
// Note: setActiveHwTypes is a useState dispatcher that accepts functional updaters,
Expand Down Expand Up @@ -474,18 +479,21 @@ export function InferenceProvider({
}
}, [pendingHwFilter, hwTypesWithData, setActiveHwTypes]);

const mtpExclusion = hasMtpEngineExclusion(selectedModel);
const exclusion = useMemo(() => {
const specs = getModelExclusion(selectedModel);
return specs.length > 0 ? buildExclusion(specs) : null;
}, [selectedModel]);
const toggleHwType = useCallback(
(hw: string) => {
// Under MTP exclusion, hide MTP keys from inactive families when
// Under exclusion, hide participating keys from inactive groups when
// computing the toggle "universe". This makes the default-deselected
// state (DSv4 on first load) count as "all selected", so clicking a
// state (DSv4 MTP on first load) count as "all selected", so clicking a
// legend entry solos it instead of just removing it.
const toggleUniverse = mtpExclusion
? effectiveLegendItems(hwTypesWithData, activeHwTypes)
const toggleUniverse = exclusion
? effectiveLegendItems(hwTypesWithData, activeHwTypes, exclusion)
: hwTypesWithData;
if (mtpExclusion) {
const decision = resolveMtpToggle(activeHwTypes, hw, toggleUniverse);
if (exclusion) {
const decision = resolveExclusionToggle(activeHwTypes, hw, toggleUniverse, exclusion);
if (decision.kind === 'block') {
setMtpConflict({
kind: 'blocked',
Expand All @@ -505,7 +513,7 @@ export function InferenceProvider({
setActivePresetId(null);
presetHwFilterRef.current = null;
},
[toggleHwRaw, hwTypesWithData, mtpExclusion, activeHwTypes, setActiveHwTypes],
[toggleHwRaw, hwTypesWithData, exclusion, activeHwTypes, setActiveHwTypes],
);

const removeHwType = useCallback(
Expand Down Expand Up @@ -536,16 +544,16 @@ export function InferenceProvider({
);
const removeActiveDate = useCallback((id: string) => removeDateRaw(id), [removeDateRaw]);
const selectAllHwTypes = useCallback(() => {
if (mtpExclusion) {
const { result, droppedFamilies } = clearAllMtpFamilies(hwTypesWithData);
if (exclusion) {
const { result, droppedGroups } = clearAllExclusionGroups(hwTypesWithData, exclusion);
setActiveHwTypes(result);
if (droppedFamilies.length > 0) {
setMtpConflict({ kind: 'cleared', families: droppedFamilies });
if (droppedGroups.length > 0) {
setMtpConflict({ kind: 'cleared', families: droppedGroups });
}
return;
}
selectAllHwRaw(hwTypesWithData);
}, [selectAllHwRaw, hwTypesWithData, mtpExclusion, setActiveHwTypes]);
}, [selectAllHwRaw, hwTypesWithData, exclusion, setActiveHwTypes]);
const selectAllActiveDates = useCallback(
() => selectAllDatesRaw(allDateIds),
[selectAllDatesRaw, allDateIds],
Expand Down Expand Up @@ -588,11 +596,11 @@ export function InferenceProvider({
// → fall back to the default "all available" set. Exclusion sanitization is
// then applied below so the fallback itself is exclusion-rule safe.
if (restored.size === 0) restored = hwTypesWithData;
if (mtpExclusion) {
const cleared = clearAllMtpFamilies(restored);
if (exclusion) {
const cleared = clearAllExclusionGroups(restored, exclusion);
restored = cleared.result;
if (cleared.droppedFamilies.length > 0) {
setMtpConflict({ kind: 'cleared', families: cleared.droppedFamilies });
if (cleared.droppedGroups.length > 0) {
setMtpConflict({ kind: 'cleared', families: cleared.droppedGroups });
}
}
setActiveHwTypes(restored);
Expand All @@ -601,7 +609,7 @@ export function InferenceProvider({
}, [
pendingActiveHwTypes,
hwTypesWithData,
mtpExclusion,
exclusion,
selectedModel,
effectiveSequence,
precisionsKey,
Expand All @@ -622,22 +630,22 @@ export function InferenceProvider({
);
if (filtered.size > 0) {
// Presets explicitly chose hw configs — respect their picks. The
// matcher already excludes _mtp under bare prefixes for
// mtpEngineExclusion models, so we don't fall through to
// clearAllMtpFamilies (which would fire the toast). The legend
// toggle guard still blocks adding a second engine family later.
// matcher already excludes rule-suffix keys under bare prefixes for
// models with an exclusion rule, so we don't fall through to
// clearAllExclusionGroups (which would fire the toast). The legend
// toggle guard still blocks adding a second comparability group later.
setActiveHwTypes(filtered);
return;
}
}
if (mtpExclusion) {
// When multiple engine families' MTP have data, disable them all by
// default and surface a toast. The user has to opt in to one engine's
// MTP explicitly — never multiple at once.
const { result, droppedFamilies } = clearAllMtpFamilies(hwTypesWithData);
if (exclusion) {
// When multiple comparability groups have data, disable them all by
// default and surface a toast. The user has to opt into one group
// explicitly — never multiple at once.
const { result, droppedGroups } = clearAllExclusionGroups(hwTypesWithData, exclusion);
setActiveHwTypes(result);
if (droppedFamilies.length > 0) {
setMtpConflict({ kind: 'cleared', families: droppedFamilies });
if (droppedGroups.length > 0) {
setMtpConflict({ kind: 'cleared', families: droppedGroups });
}
return;
}
Expand All @@ -647,7 +655,7 @@ export function InferenceProvider({
effectiveSequence,
precisionsKey,
hwTypesWithData,
mtpExclusion,
exclusion,
pendingActiveHwTypes,
]);

Expand Down
37 changes: 27 additions & 10 deletions packages/app/src/lib/data-mappings.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import type { ExclusionSpec } from './exclusion';

export enum Model {
Llama3_3_70B = 'Llama-3.3-70B-Instruct-FP8',
Llama3_1_70B = 'Llama-3.1-70B-Instruct-FP8-KV',
Expand Down Expand Up @@ -41,13 +43,23 @@ interface ModelConfig {
prefix: string;
category: CategoryTag;
/**
* If true, MTP configs from different engine families (e.g. vLLM and SGLang)
* cannot be active simultaneously, since their acceptance-rate forcing
* implementations differ and aren't directly comparable on the same graph.
* Data-driven exclusion rules for this model (see `exclusion.ts`). Each spec
* partitions matching config keys into comparability groups that can't share
* a graph with each other. Absent/empty = no exclusion.
*/
mtpEngineExclusion?: boolean;
exclusion?: ExclusionSpec[];
}

/**
 * Exclusion rule for dsv4 MTP configs. Keys ending in `_mtp` that come from
 * different engine families must not be active at the same time, because each
 * family forces acceptance rates with its own implementation. ATOM and SGLang
 * share the upstream ROCm MTP path, so ATOM is aliased into the SGLang
 * comparability group; vLLM stays in a group of its own.
 */
const MTP_ENGINE_EXCLUSION: ExclusionSpec[] = [
  {
    suffix: '_mtp',
    stripPrefixes: ['dynamo-', 'mori-'],
    groupAliases: { atom: 'sglang' },
  },
];

// Total parameter counts appended to each label so users can compare model
// scale at a glance in the dropdown. For Llama and gpt-oss the count is
// already part of the canonical name (Llama 3.3 70B, gpt-oss 120B) so no
Expand All @@ -58,7 +70,7 @@ const MODEL_CONFIG: Record<Model, ModelConfig> = {
label: 'DeepSeek V4 Pro 1.6T',
prefix: 'dsv4',
category: 'default',
mtpEngineExclusion: true,
exclusion: MTP_ENGINE_EXCLUSION,
},
[Model.Kimi_K2_5]: {
// K2.5 and K2.6 share an architecture, so the dropdown surfaces both
Expand Down Expand Up @@ -117,12 +129,17 @@ export function getModelLabel(model: Model): string {
}

/**
 * Exclusion specs configured for a model (see `exclusion.ts`).
 *
 * @param model - Model identifier used to index `MODEL_CONFIG`; may be nullish.
 * @returns The model's configured specs, or an empty array when `model` is
 *   falsy, has no `MODEL_CONFIG` entry, or its entry has no `exclusion` field.
 */
export function getModelExclusion(model: Model | string | null | undefined): ExclusionSpec[] {
  // Nullish or empty identifiers cannot be looked up; treat as "no rules".
  if (!model) return [];
  // Unknown string keys resolve to `undefined`, which also falls back to [].
  return MODEL_CONFIG[model as Model]?.exclusion ?? [];
}

/**
 * True if the model has any config-exclusion rule.
 *
 * @param model - Model identifier; may be nullish.
 * @returns `true` when `getModelExclusion(model)` yields at least one spec.
 */
export function hasExclusion(model: Model | string | null | undefined): boolean {
  return getModelExclusion(model).length > 0;
}

/**
Expand Down
Loading
Loading