diff --git a/public/cost_2026_01_08.csv b/public/cost_2026_01_08.csv new file mode 100644 index 0000000..3f452c9 --- /dev/null +++ b/public/cost_2026_01_08.csv @@ -0,0 +1,15 @@ +model,avg_input_tokens,avg_output_tokens,cost_per_question +gpt-5.5-xhigh,722,7826,0.2384 +gpt-5.4-xhigh,745,12723,0.1927 +gemini-3.1-pro-preview-high,795,12756,0.1547 +gemini-3.5-flash-high,796,14961,0.1358 +gpt-5.2-2025-12-11-high,751,7597,0.1077 +gpt-5.2-codex,693,8056,0.114 +gpt-5.1-codex-max-high,760,18663,0.1876 +gpt-5.3-codex-high,694,4229,0.0604 +gpt-5.1-2025-11-13-high,670,13388,0.1347 +gemini-3-pro-preview-11-2025-high,725,10678,0.1296 +gemini-3-flash-preview-high,848,21635,0.0653 +qwen3.7-max,785,12193,0.0934 +deepseek-v4-pro,715,33261,0.0292 +kimi-k2.6-thinking,706,21184,0.0854 diff --git a/src/App.css b/src/App.css index 95906ef..8fb4edb 100644 --- a/src/App.css +++ b/src/App.css @@ -302,4 +302,7 @@ thead th:first-child { .liveswebench-callout:hover { background-color: rgb(122, 220, 233); -} \ No newline at end of file +} +/* Cost / token-efficiency columns (optional, top-models-only) */ +.cost-na { color: #b0b0b0; } +th.cost-col, td.cost-col { white-space: nowrap; border-left: 1px solid #ececec; } diff --git a/src/Table/CSVTable.jsx b/src/Table/CSVTable.jsx index 7afd893..c559b00 100644 --- a/src/Table/CSVTable.jsx +++ b/src/Table/CSVTable.jsx @@ -10,7 +10,8 @@ import Select from 'react-select'; const CSVTable = ({dateStr}) => { const date = new Date(dateStr).toISOString().split('T')[0].replaceAll('-', '_'); - const [data, setData] = useState([]); + const [rawData, setRawData] = useState([]); + const [costMap, setCostMap] = useState({}); const [categories, setCategories] = useState({}); const [checkedCategories, setCheckedCategories] = useState({}); const [screenWidth, setScreenWidth] = useState(window.innerWidth); @@ -23,8 +24,14 @@ const CSVTable = ({dateStr}) => { const [showOpenWeights, setShowOpenWeights] = useState(false); const [showVariants, setShowVariants] = useState(false); const [showHighUnseenBias, setShowHighUnseenBias] = useState(true); + // Cost / token-efficiency metrics are published only for the top set of models; + // models without an entry render as "—" and a separate toggle can filter to those with data. + const [showCost, setShowCost] = useState(false); + const [showCostOnly, setShowCostOnly] = useState(false); - const updateURL = (checkedCategories, newFilter, newSortField = null, newSortOrder = null, newShowProvider = null, newShowApiName = null, newShowReasoners = null, newShowOpenWeights = null, newShowVariants = null, newShowHighUnseenBias = null, newSearchQuery = null) => { + const COST_NA_TITLE = "Token & cost metrics are published for the top models only"; + + const updateURL = (checkedCategories, newFilter, newSortField = null, newSortOrder = null, newShowProvider = null, newShowApiName = null, newShowReasoners = null, newShowOpenWeights = null, newShowVariants = null, newShowHighUnseenBias = null, newSearchQuery = null, newShowCost = null, newShowCostOnly = null) => { const params = new URLSearchParams(); let allAverages = true; @@ -72,6 +79,8 @@ const CSVTable = ({dateStr}) => { const effectiveShowOpenWeights = newShowOpenWeights !== null ? newShowOpenWeights : showOpenWeights; const effectiveShowVariants = newShowVariants !== null ? newShowVariants : showVariants; const effectiveShowHighUnseenBias = newShowHighUnseenBias !== null ? newShowHighUnseenBias : showHighUnseenBias; + const effectiveShowCost = newShowCost !== null ? newShowCost : showCost; + const effectiveShowCostOnly = newShowCostOnly !== null ? newShowCostOnly : showCostOnly; if (!effectiveShowProvider) params.set('provider', 'false'); if (effectiveShowApiName) params.set('api', 'true'); @@ -79,6 +88,8 @@ const CSVTable = ({dateStr}) => { if (effectiveShowOpenWeights) params.set('openweight', 'true'); if (effectiveShowVariants) params.set('variants', 'true'); if (effectiveShowHighUnseenBias) params.set('highunseenbias', 'true'); + if (effectiveShowCost) params.set('cost', 'true'); + if (effectiveShowCostOnly) params.set('costonly', 'true'); if (allAverages && !anySubcategories) { const newParams = new URLSearchParams(); @@ -104,6 +115,8 @@ const CSVTable = ({dateStr}) => { if (effectiveShowOpenWeights) newParams.set('openweight', 'true'); if (effectiveShowVariants) newParams.set('variants', 'true'); if (effectiveShowHighUnseenBias) newParams.set('highunseenbias', 'true'); + if (effectiveShowCost) newParams.set('cost', 'true'); + if (effectiveShowCostOnly) newParams.set('costonly', 'true'); setSearchParams(newParams); return; } @@ -155,6 +168,15 @@ const CSVTable = ({dateStr}) => { return res; }, [dateStr]); + // Merge cost metrics into each row so they sort with the existing machinery. + // Missing cost is set to null (not undefined) so SortTable's null-handling pushes + // those rows to the bottom regardless of sort order. + const data = useMemo(() => rawData.map(row => ({ + ...row, + avg_output_tokens: costMap[row.model]?.avg_output_tokens ?? null, + cost_per_question: costMap[row.model]?.cost_per_question ?? null, + })), [rawData, costMap]); + const [sortedData, handleSorting, handleSearch, handleFilter, sortField, sortOrder, searchQuery, filter] = useTable(data, columns, checkedCategories, categories, 'model', getModelInfo); useEffect(() => { @@ -166,11 +188,38 @@ const CSVTable = ({dateStr}) => { dynamicTyping: true, skipEmptyLines: true, complete: (result) => { - setData(result.data); + setRawData(result.data); } }); }); + // Optional cost dataset (published only for the top models). Absence is fine: + // costMap stays empty and every model renders "—" in the cost columns. + fetch(process.env.PUBLIC_URL + `/cost_${date}.csv`) + .then(response => response.ok ? response.text() : null) + .then(text => { + if (!text) { setCostMap({}); return; } + Papa.parse(text, { + header: true, + dynamicTyping: true, + skipEmptyLines: true, + complete: (result) => { + const map = {}; + result.data.forEach(r => { + if (r && r.model) { + map[r.model] = { + avg_output_tokens: r.avg_output_tokens, + avg_input_tokens: r.avg_input_tokens, + cost_per_question: r.cost_per_question, + }; + } + }); + setCostMap(map); + } + }); + }) + .catch(() => setCostMap({})); + fetch(process.env.PUBLIC_URL + `/categories_${date}.json`) .then(response => response.json()) .then(json => { @@ -239,6 +288,12 @@ const CSVTable = ({dateStr}) => { } else if (key === 'highunseenbias') { setShowHighUnseenBias(value === 'true'); return; + } else if (key === 'cost') { + setShowCost(value === 'true'); + return; + } else if (key === 'costonly') { + setShowCostOnly(value === 'true'); + return; } else if (Object.keys(categories).includes(key)) { if (value.includes('a')) { updatedCategories[key].average = true; @@ -285,7 +340,7 @@ const CSVTable = ({dateStr}) => { return; } updateURL(checkedCategories, filter); - }, [showProvider, showApiName, showReasoners, showOpenWeights, showVariants, showHighUnseenBias]); + }, [showProvider, showApiName, showReasoners, showOpenWeights, showVariants, showHighUnseenBias, showCost, showCostOnly]); const handleCheckboxChange = (clickedCategory, type) => { @@ -421,9 +476,11 @@ const CSVTable = ({dateStr}) => { setShowOpenWeights(false); setShowVariants(false); setShowHighUnseenBias(true); + setShowCost(false); + setShowCostOnly(false); // Update URL with default values (including empty search) - updateURL(defaultCategories, {}, 'ga', 'desc', true, false, true, false, false, false, ''); + updateURL(defaultCategories, {}, 'ga', 'desc', true, false, true, false, false, false, '', false, false); } // Utility to compute class for sorting @@ -481,9 +538,12 @@ const CSVTable = ({dateStr}) => { if (!showHighUnseenBias && info.highUnseenBias) { return false; } + if (showCostOnly && !costMap[row.model]) { + return false; + } return true; }); - }, [sortedData, showReasoners, showOpenWeights, showHighUnseenBias]); + }, [sortedData, showReasoners, showOpenWeights, showHighUnseenBias, showCostOnly, costMap]); const displayedData = useMemo(() => { if (showVariants) { @@ -559,6 +619,14 @@ const CSVTable = ({dateStr}) => { setShowHighUnseenBias(!showHighUnseenBias)} id="showHighUnseenBias" /> Show High Unseen Question Bias Models + +
@@ -624,6 +692,16 @@ const CSVTable = ({dateStr}) => { className={getSortClass(header)}> {header} ))} + {showCost && handleSortingChange("avg_output_tokens")}> + Output Tokens} + {showCost && handleSortingChange("cost_per_question")}> + Cost / Question} @@ -661,6 +739,16 @@ const CSVTable = ({dateStr}) => { } return res; }).map((cell, idx) => {cell})} + {showCost && (() => { + const c = costMap[row.model]; + const na = ; + return ( + <> + {c && c.avg_output_tokens != null ? Math.round(c.avg_output_tokens).toLocaleString() : na} + {c && c.cost_per_question != null ? `$${Number(c.cost_per_question).toFixed(3)}` : na} + + ); + })()} ); })}