Skip to content
Open
19 changes: 19 additions & 0 deletions public/cost_2026_01_08.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
model,AMPS_Hard,code_completion,code_generation,connections,consecutive_events,integrals_with_game,javascript,logic_with_navigation,math_comp,olympiad,paraphrase,plot_unscrambling,python,simplify,spatial,story_generation,summarize,tablejoin,tablereformat,theory_of_mind,typescript,typos,zebra_puzzle
deepseek-v4-pro,0.2504,0.2927,0.4335,1.1945,4.8001,7.6773,,1.3015,1.5286,2.905,1.2097,0.8195,,1.035,1.1736,0.8422,0.941,1.4651,0.1512,2.0217,,0.1168,3.8612
gemini-3-flash-preview-high,1.5257,2.1738,2.8599,2.6343,5.9797,11.9349,,3.7343,4.7163,2.4212,4.0288,2.1455,,4.1076,2.7227,3.855,3.9197,3.3915,2.1384,2.7283,,2.4754,4.3226
gemini-3-pro-preview-11-2025-high,2.8986,3.7064,5.7132,4.4913,20.253,34.3327,,9.314,14.3906,4.7474,4.8539,4.1325,,4.7546,4.5769,4.6127,4.4618,7.163,2.6717,6.486,,1.8465,13.6553
gemini-3.1-pro-preview-high,2.9741,3.0368,6.0119,2.344,28.505,43.6557,,9.9464,8.8926,5.4719,7.4282,10.0822,,6.903,4.9059,6.9235,6.6073,6.4908,3.9294,4.2992,,2.1866,10.8268
gemini-3.5-flash-high,1.1062,2.9346,4.9094,2.1636,25.9843,36.5173,,8.8282,6.4087,4.6562,7.4329,5.2138,,6.7386,3.6924,6.3501,6.0812,6.9276,3.9423,4.7356,,1.5401,12.8185
gpt-5.1-2025-11-13-high,0.9311,1.9775,2.8915,4.3942,19.2097,34.805,,9.9141,8.268,2.6258,5.0585,8.8235,,4.0447,4.359,4.1457,4.1656,3.6882,1.9167,10.9247,,1.2402,23.5375
gpt-5.1-codex-max-high,2.0986,3.5489,4.0357,8.8463,15.0326,49.2791,,11.5988,10.0669,10.3044,7.1546,8.6589,,5.9656,9.3179,5.7747,5.473,7.0886,2.271,11.4198,,2.3984,13.5836
gpt-5.2-2025-12-11-high,0.455,1.3874,1.9742,1.9068,22.2134,27.061,,4.7384,4.8503,6.1639,6.2153,3.7762,,4.5995,2.0256,4.598,4.9272,1.6977,1.0306,3.9217,,0.5961,12.3162
gpt-5.2-codex,1.2291,1.5988,1.8486,4.3947,22.8094,24.3917,,7.0443,5.0861,6.762,3.7657,5.8009,,3.3745,3.2051,3.6011,3.424,5.8971,1.1831,13.3448,,0.5076,10.5383
gpt-5.3-codex-high,0.7865,1.0061,1.5207,1.6306,5.5683,13.8047,,4.2924,2.8525,3.1053,3.5004,4.5567,,2.8114,1.7137,2.5051,2.6617,2.8453,1.3109,4.5989,,0.7884,4.8932
gpt-5.4-xhigh,2.6687,10.6719,20.5204,7.9421,29.8797,62.4069,,21.3757,10.8401,14.2973,27.31,18.1197,,23.2705,7.9064,21.6154,23.9794,19.6961,5.6004,25.3987,,2.4179,23.4908
gpt-5.5-xhigh,1.5302,5.6389,12.3428,4.7743,47.4464,51.423,,14.0512,6.3185,10.4094,14.1513,22.1269,,11.1038,5.4863,10.5239,9.7203,9.5914,3.7872,15.8573,,1.5801,23.0294
grok-build-0.1,0.0491,0.0369,0.0618,0.0298,0.334,0.156,,0.1853,0.119,0.109,0.0474,0.1144,,0.0416,0.0464,0.0467,0.044,0.1049,0.1965,0.077,,0.0406,0.1095
kimi-k2.6-thinking,0.8528,1.3505,1.9175,5.2211,12.5083,21.9097,,4.2483,4.5579,5.6148,3.6765,4.1842,,3.9214,3.1094,3.0444,2.7652,5.2091,1.0471,4.663,,0.8503,6.0012
kimi-k2.7-code,0.3807,0.8355,0.8216,1.3963,10.158,11.4094,,2.3841,2.1515,3.1487,2.7538,3.4542,,2.5998,1.3815,1.9603,2.1367,2.6118,0.5836,3.2884,,0.3385,3.7352
minimax-m3,0.2187,0.3168,0.422,0.7462,3.4049,3.674,,0.5763,0.9437,0.2939,1.1241,0.6972,,0.9813,0.5243,0.8252,0.8864,0.7257,0.1823,0.4291,,0.1648,2.3803
nemotron-3-ultra-550b-a55b,0.3803,0.3921,0.6255,1.4881,2.1573,4.0329,,0.314,0.9438,0.2226,1.3502,1.3248,,1.3503,0.5632,1.2476,1.3198,1.5851,0.4101,0.5208,,0.2795,0.0869
qwen3.7-max,1.6922,1.0987,2.0062,1.997,13.7477,36.0024,,5.3916,6.0642,2.3137,4.2576,2.4092,,4.0372,4.4381,3.9201,3.603,1.8632,2.5036,1.5234,,1.112,9.2771
29 changes: 27 additions & 2 deletions src/App.css
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,10 @@ td {
td {
padding: 6px;
}

/* Hide the per-cell cost line inside this media query.
   The selector is scoped under td so that it outranks the base `.cost`
   rule (display: block) declared later in the stylesheet: with an equal
   specificity selector the later rule would win on source order and the
   cost line would never be hidden here.
   NOTE(review): assumes every .cost element is rendered inside a <td>. */
td .cost {
display: none;
}
}

/* Style adjustments for medium screens */
Expand Down Expand Up @@ -242,6 +246,19 @@ td {
}

/* Container for the filter toggles: a centered flex row whose items wrap
   onto additional lines, with 0.5rem between wrapped rows and 0.5rem of
   space below the whole group. align-self: stretch only takes effect if the
   parent is itself a flex or grid container (parent not visible here). */
.other-controls {
display: flex;
flex-wrap: wrap;
align-items: center;
justify-content: center;
align-self: stretch;
row-gap: 0.5rem;
margin-bottom: 0.5rem;
}

/* Dedicated row that horizontally centers its content (the "Clear Filters"
   button in the component markup) and leaves 1rem of space underneath. */
.clear-filters-row {
display: flex;
justify-content: center;
align-self: stretch;
margin-bottom: 1rem;
}

Expand All @@ -251,7 +268,6 @@ td {
border: none;
padding: 0.5rem 1rem;
cursor: pointer;
margin-left: 1rem;
}

.section {
Expand Down Expand Up @@ -302,4 +318,13 @@ thead th:first-child {

.liveswebench-callout:hover {
background-color: rgb(122, 220, 233);
}
}

/* Secondary cost line shown inside a table cell.
   - display: block puts it on its own line, 2px below the preceding content
   - smaller gray text keeps it visually subordinate
   - tabular-nums gives digits a uniform width
   - nowrap keeps the amount on a single line */
.cost {
display: block;
margin-top: 2px;
font-size: 0.78em;
color: #888;
font-variant-numeric: tabular-nums;
white-space: nowrap;
}
6 changes: 6 additions & 0 deletions src/Table/Averaging.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,9 @@ export const calculateAverage = (row, columns, fixedSize) => {
const average = validValues.length > 0 ? validValues.reduce((a, b) => a + b, 0) / validValues.length : NaN;
return isNaN(average) ? '-' : fixedSize ? average.toFixed(fixedSize) : average;
};

/**
 * Total the numeric values stored in `row` under the given column names.
 *
 * Each cell is read with `parseFloat`; cells that do not parse to a number
 * (missing, null, non-numeric text) are left out of the total.
 *
 * @param {Object} row - Record mapping column names to cell values.
 * @param {string[]} columns - Column names to add together.
 * @returns {number|null} The sum of the parseable cells, or null when `row`
 *   or `columns` is falsy or when no cell yields a number.
 */
export const sumColumns = (row, columns) => {
  if (!row || !columns) {
    return null;
  }

  const parsed = columns.map((name) => parseFloat(row[name]));

  let total = 0;
  let counted = 0;
  for (const amount of parsed) {
    if (Number.isNaN(amount)) {
      continue;
    }
    total += amount;
    counted += 1;
  }

  // Distinguish "nothing to add" (null) from a genuine total of zero.
  return counted > 0 ? total : null;
};
79 changes: 71 additions & 8 deletions src/Table/CSVTable.jsx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// src/Table/CSVTable.jsx
import React, { useState, useEffect, useMemo, useCallback } from 'react';
import Papa from 'papaparse';
import { calculateAverage, getGlobalAverage } from './Averaging';
import { calculateAverage, getGlobalAverage, getGlobalAverageColumns, sumColumns } from './Averaging';
import { useTable } from "./SortTable";
import { getModelInfo, getVariantGroup } from './modelLinks';
import { useSearchParams } from 'react-router-dom';
Expand All @@ -11,6 +11,7 @@ import Select from 'react-select';
const CSVTable = ({dateStr}) => {
const date = new Date(dateStr).toISOString().split('T')[0].replaceAll('-', '_');
const [data, setData] = useState([]);
const [cost, setCost] = useState({});
const [categories, setCategories] = useState({});
const [checkedCategories, setCheckedCategories] = useState({});
const [screenWidth, setScreenWidth] = useState(window.innerWidth);
Expand All @@ -23,8 +24,9 @@ const CSVTable = ({dateStr}) => {
const [showOpenWeights, setShowOpenWeights] = useState(false);
const [showVariants, setShowVariants] = useState(false);
const [showHighUnseenBias, setShowHighUnseenBias] = useState(true);
const [showCost, setShowCost] = useState(false);

const updateURL = (checkedCategories, newFilter, newSortField = null, newSortOrder = null, newShowProvider = null, newShowApiName = null, newShowReasoners = null, newShowOpenWeights = null, newShowVariants = null, newShowHighUnseenBias = null, newSearchQuery = null) => {
const updateURL = (checkedCategories, newFilter, newSortField = null, newSortOrder = null, newShowProvider = null, newShowApiName = null, newShowReasoners = null, newShowOpenWeights = null, newShowVariants = null, newShowHighUnseenBias = null, newSearchQuery = null, newShowCost = null) => {
const params = new URLSearchParams();

let allAverages = true;
Expand Down Expand Up @@ -72,13 +74,15 @@ const CSVTable = ({dateStr}) => {
const effectiveShowOpenWeights = newShowOpenWeights !== null ? newShowOpenWeights : showOpenWeights;
const effectiveShowVariants = newShowVariants !== null ? newShowVariants : showVariants;
const effectiveShowHighUnseenBias = newShowHighUnseenBias !== null ? newShowHighUnseenBias : showHighUnseenBias;
const effectiveShowCost = newShowCost !== null ? newShowCost : showCost;

if (!effectiveShowProvider) params.set('provider', 'false');
if (effectiveShowApiName) params.set('api', 'true');
if (!effectiveShowReasoners) params.set('reasoners', 'false');
if (effectiveShowOpenWeights) params.set('openweight', 'true');
if (effectiveShowVariants) params.set('variants', 'true');
if (effectiveShowHighUnseenBias) params.set('highunseenbias', 'true');
if (effectiveShowCost) params.set('cost', 'true');

if (allAverages && !anySubcategories) {
const newParams = new URLSearchParams();
Expand All @@ -104,6 +108,7 @@ const CSVTable = ({dateStr}) => {
if (effectiveShowOpenWeights) newParams.set('openweight', 'true');
if (effectiveShowVariants) newParams.set('variants', 'true');
if (effectiveShowHighUnseenBias) newParams.set('highunseenbias', 'true');
if (effectiveShowCost) newParams.set('cost', 'true');
setSearchParams(newParams);
return;
}
Expand Down Expand Up @@ -158,6 +163,7 @@ const CSVTable = ({dateStr}) => {
const [sortedData, handleSorting, handleSearch, handleFilter, sortField, sortOrder, searchQuery, filter] = useTable(data, columns, checkedCategories, categories, 'model', getModelInfo);

useEffect(() => {
setCost({});
fetch(process.env.PUBLIC_URL + `/table_${date}.csv`)
.then(response => response.text())
.then(text => {
Expand All @@ -171,6 +177,28 @@ const CSVTable = ({dateStr}) => {
});
});

// Load the optional per-model cost table for this date. Every failure path
// (non-OK response, unexpected body, rejected fetch) leaves the cost map empty.
// NOTE(review): nothing visible here cancels or ignores a stale response if
// `date` changes while the request is in flight — confirm whether the
// enclosing effect needs a guard.
fetch(process.env.PUBLIC_URL + `/cost_${date}.csv`)
.then(response => (response.ok ? response.text() : null))
.then(text => {
// Accept only bodies that begin with the expected "model," header column.
// Presumably this rejects an HTML fallback page served for a missing file — confirm against the hosting setup.
if (!text || !text.startsWith('model,')) {
setCost({});
return;
}
Papa.parse(text, {
header: true,
dynamicTyping: true,
skipEmptyLines: true,
complete: (result) => {
// Index the parsed rows by lower-cased model name for lookup at render time.
const map = {};
result.data.forEach(r => {
if (r && r.model != null) map[String(r.model).toLowerCase()] = r;
});
setCost(map);
}
});
})
.catch(() => setCost({}));

fetch(process.env.PUBLIC_URL + `/categories_${date}.json`)
.then(response => response.json())
.then(json => {
Expand Down Expand Up @@ -239,6 +267,9 @@ const CSVTable = ({dateStr}) => {
} else if (key === 'highunseenbias') {
setShowHighUnseenBias(value === 'true');
return;
} else if (key === 'cost') {
setShowCost(value === 'true');
return;
} else if (Object.keys(categories).includes(key)) {
if (value.includes('a')) {
updatedCategories[key].average = true;
Expand Down Expand Up @@ -285,7 +316,7 @@ const CSVTable = ({dateStr}) => {
return;
}
updateURL(checkedCategories, filter);
}, [showProvider, showApiName, showReasoners, showOpenWeights, showVariants, showHighUnseenBias]);
}, [showProvider, showApiName, showReasoners, showOpenWeights, showVariants, showHighUnseenBias, showCost]);

const handleCheckboxChange = (clickedCategory, type) => {

Expand Down Expand Up @@ -421,9 +452,10 @@ const CSVTable = ({dateStr}) => {
setShowOpenWeights(false);
setShowVariants(false);
setShowHighUnseenBias(true);
setShowCost(false);

// Update URL with default values (including empty search)
updateURL(defaultCategories, {}, 'ga', 'desc', true, false, true, false, false, false, '');
updateURL(defaultCategories, {}, 'ga', 'desc', true, false, true, false, false, false, '', false);
}

// Utility to compute class for sorting
Expand All @@ -433,6 +465,15 @@ const CSVTable = ({dateStr}) => {

const numCheckedCategories = Object.values(checkedCategories).filter(cat => cat.average || cat.allSubcategories).length;

const hasCost = Object.keys(cost).length > 0;
const costOn = showCost && hasCost;
/**
 * Format a cost as a dollar string, using more decimals for smaller amounts.
 *
 * @param {number|null|undefined} v - Cost value.
 * @returns {string} An em dash for null/undefined/NaN; otherwise "$" followed
 *   by the value with 2 decimals (>= 0.1), 3 decimals (>= 0.01) or 4 decimals.
 */
const fmtCost = (v) => {
  const isMissing = v == null || isNaN(v);
  if (isMissing) {
    return '—';
  }

  // Smaller amounts get extra decimals so they do not collapse to "$0.00".
  let decimals = 4;
  if (v >= 0.1) {
    decimals = 2;
  } else if (v >= 0.01) {
    decimals = 3;
  }

  return `$${v.toFixed(decimals)}`;
};

const modelProviders = Array.from(new Set(data.map(row => getModelInfo(row.model)?.organization ?? 'Unknown'))).sort();

// Create a map to identify models with duplicate display names
Expand Down Expand Up @@ -559,6 +600,12 @@ const CSVTable = ({dateStr}) => {
<input type="checkbox" checked={showHighUnseenBias} onChange={() => setShowHighUnseenBias(!showHighUnseenBias)} id="showHighUnseenBias" />
<span style={{marginLeft: '0.5rem'}}>Show High Unseen Question Bias Models</span>
</label>
{hasCost && <label style={{whiteSpace: 'nowrap', marginLeft: '1rem'}}>
<input type="checkbox" checked={showCost} onChange={() => setShowCost(!showCost)} id="showCost" />
<span style={{marginLeft: '0.5rem'}}>Show Cost (USD)</span>
</label>}
</div>
<div className="clear-filters-row">
<button onClick={handleResetFilters} className="clear-filters-button">Clear Filters</button>
</div>
<div className="search-bar">
Expand Down Expand Up @@ -641,6 +688,14 @@ const CSVTable = ({dateStr}) => {
return name;
})();

// Cost record for this row's model; the cost map is keyed by lower-cased model name.
const costRow = cost[String(row.model ?? '').toLowerCase()];
// True when the cost data contains an entry for this model.
const covered = !!costRow;
// Builds the optional cost line rendered under a cell's score.
//   columns  - column names whose costs are summed for this cell
//   isAnchor - when true, a model without cost data shows "n/a" in this cell
// Yields null when cost display is off, or when the model has no cost data
// and this is not the anchor cell.
// NOTE(review): the `columns` parameter shadows the component-level `columns`
// passed to useTable — consider renaming to avoid confusion.
const costCell = (columns, isAnchor) =>
!costOn ? null
: covered ? <span className="cost">{fmtCost(sumColumns(costRow, columns))}</span>
: isAnchor ? <span className="cost">n/a</span>
: null;

return (
<tr key={index}>
<td className="sticky-col model-col">
Expand All @@ -650,17 +705,25 @@ const CSVTable = ({dateStr}) => {
{info.note && <><br/><small>{info.note}</small></>}
</td>
{showProvider && <td className="sticky-col organization-col">{info?.organization ?? ''}</td>}
{numCheckedCategories > 1 && <td className="sticky-col globalAverage-col">{getGlobalAverage(row, checkedCategories, categories)}</td>}
{numCheckedCategories > 1 && <td className="sticky-col globalAverage-col">
{getGlobalAverage(row, checkedCategories, categories)}
{costCell(getGlobalAverageColumns(checkedCategories, categories), true)}
</td>}
{Object.entries(checkedCategories).flatMap(([category, checks]) => {
const res = [];
if (checks.average) {
res.push(calculateAverage(row, categories[category], 2));
res.push({ value: calculateAverage(row, categories[category], 2), columns: categories[category] });
}
if (checks.allSubcategories) {
categories[category].forEach(subCat => res.push(row[subCat] == null ? '-' : parseInt(row[subCat]) === row[subCat] ? row[subCat] : row[subCat]));
categories[category].forEach(subCat => res.push({ value: row[subCat] == null ? '-' : row[subCat], columns: [subCat] }));
}
return res;
}).map((cell, idx) => <td key={idx}>{cell}</td>)}
}).map((cell, idx) => (
<td key={idx}>
{cell.value}
{costCell(cell.columns, numCheckedCategories <= 1 && idx === 0)}
</td>
))}
</tr>
);
})}
Expand Down