Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ jobs:
python-version: ["3.10", "3.11"]
steps:
- uses: actions/checkout@v6
- name: Enable Git long paths on Windows
if: runner.os == 'Windows'
run: git config --global core.longpaths true
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v6
with:
Expand Down
11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -280,8 +280,15 @@ The emulator accepts CSV files with the following variables:
| ssemp | Spouse self-employment income |
| gssi | Social security retirement benefits |
| pensions | Taxable private pension income |
| scorp | Partnership/S-corp income |
| pbusinc | Primary taxpayer business income that qualifies for the QBID |
| scorp | S-corp profits |
| pbusinc | Primary and secondary taxpayer active qualified business income (QBI) |

PolicyEngine-US separately supports `partnership_self_employment_net_earnings`
for partnership income subject to self-employment tax from Schedule K-1 Box 14.
TAXSIM-35 has no separate input for that allocation, so policyengine-taxsim
does not populate `partnership_self_employment_net_earnings`. Use `psemp` and
`ssemp` for TAXSIM self-employment income, and use `scorp` and `pbusinc` for
TAXSIM's S-corp and QBID fields.


### Expenses
Expand Down
1 change: 1 addition & 0 deletions changelog.d/977.changed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Mapped TAXSIM `scorp` to PolicyEngine's S-corporation income input and documented that TAXSIM business-income inputs do not populate PolicyEngine's partnership self-employment tax allocation variable.
8 changes: 4 additions & 4 deletions dashboard/public/config-data.json
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,10 @@
},
{
"taxsim": "scorp",
"policyengine": "partnership_s_corp_income",
"policyengine": "s_corp_income",
"description": "S-Corp profits",
"implemented": true,
"githubLink": "https://github.com/PolicyEngine/policyengine-us/blob/master/policyengine_us/variables/household/income/person/self_employment/partnership_s_corp_income.py"
"githubLink": "https://github.com/PolicyEngine/policyengine-us/blob/master/policyengine_us/variables/household/income/person/self_employment/s_corp_income.py"
},
{
"taxsim": "pbusinc",
Expand Down Expand Up @@ -223,7 +223,7 @@
"taxable_interest_income",
"qualified_dividend_income",
"long_term_capital_gains",
"partnership_s_corp_income",
"s_corp_income",
"taxable_private_pension_income",
"short_term_capital_gains",
"social_security_retirement"
Expand Down Expand Up @@ -281,4 +281,4 @@
]
},
"lastUpdated": "2026-03-24T15:14:08.434Z"
}
}
4 changes: 2 additions & 2 deletions dashboard/scripts/extract-config.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ function generatePolicyEngineGitHubLink(variableName) {
'taxable_private_pension_income': 'household/income/person/retirement/taxable_private_pension_income.py',
'social_security_retirement': 'gov/ssa/ss/social_security_retirement.py',
'unemployment_compensation': 'gov/states/unemployment_compensation.py',
'partnership_s_corp_income': 'household/income/person/self_employment/partnership_s_corp_income.py',
's_corp_income': 'household/income/person/self_employment/s_corp_income.py',
'qualified_business_income': 'gov/irs/income/taxable_income/deductions/qualified_business_income_deduction/qualified_business_income.py',

// Expense/Deduction variables
Expand Down Expand Up @@ -247,7 +247,7 @@ function extractIncomeSplittingRules() {
'taxable_interest_income',
'qualified_dividend_income',
'long_term_capital_gains',
'partnership_s_corp_income',
's_corp_income',
'taxable_private_pension_income',
'short_term_capital_gains',
'social_security_retirement'
Expand Down
2 changes: 1 addition & 1 deletion dashboard/src/components/DocumentationContent.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -1079,7 +1079,7 @@ policyengine-taxsim policyengine input.csv --disable-salt --assume-w2-wages --lo
['gssi', 'social_security_* (all types)', 'Sum retirement + disability + survivors + dependents'],
['pui', 'unemployment_compensation (head)', 'Direct'],
['sui', 'unemployment_compensation (spouse)', 'Direct'],
['scorp', 'partnership_s_corp_income', 'Sum across tax unit'],
['scorp', 's_corp_income', 'Sum across tax unit'],
['proptax', 'real_estate_taxes (household)', 'Direct'],
['mortgage', 'deductible_mortgage_interest (household)', 'Direct'],
['rentpaid', 'rent (household)', 'Direct'],
Expand Down
5 changes: 4 additions & 1 deletion policyengine_taxsim/config/variable_mappings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -796,10 +796,13 @@ taxsim_to_policyengine:
- dividends
- long_term_capital_gains:
- ltcg
# TAXSIM has no separate Schedule K-1 Box 14 partnership
# self-employment allocation, so do not populate PolicyEngine's
# partnership_self_employment_net_earnings from TAXSIM inputs.
- self_employment_income:
- psemp
- ssemp
- partnership_s_corp_income:
- s_corp_income:
- scorp
- qualified_business_income:
- pbusinc
Expand Down
2 changes: 1 addition & 1 deletion policyengine_taxsim/core/input_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def add_additional_units(state, year, situation, taxsim_vars):
"taxable_interest_income",
"qualified_dividend_income",
"long_term_capital_gains",
"partnership_s_corp_income",
"s_corp_income",
"taxable_private_pension_income",
"short_term_capital_gains",
"social_security_retirement",
Expand Down
2 changes: 1 addition & 1 deletion policyengine_taxsim/core/yaml_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def generate_yaml(
"long_term_capital_gains",
"short_term_capital_gains",
"rental_income",
"partnership_s_corp_income",
"s_corp_income",
"qualified_business_income",
"w2_wages_from_qualified_business",
"business_is_sstb",
Expand Down
15 changes: 3 additions & 12 deletions policyengine_taxsim/runners/policyengine_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
import tempfile
from pathlib import Path
from typing import Dict, Any
from typing import Dict
from tqdm import tqdm
from .base_runner import BaseTaxRunner

Expand All @@ -14,8 +14,6 @@
SOI_TO_FIPS_MAP,
get_state_code,
get_state_number,
to_roundedup_number,
convert_taxsim32_dependents,
)
from policyengine_taxsim.core.state_output_resolver import (
calculate_output_adapter,
Expand All @@ -24,10 +22,7 @@
has_state_variable_mapping,
is_output_adapter,
)
from policyengine_taxsim.core.input_mapper import (
set_taxsim_defaults,
get_taxsim_defaults,
)
from policyengine_taxsim.core.input_mapper import get_taxsim_defaults

from policyengine_us import Microsimulation
from policyengine_core.data import Dataset
Expand Down Expand Up @@ -190,7 +185,7 @@ def _initialize_dataset_structure(self) -> dict:
"taxable_interest_income",
"qualified_dividend_income",
"long_term_capital_gains",
"partnership_s_corp_income",
"s_corp_income",
"short_term_capital_gains",
}
)
Expand Down Expand Up @@ -570,7 +565,6 @@ def _process_person_data_for_year(self, year_data: pd.DataFrame, year: int) -> d
# Standard variable mapping
primary_source = mapping.get("primary")
spouse_source = mapping.get("spouse")
default_val = mapping.get("default", 0.0)

# Primary values
if (
Expand Down Expand Up @@ -732,8 +726,6 @@ def _apply_defaults_vectorized(self, df: pd.DataFrame) -> pd.DataFrame:

def generate(self) -> None:
"""Generate the dataset with all TAXSIM records."""
n_records = len(self.input_df)

# Ensure all required columns exist with default values
self.input_df = self._ensure_required_columns(self.input_df)

Expand All @@ -743,7 +735,6 @@ def generate(self) -> None:

# Extract years (assuming all records might have different years)
# Years should already be converted to integers in the run() method
years = self.input_df["year"].values
unique_years = sorted(self.input_df["year"].unique())

# Use SOI to FIPS mapping from core utils
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ classifiers = [
"Programming Language :: Python :: 3.12",
]
dependencies = [
"policyengine-us>=1.711.0",
"policyengine-us @ git+https://github.com/PolicyEngine/policyengine-us.git@codex/partnership-se-semantics",
"pandas",
"PyYAML",
"click",
Expand Down Expand Up @@ -127,4 +127,4 @@ minversion = "6.0"
addopts = "-ra -q"
testpaths = [
"tests",
]
]
6 changes: 2 additions & 4 deletions scripts/convert_h5_to_taxsim.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,12 @@ def extract_taxsim_csv(sim, year: int) -> pd.DataFrame:

# Try to get S-corp income (only in ECPS)
try:
scorp_income = sim.calculate("partnership_s_corp_income", period).values
scorp_income = sim.calculate("s_corp_income", period).values
except Exception:
scorp_income = np.zeros_like(employment_income)

# Household-level
state_fips = sim.calculate("state_fips", period).values
household_weight = sim.calculate("household_weight", period).values

# Person -> household mapping
person_household_id = sim.calculate("person_household_id", period).values
Expand All @@ -162,7 +161,6 @@ def extract_taxsim_csv(sim, year: int) -> pd.DataFrame:

# Build household-level lookups
hh_state = dict(zip(household_id, state_fips))
hh_weight = dict(zip(household_id, household_weight))
hh_proptax = dict(zip(household_id, real_estate_taxes))
hh_mortgage = dict(zip(household_id, mortgage_interest))
hh_rent = dict(zip(household_id, rent))
Expand Down Expand Up @@ -295,7 +293,7 @@ def main():
df.to_csv(args.output, index=False)

# Print summary
print(f"\nSummary:")
print("\nSummary:")
print(f" Tax units: {len(df)}")
print(f" Single filers: {(df['mstat'] == 1).sum()}")
print(f" Joint filers: {(df['mstat'] == 2).sum()}")
Expand Down
62 changes: 62 additions & 0 deletions tests/test_mappers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import pytest
import pandas as pd

from policyengine_taxsim import generate_household, export_household
from policyengine_taxsim.runners.policyengine_runner import TaxsimMicrosimDataset


@pytest.fixture
Expand Down Expand Up @@ -180,6 +182,66 @@ def test_import_single_household_with_state_eq_0(sample_taxsim_input_with_state_
assert result == expected_output


def test_business_income_mapping_preserves_taxsim_business_api():
    """Check how TAXSIM business-income inputs land on the generated household.

    For a two-person record (``mstat`` 2) the test expects:

    * ``psemp`` / ``ssemp`` to appear as each person's
      ``self_employment_income``;
    * the 12,000 ``scorp`` amount to show up as 6,000 of ``s_corp_income``
      on each person;
    * ``pbusinc`` to appear as the primary person's
      ``qualified_business_income``;
    * ``partnership_self_employment_net_earnings`` to be absent from both
      people.
    """
    period = "2024"
    household = generate_household(
        {
            "year": 2024,
            "state": 5,
            "mstat": 2,
            "page": 45,
            "sage": 43,
            "pwages": 50_000,
            "swages": 40_000,
            "psemp": 30_000,
            "ssemp": 7_000,
            "scorp": 12_000,
            "pbusinc": 15_000,
            "taxsimid": 11,
            "idtl": 2,
            "depx": 0,
        }
    )
    people = household["people"]

    # Expected per-person values, keyed by person and then by variable name.
    expected_amounts = {
        "you": {
            "self_employment_income": 30_000,
            "s_corp_income": 6_000,
            "qualified_business_income": 15_000,
        },
        "your partner": {
            "self_employment_income": 7_000,
            "s_corp_income": 6_000,
        },
    }

    for person, amounts in expected_amounts.items():
        for variable, amount in amounts.items():
            assert people[person][variable][period] == amount

    # The partnership self-employment variable must not be set for anyone.
    for person in ("you", "your partner"):
        assert "partnership_self_employment_net_earnings" not in people[person]


def test_vectorized_business_income_mapping_preserves_taxsim_business_api():
    """Check the dataset-level TAXSIM-to-PolicyEngine business-income mapping.

    Builds a ``TaxsimMicrosimDataset`` from a single record and inspects the
    mapping returned by ``_get_taxsim_to_pe_variable_mapping``. The test
    expects plain column names for self-employment and qualified business
    income, callables for ``s_corp_income`` that each return 6,000 for a row
    with ``scorp`` of 12,000, and no entry for
    ``partnership_self_employment_net_earnings``.
    """
    single_record = {
        "taxsimid": 11,
        "year": 2024,
        "state": 5,
        "mstat": 2,
        "pwages": 50_000,
        "swages": 40_000,
        "psemp": 30_000,
        "ssemp": 7_000,
        "scorp": 12_000,
        "pbusinc": 15_000,
    }
    frame = pd.DataFrame([single_record])
    dataset = TaxsimMicrosimDataset(frame)

    pe_mapping = dataset._get_taxsim_to_pe_variable_mapping()
    first_row = frame.iloc[0]

    # Entries that are expected to be direct TAXSIM column names.
    expected_columns = {
        ("self_employment_income", "primary"): "psemp",
        ("self_employment_income", "spouse"): "ssemp",
        ("qualified_business_income", "primary"): "pbusinc",
    }
    for (variable, role), column in expected_columns.items():
        assert pe_mapping[variable][role] == column

    # s_corp_income entries are callables evaluated against a record row.
    for role in ("primary", "spouse"):
        assert pe_mapping["s_corp_income"][role](first_row) == 6_000

    assert "partnership_self_employment_net_earnings" not in pe_mapping


def test_export_single_household(sample_taxsim_input):
policyengine_single_household_situation = {
"families": {"your family": {"members": ["you"]}},
Expand Down
11 changes: 7 additions & 4 deletions tests/test_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,12 +178,15 @@ def test_generate_does_not_scale_linearly(self):
lambda x: int(float(x))
)
dataset = TaxsimMicrosimDataset(runner.input_df)
t0 = time.time()
t0 = time.perf_counter()
dataset.generate()
times[n] = time.time() - t0
times[n] = time.perf_counter() - t0
dataset.cleanup()

ratio = times[500] / max(times[100], 0.01)
# Very small timings are noisy on shared CI runners, especially on
# Windows. Keep the scaling guard from overreacting to sub-100ms
# denominators while still catching real row-wise regressions.
ratio = times[500] / max(times[100], 0.1)
assert ratio < 5.0, (
f"Generate phase scaled {ratio:.1f}x for 5x more records "
f"(100: {times[100]:.2f}s, 500: {times[500]:.2f}s). "
Expand Down Expand Up @@ -341,7 +344,7 @@ def counted_calc_tu(self_runner, sim, var_name, period):
return orig_calc_tu(self_runner, sim, var_name, period)

runner._calc_tax_unit = types.MethodType(counted_calc_tu, runner)
result = runner.run(show_progress=False)
runner.run(show_progress=False)

unique_states = records["state"].nunique()
# With unified state vars: ~30-60 _calc_tax_unit calls per PE pass.
Expand Down
2 changes: 1 addition & 1 deletion tests/test_spouse_income_splitting.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def _base_mfj_record(taxsimid=1, **overrides):
("dividends", "qualified_dividend_income", 80000),
("ltcg", "long_term_capital_gains", 60000),
("stcg", "short_term_capital_gains", 30000),
("scorp", "partnership_s_corp_income", 50000),
("scorp", "s_corp_income", 50000),
],
)
def test_mfj_household_income_splits_between_spouses(taxsim_field, pe_var, amount):
Expand Down
Loading
Loading