Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/1168.changed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
- Export separate partnership and S-corporation income leaves, and rename the partnership self-employment tax allocation to partnership net earnings from self-employment.
7 changes: 4 additions & 3 deletions docs/appendix.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,13 @@ for iteration in range(5000):

### Table A1: Complete List of Imputed Variables

#### Variables Imputed from IRS Public Use File (57 variables)
#### Variables Imputed from IRS Public Use File (58 variables)

**Income Variables:**

- employment_income
- partnership_s_corp_income
- partnership_income
- s_corp_income
- social_security
- taxable_pension_income
- tax_exempt_pension_income
Expand All @@ -68,7 +69,7 @@ for iteration in range(5000):
- estate_income
- miscellaneous_income
- farm_income
- partnership_se_income
- partnership_self_employment_net_earnings
- alimony_income
- farm_rent_income
- non_sch_d_capital_gains
Expand Down
21 changes: 17 additions & 4 deletions policyengine_us_data/calibration/puf_impute.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@
"non_sch_d_capital_gains",
"long_term_capital_gains_on_collectibles",
"unrecaptured_section_1250_gain",
"partnership_s_corp_income",
"partnership_income",
"s_corp_income",
"self_employment_income",
"sstb_self_employment_income",
"rental_income",
Expand All @@ -70,6 +71,12 @@
"charitable_non_cash_donations",
)

# Combined business-income aggregates that are skipped (and logged as a
# "formulaic business aggregate") while the input variables are iterated,
# rather than being carried over. The separate "partnership_income" and
# "s_corp_income" variables are listed in IMPUTED_VARIABLES instead.
FORMULAIC_BUSINESS_AGGREGATES_TO_DROP = frozenset({"partnership_s_corp_income"})

DEMOGRAPHIC_PREDICTORS = [
"age",
"is_male",
Expand All @@ -82,7 +89,8 @@

IMPUTED_VARIABLES = [
"employment_income",
"partnership_s_corp_income",
"partnership_income",
"s_corp_income",
"social_security",
"taxable_pension_income",
"interest_deduction",
Expand Down Expand Up @@ -117,7 +125,7 @@
"miscellaneous_income",
"alimony_expense",
"farm_income",
"partnership_se_income",
"partnership_self_employment_net_earnings",
"alimony_income",
"health_savings_account_ald",
"non_sch_d_capital_gains",
Expand Down Expand Up @@ -148,7 +156,8 @@
]

OVERRIDDEN_IMPUTED_VARIABLES = [
"partnership_s_corp_income",
"partnership_income",
"s_corp_income",
"interest_deduction",
"unreimbursed_business_employee_expenses",
"pre_tax_contributions",
Expand Down Expand Up @@ -589,6 +598,10 @@ def _map_to_entity(pred_values, variable_name):

new_data = {}
for variable, time_dict in data.items():
if variable in FORMULAIC_BUSINESS_AGGREGATES_TO_DROP:
logger.info("Dropping formulaic business aggregate: %s", variable)
continue

if variable in PUF_REPORTED_CALCULATED_TAX_OUTPUT_VARIABLES:
logger.info("Dropping PUF tax-output variable: %s", variable)
continue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@
"long_term_capital_gains_on_collectibles",
"long_term_capital_gains_on_small_business_stock",
"other_net_gain",
"partnership_s_corp_income",
"partnership_income",
"s_corp_income",
"farm_income",
"farm_rent_income",
"estate_income",
Expand Down Expand Up @@ -123,8 +124,9 @@
"miscellaneous_income",
"non_qualified_dividend_income",
"non_sch_d_capital_gains",
"partnership_s_corp_income",
"partnership_se_income",
"partnership_income",
"s_corp_income",
"partnership_self_employment_net_earnings",
"qualified_bdc_income",
"qualified_dividend_income",
"qualified_reit_and_ptp_income",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -304,8 +304,9 @@ def create_h6_reform():
"non_qualified_dividend_income",
"taxable_interest_income",
"tax_exempt_interest_income",
"partnership_s_corp_income",
"partnership_se_income",
"partnership_income",
"s_corp_income",
"partnership_self_employment_net_earnings",
"estate_income",
"rental_income",
"farm_income",
Expand Down
38 changes: 29 additions & 9 deletions policyengine_us_data/datasets/puf/puf.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,11 +251,14 @@ def sample_exposure_scaled_beta(base, params, rng):

def simulate_investment_qbi_income_from_puf(puf, *, rng):
"""Simulate qualified REIT/PTP and BDC income from observed exposures."""
partnership_s_corp_income = puf_column_values(
puf, "partnership_income"
) + puf_column_values(puf, "s_corp_income")
if "partnership_income" not in puf and "s_corp_income" not in puf:
partnership_s_corp_income = puf_column_values(puf, "partnership_s_corp_income")
exposure_bases = {
"non_qualified_dividend_income": non_qualified_dividend_income_from_puf(puf),
"partnership_s_corp_income": puf_column_values(
puf, "partnership_s_corp_income"
),
"partnership_s_corp_income": partnership_s_corp_income,
}

qualified_reit_and_ptp_income = np.zeros(len(puf), dtype=float)
Expand Down Expand Up @@ -673,6 +676,9 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
s_corp_income = puf.E26190 - puf.E26180
# Schedule E active partnership income
partnership_income = puf.E25980 - puf.E25960
puf["s_corp_income"] = s_corp_income
puf["partnership_income"] = partnership_income
# Keep the combined value as an internal QBI simulation helper only.
puf["partnership_s_corp_income"] = s_corp_income + partnership_income
# Schedule F active farming operations
puf["farm_operations_income"] = puf.E02100
Expand Down Expand Up @@ -737,7 +743,7 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
- puf["E25960"].fillna(0)
) != 0
partnership_se = np.where(has_partnership, gross_se - schedule_c_f_income, 0)
puf["partnership_se_income"] = partnership_se
puf["partnership_self_employment_net_earnings"] = partnership_se

# --- Qualified Business Income Deduction (QBID) simulation ---
puf = add_qbi_qualification_flags_to_puf(puf, seed=QBI_QUALIFICATION_SEED)
Expand Down Expand Up @@ -866,8 +872,9 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
"sstb_w2_wages_from_qualified_business",
"sstb_unadjusted_basis_qualified_property",
"deductible_mortgage_interest",
"partnership_s_corp_income",
"partnership_se_income",
"partnership_income",
"s_corp_income",
"partnership_self_employment_net_earnings",
"qualified_reit_and_ptp_income",
"qualified_bdc_income",
"attends_eligible_educational_institution_for_lifetime_learning_credit",
Expand Down Expand Up @@ -1005,6 +1012,8 @@ def _qbi_simulation_overrides(
length = None
for key in (
*QBI_SOURCE_NAMES,
"partnership_income",
"s_corp_income",
"sstb_self_employment_income",
"business_is_sstb",
"w2_wages_from_qualified_business",
Expand Down Expand Up @@ -1098,9 +1107,20 @@ def _qbi_simulation_overrides(

source_arrays = {}
for source in QBI_SOURCE_NAMES:
source_arrays[source] = self._values_from_file_or_overrides(
file_handle, source, existing_overrides, length
).astype(float)
if (
source == "partnership_s_corp_income"
and source not in file_handle
and source not in existing_overrides
):
source_arrays[source] = self._values_from_file_or_overrides(
file_handle, "partnership_income", existing_overrides, length
).astype(float) + self._values_from_file_or_overrides(
file_handle, "s_corp_income", existing_overrides, length
).astype(float)
else:
source_arrays[source] = self._values_from_file_or_overrides(
file_handle, source, existing_overrides, length
).astype(float)

source_arrays["self_employment_income"] = (
self._values_from_file_or_overrides(
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ classifiers = [
"Programming Language :: Python :: 3.14",
]
dependencies = [
"policyengine-us==1.715.3",
"policyengine-us==1.729.0",
# policyengine-core 3.26.1 is the current 3.26.x runtime. It includes the fix for
# PolicyEngine/policyengine-core#482 (user-set ETERNITY inputs lost
# after _invalidate_all_caches) and is required by policyengine-us 1.682.1+.
Expand Down
22 changes: 16 additions & 6 deletions tests/unit/calibration/test_calibration_puf_impute.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,11 +271,15 @@ def fake_run_qrf_imputation(*args, **kwargs):

def test_puf_only_variables_are_imputed_onto_cps_half(self, monkeypatch):
data = _make_mock_data(n_persons=20, n_households=5)
assert "partnership_s_corp_income" not in data
assert "partnership_income" not in data
assert "s_corp_income" not in data
data["partnership_s_corp_income"] = {2024: np.full(20, 123, dtype=np.float32)}

predictions = np.arange(20, dtype=np.float32) + 100
partnership_predictions = np.arange(20, dtype=np.float32) + 100
s_corp_predictions = np.arange(20, dtype=np.float32) + 200
y_full = {var: np.ones(20, dtype=np.float32) for var in IMPUTED_VARIABLES}
y_full["partnership_s_corp_income"] = predictions
y_full["partnership_income"] = partnership_predictions
y_full["s_corp_income"] = s_corp_predictions
y_full["employment_income"] = np.full(20, 999_999, dtype=np.float32)

def fake_run_qrf_imputation(*args, **kwargs):
Expand All @@ -295,9 +299,15 @@ def fake_run_qrf_imputation(*args, **kwargs):
skip_qrf=False,
)

partnership = result["partnership_s_corp_income"][2024]
np.testing.assert_array_equal(partnership[:20], predictions)
np.testing.assert_array_equal(partnership[20:], predictions)
assert "partnership_s_corp_income" not in result

partnership = result["partnership_income"][2024]
np.testing.assert_array_equal(partnership[:20], partnership_predictions)
np.testing.assert_array_equal(partnership[20:], partnership_predictions)

s_corp = result["s_corp_income"][2024]
np.testing.assert_array_equal(s_corp[:20], s_corp_predictions)
np.testing.assert_array_equal(s_corp[20:], s_corp_predictions)

employment = result["employment_income"][2024]
np.testing.assert_array_equal(employment[:20], data["employment_income"][2024])
Expand Down
21 changes: 17 additions & 4 deletions tests/unit/datasets/test_puf_qbi.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,8 @@ def mutate(params):
{
"qualified_dividend_income": [900.0, 0.0, 0.0],
"non_qualified_dividend_income": [100.0, 0.0, 0.0],
"partnership_s_corp_income": [0.0, 200.0, 0.0],
"partnership_income": [0.0, 150.0, 0.0],
"s_corp_income": [0.0, 50.0, 0.0],
}
)

Expand Down Expand Up @@ -271,16 +272,25 @@ def mutate(params):
params["ubia_simulation"]["capital_intensity_probabilities"][source] = 1.0
for source in params["sstb_prob_map_by_source_name"]:
params["sstb_prob_map_by_source_name"][source] = 0.0
params["reit_ptp_income_distribution"] = {
"partnership_s_corp_income": {
"probability_of_receiving": 1.0,
"beta_a": 1.0,
"beta_b": 1.0,
"scale": 0.0,
"shift": 0.25,
}
}
params["bdc_income_distribution"] = {}

_set_qbi_params(monkeypatch, mutate)
with h5py.File(DummyPUF.file_path, "w") as file_handle:
file_handle.create_dataset("household_id", data=np.array([1, 2]))
file_handle.create_dataset(
"self_employment_income", data=np.array([10_000.0, 0.0])
)
file_handle.create_dataset(
"partnership_s_corp_income", data=np.array([0.0, 20_000.0])
)
file_handle.create_dataset("partnership_income", data=np.array([0.0, 12_000.0]))
file_handle.create_dataset("s_corp_income", data=np.array([0.0, 8_000.0]))
for source in set(puf_module.QBI_SOURCE_NAMES) - {
"self_employment_income",
"partnership_s_corp_income",
Expand All @@ -297,6 +307,9 @@ def mutate(params):
assert "qualified_bdc_income" in arrays
np.testing.assert_array_equal(arrays["business_is_sstb"], np.array([False, False]))
assert np.all(arrays["unadjusted_basis_qualified_property"] > 0)
np.testing.assert_allclose(
arrays["qualified_reit_and_ptp_income"], np.array([0.0, 5_000.0])
)


def test_puf_load_dataset_repairs_qbi_with_person_level_length(tmp_path, monkeypatch):
Expand Down
45 changes: 32 additions & 13 deletions tests/unit/test_long_term_calibration_contract.py
Original file line number Diff line number Diff line change
Expand Up @@ -3071,7 +3071,8 @@ def test_compose_role_donor_rows_falls_back_for_missing_dependents():
enriched = df.copy()
enriched["__pe_payroll_uprating_factor"] = 2.0
enriched["__pe_ss_uprating_factor"] = 3.0
enriched["partnership_s_corp_income__2024"] = 1_000_000.0
enriched["partnership_income__2024"] = 600_000.0
enriched["s_corp_income__2024"] = 400_000.0
enriched["taxable_interest_income__2024"] = 500_000.0

older_rows = enriched[enriched["person_tax_unit_id__2024"] == 201].copy()
Expand Down Expand Up @@ -3133,14 +3134,21 @@ def test_compose_role_donor_rows_can_sanitize_worker_non_target_income():
0.0,
5_000.0,
]
enriched["partnership_s_corp_income__2024"] = [
enriched["partnership_income__2024"] = [
0.0,
0.0,
6_000.0,
3_000.0,
0.0,
8_000.0,
4_000.0,
]
enriched["partnership_se_income__2024"] = [
enriched["s_corp_income__2024"] = [
0.0,
0.0,
3_000.0,
0.0,
4_000.0,
]
enriched["partnership_self_employment_net_earnings__2024"] = [
0.0,
0.0,
-9_000.0,
Expand Down Expand Up @@ -3192,7 +3200,8 @@ def test_compose_role_donor_rows_can_sanitize_worker_non_target_income():
pytest.approx(0.0)
)
assert worker_clone["taxable_private_pension_income__2024"] == pytest.approx(0.0)
assert worker_clone["partnership_s_corp_income__2024"] == pytest.approx(0.0)
assert worker_clone["partnership_income__2024"] == pytest.approx(0.0)
assert worker_clone["s_corp_income__2024"] == pytest.approx(0.0)
assert worker_clone["employment_income_before_lsr__2024"] == pytest.approx(50_000.0)
assert (
"long_term_capital_gains_before_response__2024"
Expand Down Expand Up @@ -3223,14 +3232,21 @@ def test_compose_role_donor_rows_can_sanitize_all_clone_non_target_income():
0.0,
5_000.0,
]
enriched["partnership_s_corp_income__2024"] = [
enriched["partnership_income__2024"] = [
0.0,
0.0,
6_000.0,
3_000.0,
0.0,
8_000.0,
4_000.0,
]
enriched["s_corp_income__2024"] = [
0.0,
0.0,
3_000.0,
0.0,
4_000.0,
]
enriched["partnership_se_income__2024"] = [
enriched["partnership_self_employment_net_earnings__2024"] = [
0.0,
0.0,
-9_000.0,
Expand Down Expand Up @@ -3283,16 +3299,19 @@ def test_compose_role_donor_rows_can_sanitize_all_clone_non_target_income():
pytest.approx(0.0)
)
assert clone["taxable_private_pension_income__2024"] == pytest.approx(0.0)
assert clone["partnership_s_corp_income__2024"] == pytest.approx(0.0)
assert clone["partnership_se_income__2024"] == pytest.approx(0.0)
assert clone["partnership_income__2024"] == pytest.approx(0.0)
assert clone["s_corp_income__2024"] == pytest.approx(0.0)
assert clone["partnership_self_employment_net_earnings__2024"] == pytest.approx(
0.0
)
assert older_clone["social_security_retirement__2024"] == pytest.approx(20_000.0)
assert worker_clone["employment_income_before_lsr__2024"] == pytest.approx(50_000.0)
assert (
"long_term_capital_gains_before_response__2024"
in clone_df.attrs["sanitized_clone_non_target_income_columns"]
)
assert (
"partnership_se_income__2024"
"partnership_self_employment_net_earnings__2024"
in clone_df.attrs["sanitized_clone_non_target_income_columns"]
)
assert "sanitized_worker_non_target_income_columns" not in clone_df.attrs
Loading
Loading