diff --git a/changelog.d/1168.changed.md b/changelog.d/1168.changed.md new file mode 100644 index 000000000..541895977 --- /dev/null +++ b/changelog.d/1168.changed.md @@ -0,0 +1 @@ +- Export separate partnership and S-corporation income leaves, and rename the partnership self-employment tax allocation to partnership net earnings from self-employment. diff --git a/docs/appendix.md b/docs/appendix.md index d1c84bcf8..554b45f4c 100644 --- a/docs/appendix.md +++ b/docs/appendix.md @@ -47,12 +47,13 @@ for iteration in range(5000): ### Table A1: Complete List of Imputed Variables -#### Variables Imputed from IRS Public Use File (57 variables) +#### Variables Imputed from IRS Public Use File (58 variables) **Income Variables:** - employment_income -- partnership_s_corp_income +- partnership_income +- s_corp_income - social_security - taxable_pension_income - tax_exempt_pension_income @@ -68,7 +69,7 @@ for iteration in range(5000): - estate_income - miscellaneous_income - farm_income -- partnership_se_income +- partnership_self_employment_net_earnings - alimony_income - farm_rent_income - non_sch_d_capital_gains diff --git a/policyengine_us_data/calibration/puf_impute.py b/policyengine_us_data/calibration/puf_impute.py index de7498178..54d41ddff 100644 --- a/policyengine_us_data/calibration/puf_impute.py +++ b/policyengine_us_data/calibration/puf_impute.py @@ -58,7 +58,8 @@ "non_sch_d_capital_gains", "long_term_capital_gains_on_collectibles", "unrecaptured_section_1250_gain", - "partnership_s_corp_income", + "partnership_income", + "s_corp_income", "self_employment_income", "sstb_self_employment_income", "rental_income", @@ -70,6 +71,12 @@ "charitable_non_cash_donations", ) +FORMULAIC_BUSINESS_AGGREGATES_TO_DROP = frozenset( + { + "partnership_s_corp_income", + } +) + DEMOGRAPHIC_PREDICTORS = [ "age", "is_male", @@ -82,7 +89,8 @@ IMPUTED_VARIABLES = [ "employment_income", - "partnership_s_corp_income", + "partnership_income", + "s_corp_income", "social_security", "taxable_pension_income", "interest_deduction", @@ -117,7 +125,7 @@ "miscellaneous_income", "alimony_expense", "farm_income", - "partnership_se_income", + "partnership_self_employment_net_earnings", "alimony_income", "health_savings_account_ald", "non_sch_d_capital_gains", @@ -148,7 +156,8 @@ ] OVERRIDDEN_IMPUTED_VARIABLES = [ - "partnership_s_corp_income", + "partnership_income", + "s_corp_income", "interest_deduction", "unreimbursed_business_employee_expenses", "pre_tax_contributions", @@ -589,6 +598,10 @@ def _map_to_entity(pred_values, variable_name): new_data = {} for variable, time_dict in data.items(): + if variable in FORMULAIC_BUSINESS_AGGREGATES_TO_DROP: + logger.info("Dropping formulaic business aggregate: %s", variable) + continue + if variable in PUF_REPORTED_CALCULATED_TAX_OUTPUT_VARIABLES: logger.info("Dropping PUF tax-output variable: %s", variable) continue diff --git a/policyengine_us_data/datasets/cps/long_term/prototype_synthetic_2100_support.py b/policyengine_us_data/datasets/cps/long_term/prototype_synthetic_2100_support.py index 35438ecc3..588e0f588 100644 --- a/policyengine_us_data/datasets/cps/long_term/prototype_synthetic_2100_support.py +++ b/policyengine_us_data/datasets/cps/long_term/prototype_synthetic_2100_support.py @@ -86,7 +86,8 @@ "long_term_capital_gains_on_collectibles", "long_term_capital_gains_on_small_business_stock", "other_net_gain", - "partnership_s_corp_income", + "partnership_income", + "s_corp_income", "farm_income", "farm_rent_income", "estate_income", @@ -123,8 +124,9 @@ "miscellaneous_income", "non_qualified_dividend_income", "non_sch_d_capital_gains", - "partnership_s_corp_income", - "partnership_se_income", + "partnership_income", + "s_corp_income", + "partnership_self_employment_net_earnings", "qualified_bdc_income", "qualified_dividend_income", "qualified_reit_and_ptp_income", diff --git a/policyengine_us_data/datasets/cps/long_term/run_household_projection.py b/policyengine_us_data/datasets/cps/long_term/run_household_projection.py index 87d47fd94..6fcbc469f 100644 --- a/policyengine_us_data/datasets/cps/long_term/run_household_projection.py +++ b/policyengine_us_data/datasets/cps/long_term/run_household_projection.py @@ -304,8 +304,9 @@ def create_h6_reform(): "non_qualified_dividend_income", "taxable_interest_income", "tax_exempt_interest_income", - "partnership_s_corp_income", - "partnership_se_income", + "partnership_income", + "s_corp_income", + "partnership_self_employment_net_earnings", "estate_income", "rental_income", "farm_income", diff --git a/policyengine_us_data/datasets/puf/puf.py b/policyengine_us_data/datasets/puf/puf.py index 1aa43f2d0..46bf86a23 100644 --- a/policyengine_us_data/datasets/puf/puf.py +++ b/policyengine_us_data/datasets/puf/puf.py @@ -251,11 +251,14 @@ def sample_exposure_scaled_beta(base, params, rng): def simulate_investment_qbi_income_from_puf(puf, *, rng): """Simulate qualified REIT/PTP and BDC income from observed exposures.""" + partnership_s_corp_income = puf_column_values( + puf, "partnership_income" + ) + puf_column_values(puf, "s_corp_income") + if "partnership_income" not in puf and "s_corp_income" not in puf: + partnership_s_corp_income = puf_column_values(puf, "partnership_s_corp_income") exposure_bases = { "non_qualified_dividend_income": non_qualified_dividend_income_from_puf(puf), - "partnership_s_corp_income": puf_column_values( - puf, "partnership_s_corp_income" - ), + "partnership_s_corp_income": partnership_s_corp_income, } qualified_reit_and_ptp_income = np.zeros(len(puf), dtype=float) @@ -673,6 +676,9 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame: s_corp_income = puf.E26190 - puf.E26180 # Schedule E active partnership income partnership_income = puf.E25980 - puf.E25960 + puf["s_corp_income"] = s_corp_income + puf["partnership_income"] = partnership_income + # Keep the combined value as an internal QBI simulation helper only. puf["partnership_s_corp_income"] = s_corp_income + partnership_income # Schedule F active farming operations puf["farm_operations_income"] = puf.E02100 @@ -737,7 +743,7 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame: - puf["E25960"].fillna(0) ) != 0 partnership_se = np.where(has_partnership, gross_se - schedule_c_f_income, 0) - puf["partnership_se_income"] = partnership_se + puf["partnership_self_employment_net_earnings"] = partnership_se # --- Qualified Business Income Deduction (QBID) simulation --- puf = add_qbi_qualification_flags_to_puf(puf, seed=QBI_QUALIFICATION_SEED) @@ -866,8 +872,9 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame: "sstb_w2_wages_from_qualified_business", "sstb_unadjusted_basis_qualified_property", "deductible_mortgage_interest", - "partnership_s_corp_income", - "partnership_se_income", + "partnership_income", + "s_corp_income", + "partnership_self_employment_net_earnings", "qualified_reit_and_ptp_income", "qualified_bdc_income", "attends_eligible_educational_institution_for_lifetime_learning_credit", @@ -1005,6 +1012,8 @@ def _qbi_simulation_overrides( length = None for key in ( *QBI_SOURCE_NAMES, + "partnership_income", + "s_corp_income", "sstb_self_employment_income", "business_is_sstb", "w2_wages_from_qualified_business", @@ -1098,9 +1107,20 @@ def _qbi_simulation_overrides( source_arrays = {} for source in QBI_SOURCE_NAMES: - source_arrays[source] = self._values_from_file_or_overrides( - file_handle, source, existing_overrides, length - ).astype(float) + if ( + source == "partnership_s_corp_income" + and source not in file_handle + and source not in existing_overrides + ): + source_arrays[source] = self._values_from_file_or_overrides( + file_handle, "partnership_income", existing_overrides, length + ).astype(float) + self._values_from_file_or_overrides( + file_handle, "s_corp_income", existing_overrides, length + ).astype(float) + else: + source_arrays[source] = self._values_from_file_or_overrides( + file_handle, source, existing_overrides, length + ).astype(float) source_arrays["self_employment_income"] = ( self._values_from_file_or_overrides( diff --git a/pyproject.toml b/pyproject.toml index f9dff0c7b..b3c9a839d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ - "policyengine-us==1.715.3", + "policyengine-us==1.729.0", # policyengine-core 3.26.1 is the current 3.26.x runtime and includes the fix for # PolicyEngine/policyengine-core#482 (user-set ETERNITY inputs lost # after _invalidate_all_caches) and is required by policyengine-us 1.682.1+. diff --git a/tests/unit/calibration/test_calibration_puf_impute.py b/tests/unit/calibration/test_calibration_puf_impute.py index de6fa4c90..b81bc6c62 100644 --- a/tests/unit/calibration/test_calibration_puf_impute.py +++ b/tests/unit/calibration/test_calibration_puf_impute.py @@ -271,11 +271,15 @@ def fake_run_qrf_imputation(*args, **kwargs): def test_puf_only_variables_are_imputed_onto_cps_half(self, monkeypatch): data = _make_mock_data(n_persons=20, n_households=5) - assert "partnership_s_corp_income" not in data + assert "partnership_income" not in data + assert "s_corp_income" not in data + data["partnership_s_corp_income"] = {2024: np.full(20, 123, dtype=np.float32)} - predictions = np.arange(20, dtype=np.float32) + 100 + partnership_predictions = np.arange(20, dtype=np.float32) + 100 + s_corp_predictions = np.arange(20, dtype=np.float32) + 200 y_full = {var: np.ones(20, dtype=np.float32) for var in IMPUTED_VARIABLES} - y_full["partnership_s_corp_income"] = predictions + y_full["partnership_income"] = partnership_predictions + y_full["s_corp_income"] = s_corp_predictions y_full["employment_income"] = np.full(20, 999_999, dtype=np.float32) def fake_run_qrf_imputation(*args, **kwargs): @@ -295,9 +299,15 @@ def fake_run_qrf_imputation(*args, **kwargs): skip_qrf=False, ) - partnership = result["partnership_s_corp_income"][2024] - np.testing.assert_array_equal(partnership[:20], predictions) - np.testing.assert_array_equal(partnership[20:], predictions) + assert "partnership_s_corp_income" not in result + + partnership = result["partnership_income"][2024] + np.testing.assert_array_equal(partnership[:20], partnership_predictions) + np.testing.assert_array_equal(partnership[20:], partnership_predictions) + + s_corp = result["s_corp_income"][2024] + np.testing.assert_array_equal(s_corp[:20], s_corp_predictions) + np.testing.assert_array_equal(s_corp[20:], s_corp_predictions) employment = result["employment_income"][2024] np.testing.assert_array_equal(employment[:20], data["employment_income"][2024]) diff --git a/tests/unit/datasets/test_puf_qbi.py b/tests/unit/datasets/test_puf_qbi.py index fade4e0d8..a409079cf 100644 --- a/tests/unit/datasets/test_puf_qbi.py +++ b/tests/unit/datasets/test_puf_qbi.py @@ -239,7 +239,8 @@ def mutate(params): { "qualified_dividend_income": [900.0, 0.0, 0.0], "non_qualified_dividend_income": [100.0, 0.0, 0.0], - "partnership_s_corp_income": [0.0, 200.0, 0.0], + "partnership_income": [0.0, 150.0, 0.0], + "s_corp_income": [0.0, 50.0, 0.0], } ) @@ -271,6 +272,16 @@ def mutate(params): params["ubia_simulation"]["capital_intensity_probabilities"][source] = 1.0 for source in params["sstb_prob_map_by_source_name"]: params["sstb_prob_map_by_source_name"][source] = 0.0 + params["reit_ptp_income_distribution"] = { + "partnership_s_corp_income": { + "probability_of_receiving": 1.0, + "beta_a": 1.0, + "beta_b": 1.0, + "scale": 0.0, + "shift": 0.25, + } + } + params["bdc_income_distribution"] = {} _set_qbi_params(monkeypatch, mutate) with h5py.File(DummyPUF.file_path, "w") as file_handle: @@ -278,9 +289,8 @@ def mutate(params): file_handle.create_dataset( "self_employment_income", data=np.array([10_000.0, 0.0]) ) - file_handle.create_dataset( - "partnership_s_corp_income", data=np.array([0.0, 20_000.0]) - ) + file_handle.create_dataset("partnership_income", data=np.array([0.0, 12_000.0])) + file_handle.create_dataset("s_corp_income", data=np.array([0.0, 8_000.0])) for source in set(puf_module.QBI_SOURCE_NAMES) - { "self_employment_income", "partnership_s_corp_income", @@ -297,6 +307,9 @@ def mutate(params): assert "qualified_bdc_income" in arrays np.testing.assert_array_equal(arrays["business_is_sstb"], np.array([False, False])) assert np.all(arrays["unadjusted_basis_qualified_property"] > 0) + np.testing.assert_allclose( + arrays["qualified_reit_and_ptp_income"], np.array([0.0, 5_000.0]) + ) def test_puf_load_dataset_repairs_qbi_with_person_level_length(tmp_path, monkeypatch): diff --git a/tests/unit/test_long_term_calibration_contract.py b/tests/unit/test_long_term_calibration_contract.py index 60d28a471..7d9423daf 100644 --- a/tests/unit/test_long_term_calibration_contract.py +++ b/tests/unit/test_long_term_calibration_contract.py @@ -3071,7 +3071,8 @@ def test_compose_role_donor_rows_falls_back_for_missing_dependents(): enriched = df.copy() enriched["__pe_payroll_uprating_factor"] = 2.0 enriched["__pe_ss_uprating_factor"] = 3.0 - enriched["partnership_s_corp_income__2024"] = 1_000_000.0 + enriched["partnership_income__2024"] = 600_000.0 + enriched["s_corp_income__2024"] = 400_000.0 enriched["taxable_interest_income__2024"] = 500_000.0 older_rows = enriched[enriched["person_tax_unit_id__2024"] == 201].copy() @@ -3133,14 +3134,21 @@ def test_compose_role_donor_rows_can_sanitize_worker_non_target_income(): 0.0, 5_000.0, ] - enriched["partnership_s_corp_income__2024"] = [ + enriched["partnership_income__2024"] = [ 0.0, 0.0, - 6_000.0, + 3_000.0, 0.0, - 8_000.0, + 4_000.0, ] - enriched["partnership_se_income__2024"] = [ + enriched["s_corp_income__2024"] = [ + 0.0, + 0.0, + 3_000.0, + 0.0, + 4_000.0, + ] + enriched["partnership_self_employment_net_earnings__2024"] = [ 0.0, 0.0, -9_000.0, @@ -3192,7 +3200,8 @@ def test_compose_role_donor_rows_can_sanitize_worker_non_target_income(): pytest.approx(0.0) ) assert worker_clone["taxable_private_pension_income__2024"] == pytest.approx(0.0) - assert worker_clone["partnership_s_corp_income__2024"] == pytest.approx(0.0) + assert worker_clone["partnership_income__2024"] == pytest.approx(0.0) + assert worker_clone["s_corp_income__2024"] == pytest.approx(0.0) assert worker_clone["employment_income_before_lsr__2024"] == pytest.approx(50_000.0) assert ( "long_term_capital_gains_before_response__2024" @@ -3223,14 +3232,21 @@ def test_compose_role_donor_rows_can_sanitize_all_clone_non_target_income(): 0.0, 5_000.0, ] - enriched["partnership_s_corp_income__2024"] = [ + enriched["partnership_income__2024"] = [ 0.0, 0.0, - 6_000.0, + 3_000.0, 0.0, - 8_000.0, + 4_000.0, + ] + enriched["s_corp_income__2024"] = [ + 0.0, + 0.0, + 3_000.0, + 0.0, + 4_000.0, ] - enriched["partnership_se_income__2024"] = [ + enriched["partnership_self_employment_net_earnings__2024"] = [ 0.0, 0.0, -9_000.0, @@ -3283,8 +3299,11 @@ def test_compose_role_donor_rows_can_sanitize_all_clone_non_target_income(): pytest.approx(0.0) ) assert clone["taxable_private_pension_income__2024"] == pytest.approx(0.0) - assert clone["partnership_s_corp_income__2024"] == pytest.approx(0.0) - assert clone["partnership_se_income__2024"] == pytest.approx(0.0) + assert clone["partnership_income__2024"] == pytest.approx(0.0) + assert clone["s_corp_income__2024"] == pytest.approx(0.0) + assert clone["partnership_self_employment_net_earnings__2024"] == pytest.approx( + 0.0 + ) assert older_clone["social_security_retirement__2024"] == pytest.approx(20_000.0) assert worker_clone["employment_income_before_lsr__2024"] == pytest.approx(50_000.0) assert ( @@ -3292,7 +3311,7 @@ def test_compose_role_donor_rows_can_sanitize_all_clone_non_target_income(): in clone_df.attrs["sanitized_clone_non_target_income_columns"] ) assert ( - "partnership_se_income__2024" + "partnership_self_employment_net_earnings__2024" in clone_df.attrs["sanitized_clone_non_target_income_columns"] ) assert "sanitized_worker_non_target_income_columns" not in clone_df.attrs diff --git a/uv.lock b/uv.lock index 66f1ed9d7..15fbf2df7 100644 --- a/uv.lock +++ b/uv.lock @@ -2164,7 +2164,7 @@ wheels = [ [[package]] name = "policyengine-us" -version = "1.715.3" +version = "1.729.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "microdf-python" }, @@ -2174,9 +2174,9 @@ dependencies = [ { name = "tables" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/60/bc/ea8cf84d7653d4d76d1f7b05feb74722ff903637c616357610de1fd3b431/policyengine_us-1.715.3.tar.gz", hash = "sha256:5b41b22be90ef155a9440bcae7dd26115c887cad92ae8a51d9080a9692053b66", size = 10014788, upload-time = "2026-05-29T21:33:02.993Z" } +sdist = { url = "https://files.pythonhosted.org/packages/69/cb/b2efba2094a708cd71890d98d72b99394fabc5894a4cceec14381e03fa35/policyengine_us-1.729.0.tar.gz", hash = "sha256:ac05c4d621c7f848b0806effc14e913160d5d47d777eadced6bc18edf392d75c", size = 10373862, upload-time = "2026-06-14T18:05:25.747Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f4/0f/e6b594d46fffeb6e40db3a51441cec6a6e76ade2b178eab3836528dbc15c/policyengine_us-1.715.3-py3-none-any.whl", hash = "sha256:a34f305871f702d94f7a4d220bfd5312f11d83a417e793566892541871dfded3", size = 11037631, upload-time = "2026-05-29T21:32:59.464Z" }, + { url = "https://files.pythonhosted.org/packages/b9/7d/778f92ae94997b00c3c9ac34b345f6c9333435f905670ee4eeb2f5e19809/policyengine_us-1.729.0-py3-none-any.whl", hash = "sha256:8d21d3f7c0e82a9415edffe8ea53939330a63d9c8f6bd334299bddb697cf2c00", size = 11905076, upload-time = "2026-06-14T18:05:21.806Z" }, ] [[package]] @@ -2246,7 +2246,7 @@ requires-dist = [ { name = "pandas", specifier = ">=2.3.1" }, { name = "pip-system-certs", specifier = ">=3.0" }, { name = "policyengine-core", specifier = ">=3.26.1,<3.27" }, - { name = "policyengine-us", specifier = "==1.715.3" }, + { name = "policyengine-us", specifier = "==1.729.0" }, { name = "requests", specifier = ">=2.25.0" }, { name = "scipy", specifier = ">=1.15.3" }, { name = "setuptools", specifier = ">=60" },