From 4c9c5b545e72c36a762aea2494a07d1a4ccebb8e Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Wed, 17 Jun 2026 18:12:11 +0100 Subject: [PATCH 1/2] Carry FRS employer sector and SIC industry into the dataset Populate the new employment_sector (public/private, from FRS mjobsect) and sic_industry_division (SIC 2007, from FRS sic) Person-level variables, using the same categorical() passthrough pattern as employment_status and region. Requires the matching variables in policyengine-uk (PolicyEngine/policyengine-uk#1785). Closes #432 Co-Authored-By: Claude Opus 4.8 (1M context) --- policyengine_uk_data/datasets/frs.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/policyengine_uk_data/datasets/frs.py b/policyengine_uk_data/datasets/frs.py index 58e2ac6b..d529896b 100644 --- a/policyengine_uk_data/datasets/frs.py +++ b/policyengine_uk_data/datasets/frs.py @@ -749,6 +749,25 @@ def determine_education_level(fted_val, typeed2_val, age_val): person.empstati, 1, range(12), EMPLOYMENTS ).fillna("LONG_TERM_DISABLED") + # Add employer sector of the main job from FRS `mjobsect` + # (1 = private, 2 = public; missing/blank = not in paid work). + EMPLOYMENT_SECTORS = ["NOT_EMPLOYED", "PRIVATE", "PUBLIC"] + pe_person["employment_sector"] = categorical( + pd.to_numeric(person.mjobsect, errors="coerce"), + 0, + [0, 1, 2], + EMPLOYMENT_SECTORS, + ).fillna("NOT_EMPLOYED") + + # Standard Industrial Classification (2007) division of the main job from + # FRS `sic` (0 if unknown; 84 = public administration and defence). + pe_person["sic_industry_division"] = ( + pd.to_numeric(person.sic, errors="coerce") + .fillna(0) + .clip(lower=0) + .astype(int) + ) + REGIONS = [ "NORTH_EAST", "NORTH_WEST", From 85229c91d8bcb9d0de5aff9c1b211e3129bfa9e7 Mon Sep 17 00:00:00 2001 From: Vahid Ahmadi Date: Wed, 17 Jun 2026 18:14:00 +0100 Subject: [PATCH 2/2] Add changelog fragment for FRS employer sector passthrough (#433) Co-Authored-By: Claude Opus 4.8 (1M context) --- changelog.d/433.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/433.md diff --git a/changelog.d/433.md b/changelog.d/433.md new file mode 100644 index 00000000..cfb58ddc --- /dev/null +++ b/changelog.d/433.md @@ -0,0 +1 @@ +- Populate `employment_sector` (public/private, from FRS `mjobsect`) and `sic_industry_division` (SIC 2007, from FRS `sic`) Person-level variables in the FRS dataset.