diff --git a/changelog.d/433.md b/changelog.d/433.md
new file mode 100644
index 00000000..cfb58ddc
--- /dev/null
+++ b/changelog.d/433.md
@@ -0,0 +1 @@
+- Populate `employment_sector` (public/private, from FRS `mjobsect`) and `sic_industry_division` (SIC 2007, from FRS `sic`) Person-level variables in the FRS dataset.
diff --git a/policyengine_uk_data/datasets/frs.py b/policyengine_uk_data/datasets/frs.py
index 58e2ac6b..d529896b 100644
--- a/policyengine_uk_data/datasets/frs.py
+++ b/policyengine_uk_data/datasets/frs.py
@@ -749,6 +749,25 @@ def determine_education_level(fted_val, typeed2_val, age_val):
         person.empstati, 1, range(12), EMPLOYMENTS
     ).fillna("LONG_TERM_DISABLED")
 
+    # Add employer sector of the main job from FRS `mjobsect`
+    # (1 = private, 2 = public; missing/blank = not in paid work).
+    EMPLOYMENT_SECTORS = ["NOT_EMPLOYED", "PRIVATE", "PUBLIC"]
+    pe_person["employment_sector"] = categorical(
+        pd.to_numeric(person.mjobsect, errors="coerce"),
+        0,
+        [0, 1, 2],
+        EMPLOYMENT_SECTORS,
+    ).fillna("NOT_EMPLOYED")
+
+    # Standard Industrial Classification (2007) division of the main job from
+    # FRS `sic` (0 if unknown; 84 = public administration and defence).
+    pe_person["sic_industry_division"] = (
+        pd.to_numeric(person.sic, errors="coerce")
+        .fillna(0)
+        .clip(lower=0)
+        .astype(int)
+    )
+
     REGIONS = [
         "NORTH_EAST",
         "NORTH_WEST",