clear version 14.2 set seed 123456789 set more off *cd "code_output/" /****************************************************************************** PURPOSE: CLEAN DATA TO PREPARE THEM FOR FINAL ANALYSES 1: MERGE THE TRAINING AND PREDICTION DATA 2: RECODE MISSING VALUES 3: ASSIGN PRIMARY CAREGIVER TO MOTHER OR FATHER 4: PRIMARY CAREGIVER DEMOGRAPHICS 5: PRIMARY CAREGIVER HEALTH AND BEHAVIOR 6: PRIMARY CAREGIVER WORK INDICATORS 7: SINGLE IMPUTATION Input and output files: selected_vars.dta --> cleaned.dta Machine: Mac laptop Runtime: less than a minute *******************************************************************************/ /* NOTES ON VARIABLE NAMES: *Variables ending in "imm" have replaced missing primary caregiver data due to unknown relationship status (i.e. biological mother) with mother info replaced *missing1 indicates true missing *missing2 indicates missing after using mother's info for those missing PCG status */ *corrects the imputation procedure /****************************************************************************** 1: MERGE THE TRAINING AND PREDICTION DATA *******************************************************************************/ use "data/selected_vars.dta" rename challengeID challengeid merge 1:1 challengeid using "data/train.dta", nogen /****************************************************************************** 2: CLEANING UP MISSING AND CODING TRAINING DATA (see Freese on FFam github) *******************************************************************************/ //Recode missing data foreach var of varlist * { cap recode `var' (-1=.r) (-2=.d) (-3=.z) (-4=.z) (-5=.z) (-6=.i) /// (-7=.z) (-8=.z) (-9=.n) (-10/-14=.z) * string missing values if _rc != 0 { cap replace `var' = ".z" if `var' == "NA" cap destring `var', replace } } // missing codes: // . : in the discovery sample // .r : refuse (-1) // .d = don't know (-2) // .z : in training sample but missing outcome (also -3) // .n : not in wave (-9) // .i = N/A (-6) label define missingL .n "Not in Wave" .z "Missing" .i "N/A" /// .r "Refuse" .d "Don't Know" //Creating a prediction variable with missings (.z) as 0 foreach var in gpa grit materialhardship eviction layoff jobtraining { cap drop pred_`var' gen double pred_`var' = `var' replace pred_`var' = 0 if `var' == .z } //Create variable for whether observation is in training set gen training = pred_layoff != . label variable training "part of training dataset?" order challengeid pred_* gpa grit materialhardship eviction layoff /// jobtraining training, first /****************************************************************************** 3: PRIMARY CAREGIVER STATUS *******************************************************************************/ //pc5statgen- primary caregiver wave 5 status, general recode pcg5idstat 61=1 62=2 63=3, gen (pc5statgen) label define pc5statgenL .n "Not in Wave" 1 "Biological Mother" /// 2 "Biological Father" 3 "Other" label values pc5statgen pc5statgenL label var pc5statgen "Primary-caregiver status general" *Check tab pc5statgen pcg5idstat, m col //pc5npstat- non-parental primary caregiver status //coding them as maternal, paternal, or other recode n5a1 (1 3 5 7 = 1 "Maternal Relative") /// (2 4 6 8 = 2 "Paternal Relative") (9/14 = 3 "Other") /// (-9=.n "Not in Wave") (-7=.i "N/A") /// (-3=.z "Missing") /// , gen(pc5npstat) label var pc5npstat "Non-parental primary caregiver status;sv: n5a1" *Check tab n5a1 pc5npstat, m //pc5statdet - primary caregiver status, more specific //combines pc5statgen and pc5npstat gen pc5statdet = 1 if pc5statgen==1 replace pc5statdet = 2 if pc5statgen==2 replace pc5statdet = 5 if pc5statgen==3 replace pc5statdet = 3 if pc5npstat==1 replace pc5statdet = 4 if pc5npstat==2 replace pc5statdet = .n if pc5statgen==.n //not in wave according to pcg5idstat replace pc5statdet = .i if pc5npstat==.i //missing according to pc5npstat label var pc5statdet "Primary caregiver status specific" label define pc5statdetL 1 "Biological Mother" 2 "Biological Father" /// 3 "Maternal Relative" 4 "Paternal Relative" /// 5 "Other Caregiver" .n "Not in Wave" /// .i "Missing" label values pc5statdet pc5statdetL *Check tab n5a1 pc5statdet, m tab pcg5idstat pc5statdet, m //primary caregiver relationship status missing gen pcstatm=pc5statgen>=. label var pcstatm "Missing primary caregiver relationship status" *Check tab pcstatm pcstat, m col order pc5statdet pc5statgen pcstatm pc5npstat, after(training) /****************************************************************************** 4: PRIMARY CAREGIVER DEMORGAPHICS *******************************************************************************/ /********************* SEX *********************/ gen pc5female=n5d1==2 if n5d1>=1 //non-parental primary caregiver is female replace pc5female=1 if pc5statgen==1 //primary caregiver is biological mother replace pc5female=0 if pc5statgen==2 //primary caregiver biological father replace pc5female=pc5statgen if pc5statgen>=. label var pc5female "Primary caregiver is female" label values pc5female missingL *Check tab pc5female n5d1, m tab pc5female pc5statdet, m //Making variable that replaces those with missing pcg status with bio. mom's gen pc5femaleimm = pc5female replace pc5femaleimm=1 if pc5statgen==.n label var pc5femaleimm "Primary caregiver is female" label val pc5femaleimm missingL tab pc5femaleimm pc5female, m //making a missing sex dummy gen pc5femalem1=pc5female>=. label var pc5femalem1 "Missing primary caregiver sex" gen pc5femalem2=pc5femaleimm>=. label var pc5femalem2 "Missing primary caregiver sex" *Check tab pc5femalem1 pc5female, m col tab pc5femalem2 pc5femaleimm, m col order pc5femalem1 pc5femalem2 pc5femaleimm pc5female, after(pc5npstat) /********************* RACE *********************/ gen pc5race = cm1ethrace if pc5statdet==1 //mother race if bio. mother replace pc5race = cf1ethrace if pc5statdet==2 //father race if bio. father replace pc5race = cm1ethrace if pc5statdet==3 //mother race if mat. rel. replace pc5race = cf1ethrace if pc5statdet==4 //father race if pat. rel. replace pc5race = .n if pc5statgen==.n label var pc5race "Primary caregiver race/ethnicity" label define ethraceL 1 "White Non-Hispanic" 2 "Black Non-Hispanic" /// 3 "Hispanic" 4 "Other" .n "Not in Wave" /// .z "Missing" .i "N/A" label values pc5race ethraceL *Check tab pc5race cm1ethrace if pc5statgen==1, m tab pc5race cf1ethrace if pc5statgen==2, m tab pc5race cm1ethrace if pc5statdet==3, m tab pc5race cf1ethrace if pc5statdet==4, m tab pc5race if pc5statdet==5, m tab pc5race, m //Making variable that replaces those with missing pcg status with bio. mom's gen pc5raceimm = pc5race replace pc5raceimm=cm1ethrace if pc5statgen==.n label var pc5raceimm "Primary caregiver race/ethnicity" label values pc5raceimm ethraceL //Making race dummy variables tab pc5race, gen(pc5race) tab pc5raceimm, gen(pc5raceimm) *white rename pc5race1 pc5white label var pc5white "Primary caregiver is white" replace pc5white = pc5statgen if pc5statgen>. rename pc5raceimm1 pc5whiteimm label var pc5whiteimm "Primary caregiver is white" *black rename pc5race2 pc5black label var pc5black "Primary caregiver is black" replace pc5black = pc5statgen if pc5statgen>. rename pc5raceimm2 pc5blackimm label var pc5blackimm "Primary caregiver is black" *hispanic rename pc5race3 pc5hisp label var pc5hisp "Primary caregiver is Hispanic" replace pc5hisp = pc5statgen if pc5statgen>. rename pc5raceimm3 pc5hispimm label var pc5hispimm "Primary caregiver is Hispanic" *other rename pc5race4 pc5other label var pc5other "Primary caregiver is other race/ethnicity" replace pc5other= pc5statgen if pc5statgen>. rename pc5raceimm4 pc5otherimm label var pc5otherimm "Primary caregiver is other race/ethnicity" //Race missing variables gen pc5racem1=pc5race>=. label var pc5racem1 "Missing primary caregiver race" gen pc5racem2=pc5raceimm>=. label var pc5racem2 "Missing primary caregiver race" *Check tab pc5racem1 pc5race, m col tab pc5racem2 pc5raceimm, m col order pc5white* pc5black* pc5hisp* pc5other* pc5racem* pc5raceimm pc5race, /// after(pc5female) /********************* AGE AT CHILD'S BIRTH *********************/ *mother is primary caregiver gen pc5age = cm1age if pc5statgen==1 //mothers age at birth *father is primary caregiver replace pc5age = cf1age if pc5statgen==2 & cf1age>0 //father's age at birth + 9 *non-parental primary caregiver replace pc5age = n5d2_age if pc5statgen==3 //non-parental if non-miss replace pc5age = (pc5age-9) if pc5statgen==3 & pc5age<. //subtracting 9 for age at birth *missing pcg replace pc5age = pc5statgen if pc5statgen>. label var pc5age "Primary caregiver age at child's birth" label values pc5age missingL *Check sum pc5age bysort pc5statgen: sum pc5age //Mean-centered age summ pc5age, meanonly gen pc5agec = pc5age-r(mean) label var pc5agec "Primary caregiver mean-centered age at child's birth" label values pc5age missingL //Making variable that replaces those with missing pcg status with bio. mom's *mother is primary caregiver gen pc5ageimm = pc5age *pcg is not in wave replace pc5ageimm = cm1age if pc5statgen==.n label var pc5ageimm "Primary caregiver age at child's birth" label values pc5ageimm missingL //Mean-centered age, missing pcg status replaced with biological mother's summ pc5ageimm, meanonly gen pc5agecimm = pc5ageimm-r(mean) label var pc5agecimm "Primary caregiver mean-centered age at child's birth" label values pc5agecimm missingL *Check sum pc5agecimm pc5ageimm bysort pc5statgen: sum pc5agecimm pc5ageimm //Age missing variables gen pc5agem1=pc5age>=. label var pc5agem1 "Missing primary caregiver age" gen pc5agem2=pc5ageimm>=. label var pc5agem2 "Missing primary caregiver age" *Check tab pc5agem1 tab pc5agem2 order pc5agem1 pc5agem2 pc5agecimm pc5agec pc5ageimm pc5age, after(pc5race) /******************************* MARRIED OR COHABITATING ********************************/ //mother married or cohabitating gen m5marcoh = 0 if cm5marf<. *Check tab m5marcoh, m replace m5marcoh = 1 if (cm5marf==1 | cm5marp==1 | cm5cohf==1| /// cm5cohp==1) *Check tab m5marcoh, m *imputing back to wave 2 because that leads to no missing values foreach i in 4 3 2 { replace m5marcoh = 0 if (cm`i'marf==0 | cm`i'marp==0 | cm`i'cohf==0 | /// cm`i'cohp==0) & m5marcoh==. replace m5marcoh = 1 if (cm`i'marf==1 | cm`i'marp==1 | cm`i'cohf==1| /// cm`i'cohp==1) & m5marcoh==. } *Check tab m5marcoh, m //father married or cohabitating gen f5marcoh = 0 if cf5marm<. *Check tab f5marcoh, m replace f5marcoh = 1 if (cf5marm==1 | cf5marp==1 | cf5cohm==1| /// cf5cohp==1) *Check tab f5marcoh, m *imputing back to wave 2 foreach i in 4 3 2 { replace f5marcoh = 0 if (cf`i'marm==0 | cf`i'marp==0 | cf`i'cohm==0 | /// cf`i'cohp==0) & f5marcoh==. replace f5marcoh = 1 if (cf`i'marm==1 | cf`i'marp==1 | cf`i'cohm==1| /// cf`i'cohp==1) & f5marcoh==. } *Check tab f5marcoh, m //non-parental pcg married or cohabitatin gen np5marcoh = 0 if n5d3h<. replace np5marcoh = 1 if (n5d3d_1==1|n5d3d_1==2|n5d3d_2==1|n5d3d_2==2| /// n5d3d_3==1|n5d3d_3==2|n5d3d_4==1|n5d3d_4==2| /// n5d3d_5==1|n5d3d_5==2|n5d3d_6==1|n5d3d_6==2| /// n5d3d_7==1|n5d3d_7==2|n5d3d_8==1|n5d3d_8==2) /// *Check tab np5marcoh, m //Making primary caregiver cohabitation variable *mother is primary caregiver gen pc5marcoh = m5marcoh if pc5statgen==1 *father is primary caregiver replace pc5marcoh = f5marcoh if pc5statgen==2 *non-parental primary caregiver replace pc5marcoh = np5marcoh if pc5statgen==3 *missing primary caregiver replace pc5marcoh = pc5statgen if pc5statgen>. //coding the missings- just using the married to father/mother from survey 5 replace pc5marcoh = cm5marf >=. if pc5marcoh==. & (pc5statgen==1) replace pc5marcoh = cf5marm >=. if pc5marcoh==. & (pc5statgen==2) replace pc5marcoh = n5d3h >=. if pc5marcoh==. & (pc5statgen==3) label var pc5marcoh "Primary caregiver is married or cohabiting" label values pc5marcoh missingL *Check tab cm5marf pc5marcoh if pc5statdet==1, m tab cm5cohf pc5marcoh if pc5statdet==1, m tab cm5marp pc5marcoh if pc5statdet==1, m tab cm5cohp pc5marcoh if pc5statdet==1, m tab cf5marm pc5marcoh if pc5statdet==2, m tab cf5cohm pc5marcoh if pc5statdet==2, m tab cf5marp pc5marcoh if pc5statdet==2, m tab cf5cohp pc5marcoh if pc5statdet==2, m bysort pc5statgen: tab pc5marcoh, m tab pc5marcoh, m //Making variable that replaces those with missing pcg status with bio. mom's gen pc5marcohimm = pc5marcoh replace pc5marcohimm = m5marcoh if pc5statgen==.n //coding the missings- just using the married to father/mother from survey 5 replace pc5marcohimm = cm5marf >=. if pc5marcohimm==. & (pc5statgen==1|pc5statgen==.n) replace pc5marcohimm = cf5marm >=. if pc5marcohimm==. & (pc5statgen==2) replace pc5marcohimm = n5d3h >=. if pc5marcohimm==. & (pc5statgen==3) label var pc5marcohimm "Primary caregiver is married or cohabiting" label values pc5marcohimm missingL *Check bysort pc5statgen: tab pc5marcohimm, m //making missing married/cohabiting dummies gen pc5marcohm1=pc5marcoh>=. label var pc5marcohm1 "Missing primary caregiver marital/cohabiting status" gen pc5marcohm2=pc5marcohimm>=. label var pc5marcohm2 "Missing primary caregiver marital/cohabiting status" *Check tab pc5marcohm1 pc5marcoh, m col tab pc5marcohm2 pc5marcohimm, m col order pc5marcohm* pc5marcohimm pc5marcoh, after(pc5age) /******************************* HIGHEST EDUCATION ********************************/ //Mother highest education gen m5highedu =cm1edu *Check tab m5highedu *additional education: HS foreach var in m2k3a1 m3k3a_1 m4k3a_1 m5i3a_1{ replace m5highedu=2 if `var'==1 & m5highedu<2 } *Check tab m5highedu *additional education: some college or 2 year college foreach var in m2k3a15 m3k3a_15 m4k3a_15 m5i3a_15 m2k3a9 m3k3a_9 m4k3a_9 /// m5i3a_9{ replace m5highedu=3 if `var'==1 & m5highedu<3 } *Check tab m5highedu *additional education: 4 year college or grad foreach var in m2k3a15 m3k3a_10 m4k3a_10 m5i3a_10 m2k3a16 m3k3a_16 m4k3a_16 /// m5i3a_16{ replace m5highedu=4 if `var'==1 & m5highedu<4 } *Check tab m5highedu //Father highest education gen f5highedu=cf1edu *Check tab f5highedu *year 2- capturing missings from year 1 gen f2k1aV2=f2k1a recode f2k1aV2 2/3=1 4/5=2 6/7 8/9=4 replace f5highedu=f2k1aV2 if f5highedu>=. *Check tab f5highedu *additional education: HS foreach var in f2k5a1 f3k3a_1 f4k3a_1 f5i3a_1{ replace f5highedu=2 if `var'==1 & f5highedu<2 } *Check tab f5highedu *additional education: some college or 2 year college foreach var in f2k5a15 f3k3a_15 f4k3a_15 f5i3a_15 f2k5a9 f3k3a_9 f4k3a_9 /// f5i3a_9{ replace f5highedu=3 if `var'==1 & f5highedu<3 } *Check tab f5highedu *additional education: 4 year college or grad foreach var in f2k5a15 f3k3a_10 f4k3a_10 f5i3a_10 f2k5a16 f3k3a_16 f4k3a_16 /// f5i3a_16{ replace f5highedu=4 if `var'==1 & f5highedu<4 } *Check tab f5highedu //non-parental primary caregiver gen np5highed=n5e1 //making a copy var of the np pcg to make recoding easier recode np5highed 2/3=1 4/5=2 6/11=3 12/13=4 *Check tab n5e1 np5highed //Making pcg highed variable gen pc5highed=m5highedu if pc5statgen==1 replace pc5highed=f5highedu if pc5statgen==2 replace pc5highed=np5highed if pc5statgen==3 replace pc5highed=pc5statgen if pc5statgen>. label define pc5highedL 1 "Less than HS" 2 "HS or HS equivalency" /// 3 "Some college or 2-year degree" /// 4 "4-year degree or higher" /// .n "Not in Wave" .z "Missing" .i "N/A" /// .r "Refuse" .d "Don't Know" label values pc5highed pc5highedL label var pc5highed "Primary caregiver highest education" *Check bysort pc5statgen: tab pc5highed, m tab pc5highed cm1edu if pc5statgen==1, m //Making variable that replaces those with missing pcg status with bio. mom's gen pc5highedimm=pc5highed replace pc5highedimm=m5highedu if pc5statgen==.n label values pc5highedimm pc5highedL label var pc5highedimm "Primary caregiver highest education" *Check bysort pc5statgen: tab pc5highedimm, m //Dummy Variables tab pc5highed, gen(pc5highed) tab pc5highedimm, gen(pc5highedimm) rename pc5highed1 pc5lhs rename pc5highedimm1 pc5lhsimm rename pc5highed2 pc5hs rename pc5highedimm2 pc5hsimm rename pc5highed3 pc52yr rename pc5highedimm3 pc52yrimm rename pc5highed4 pc54yr rename pc5highedimm4 pc54yrimm label var pc5lhs "Primary care giver highest ed:less than HS" label var pc5hs "Primary care giver highest ed:HS grad" label var pc52yr "Primary care giver highest ed:two-year degee" label var pc54yr "Primary care giver highest ed:four-year degree or more" label var pc5lhsimm "Primary care giver highest ed:less than HS" label var pc5hsimm "Primary care giver highest ed:HS grad" label var pc52yrimm "Primary care giver highest ed:two-year degee" label var pc54yrimm "Primary care giver highest ed:four-year degree or more" //At least some college dummy gen pc5scplus=pc5highed recode pc5scplus 1/2=0 3/4=1 label define pc5scplusL 0 "HS or less" 1 "At least some college" label values pc5scplus pc5scplusL label var pc5scplus "Primary caregiver has at least some college" *Check tab pc5scplus pc5highed gen pc5scplusimm=pc5highedimm recode pc5scplusimm 1/2=0 3/4=1 label values pc5scplusimm pc5scplusL label var pc5scplusimm "Primary caregiver has at least some college" *Check tab pc5scplusimm pc5highedimm //Making missing highest education dummies gen pc5highedm1=pc5highed>=. label var pc5highedm1 "Missing primary caregiver highest education" gen pc5highedm2=pc5highedimm>=. label var pc5highedm2 "Missing primary caregiver highest education" *Check tab pc5highedm1 pc5highed, m col tab pc5highedm2 pc5highedimm, m col order pc5highedm* pc5lhs* pc5hs* pc52yr* pc54yr* pc5scplus* pc5highedimm /// pc5highed , after(pc5marcoh) /************************************** GRANDPARENTS' HIGHEST EDUCATION ***************************************/ //Mother's parents' ed *recoding the variables- only go back to the 2nd survey adminstration foreach var in m2g2 m3h1e m4h1e m2g3 m3h1f m4h1f { gen `var'a = `var' recode `var'a 2/3=1 4 101=2 5/8 12=3 9/11=4 } foreach var in m5e1e m5e1f { gen `var'a = `var' recode `var'a 2/5=1 9/11=3 12/13=4 } *replacing if graduated high school replace m2g2a = 2 if m2g2c1 == 12 replace m3h1ea = 2 if m3h1e1 == 12 replace m4h1ea = 2 if m4h1e1 == 12 replace m5e1ea = 2 if m5e1e1 == 12 replace m2g3a = 2 if m2g3c1 == 12 replace m3h1fa = 2 if m3h1f1 == 12 replace m4h1fa = 2 if m4h1f1 == 12 replace m5e1fa = 2 if m5e1f1 == 12 //now assigning the highest grandparent education egen m5parhighed = rowmax(m2g2a m3h1ea m4h1ea m5e1ea m2g3a m3h1fa /// m4h1fa m5e1fa) sum m2g2a m3h1ea m4h1ea m5e1ea m2g3a m3h1fa /// m4h1fa m5e1fa *Check tab m5parhighed, m //Father's parents' ed *recoding the variables- only go back to the 2nd survey adminstration foreach var in f2g2 f3h1e f4h1e f2g3 f3h1f f4h1f { gen `var'a = `var' recode `var'a 2/3=1 4 101=2 5/8 12=3 9/11=4 } foreach var in f5e1e f5e1f { gen `var'a = `var' recode `var'a 2/5=1 9/11=3 12/13=4 } *replacing if graduated high school replace f2g2a = 2 if f2g2c == 12 replace f3h1ea = 2 if f3h1e1 == 12 replace f4h1ea = 2 if f4h1e1 == 12 replace f5e1ea = 2 if f5e1e1 == 12 replace m2g3a = 2 if m2g3c1 == 12 replace m3h1fa = 2 if m3h1f1 == 12 replace m4h1fa = 2 if m4h1f1 == 12 replace m5e1fa = 2 if m5e1f1 == 12 //now assigning the highest grandparent education egen f5parhighed = rowmax(f2g2a f3h1ea f4h1ea f5e1ea f2g3a f3h1fa /// f4h1fa f5e1fa) sum f2g2a f3h1ea f4h1ea f5e1ea f2g3a f3h1fa /// f4h1fa f5e1fa *Check tab f5parhighed, m //Now making PCG variable gen pc5parhighed = m5parhighed if pc5statgen==1 replace pc5parhighed = f5parhighed if pc5statgen==2 replace pc5parhighed = .i if pc5statgen==3 replace pc5parhighed = pc5statgen if pc5statgen>. label var pc5parhighed "Primary caregiver's parents' highest ed" label values pc5parhighed pc5highedL *Check bysort pc5statgen: tab pc5parhighed, m //Making variable that replaces those with missing pcg status with bio. mom's gen pc5parhighedimm = pc5parhighed replace pc5parhighedimm = m5parhighed if pc5statgen==.n label values pc5parhighedimm pc5highedL label var pc5parhighedimm "Primary caregiver's parents' highest ed" *Check bysort pc5statgen: tab pc5parhighedimm, m //Making missing parents' highest education dummies gen pc5parhighedm1=pc5parhighed>=. label var pc5parhighedm1 "Missing primary caregiver's parents' highest education" gen pc5parhighedm2=pc5parhighedimm>=. label var pc5parhighedm2 "Missing primary caregiver's parents' highest education" *Check tab pc5parhighedm1 pc5parhighed, m col tab pc5parhighedm2 pc5parhighedimm, m col order pc5parhighedm* pc5parhighedimm pc5parhighed, after(pc5highed) /************************************** CITIZENSHIP ***************************************/ //Mother citizenship status gen m5citizen=. foreach var in m1h2 m2g1d m3h1d m4h1d m5e1d{ replace m5citizen=`var' if m5citizen>=. *Check tab m5citizen, m } //Father citizenship status gen f5citizen=. foreach var in f1h2 f2g1d f3h1d f4h1d f5e1d{ replace f5citizen=`var' if f5citizen>=. *Check tab f5citizen, m } //these loops replace the pcg variable with the born in the US (m/f1) and the //citizenship questions if the citizenship variable is still missing. This leaves //us with the earliest missing and most recent non-missing answer //NP PCG citizenship status gen np5citizen=n5d5c replace np5citizen=1 if n5d5a==1 // this makes the citizenship=1 if the np pcg was born in the US *Check tab np5citizen, m //Making a PCG citizenship variable gen pc5citizen=m5citizen if pc5statgen==1 replace pc5citizen=f5citizen if pc5statgen==2 replace pc5citizen=np5citizen if pc5statgen==3 replace pc5citizen=pc5statgen if pc5statgen>. label var pc5citizen "Primary caregiver US citizenship" label values pc5citizen missingL //Making variable that replaces those with missing pcg status with bio. mom's gen pc5citizenimm=pc5citizen replace pc5citizenimm=m5citizen if pc5statgen==.n label var pc5citizenimm "Primary caregiver US citizenship" label values pc5citizenimm missingL //recoding the variables so yes is 1 and no is 0, and recoding missing recode pc5citizen 2=0 recode pc5citizenimm 2=0 *Check bysort pc5statgen: tab pc5citizen, m bysort pc5statgen: tab pc5citizenimm, m //making missing citizenship dummies gen pc5citizenm1=pc5citizen>=. label var pc5citizenm1 "Missing primary caregiver's citizenship" gen pc5citizenm2=pc5citizenimm>=. label var pc5citizenm2 "Missing primary caregiver's citizenship" *Check tab pc5citizenm1 pc5citizen, m col tab pc5citizenm2 pc5citizenimm, m col order pc5citizenm* pc5citizenimm pc5citizen, after(pc5parhighed) /************************************** INTACT FAMILY AGE 15 ***************************************/ //mother is primary caregiver or pcg is not in wave 5 gen pc5intact15=m1e2 if pc5statgen==1 //father is primary caregiver replace pc5intact15=f1e2 if pc5statgen==2 replace pc5intact15=f4h0f if pc5statgen==2 & pc5intact15>=. //missing if non-parental caregiver ("other") or missing replace pc5intact15=.i if pc5statgen==3 replace pc5intact15=pc5statgen if pc5statgen>=. label values pc5intact15 missingL label var pc5intact15 "Primary caregiver lived with both parents age 15" //Making variable that replaces those with missing pcg status with bio. mom's gen pc5intact15imm = pc5intact15 replace pc5intact15imm=m1e2 if pc5statgen==.n label values pc5intact15imm missingL label var pc5intact15imm "Primary caregiver lived with both parents age 15" //recode so that 1 is yes and 0 is no recode pc5intact15 2=0 recode pc5intact15imm 2=0 *Check bysort pc5statgen: tab pc5intact15, m bysort pc5statgen: tab pc5intact15imm, m //making a missing intact family at age 15 dummies gen pc5intact15m1=pc5intact15>=. label var pc5intact15m1 "Missing primary caregiver's intact family status" gen pc5intact15m2=pc5intact15imm>=. label var pc5intact15m2 "Missing primary caregiver's intact family status" *Check tab pc5intact15m1 pc5intact15, m col tab pc5intact15m2 pc5intact15imm, m col order pc5intact15m* pc5intact15imm pc5intact15, after(pc5citizen) /************************************** HOUSEHOLD SIZE & # OF KIDS IN HH ***************************************/ *NOTE IF RESULS ARE DIFFERENT CHANGE THIS BACK //HOUSEHOLD SIZE\\ *mother most recent data for household size gen m5hhsizea = m5hhsize replace m5hhsizea = cm4adult + cm4kids if m5hhsize>=. & cm4adult<. & cm4kids<. replace m5hhsizea = cm3adult + cm3kids if m5hhsize>=. & cm3adult<. & cm3kids<. *Check sum m5hhsizea *father most recent data for household size gen f5hhsizea = cf5hhsize replace f5hhsizea = cf4adult + cf4kids if f5hhsize>=. & cf4adult<. & cf4kids<. replace f5hhsizea = cf3adult + cf3kids if f5hhsize>=. & cf3adult<. & cf3kids<. *Check sum f5hhsizea *non-parental primary caregiver wave 5 data gen np5hhsize = n5d3 replace np5hhsize = 2 if n5d2a==0 // just pcg and child *Check sum np5hhsize *Making PCG hhsize variable gen pc5hhsize = m5hhsizea if pc5statgen==1 replace pc5hhsize = f5hhsizea if pc5statgen==2 replace pc5hhsize = np5hhsize if pc5statgen==3 replace pc5hhsize = pc5statgen if pc5statgen>=. label var pc5hhsize "Primary caregiver household size" label values pc5hhsize missingL *Check bysort pc5statgen: sum pc5hhsize //Making variable that replaces those with missing pcg status with bio. mom's gen pc5hhsizeimm = pc5hhsize replace pc5hhsizeimm = m5hhsizea if pc5statgen==.n label var pc5hhsizeimm "Primary caregiver household size" label values pc5hhsizeimm missingL *Check bysort pc5statgen: sum pc5hhsizeimm //Making missing household size dummies gen pc5hhsizem1=pc5hhsize>=. label var pc5hhsizem1 "Missing primary caregiver's household size" gen pc5hhsizem2=pc5hhsizeimm>=. label var pc5hhsizem2 "Missing primary caregiver's household size" *Check tab pc5hhsizem1, m tab pc5hhsizem2, m //Mean-centered household size summ pc5hhsize, meanonly gen pc5hhsizec = pc5hhsize-r(mean) label var pc5hhsizec "Primary caregiver mean-centered household size" label values pc5age missingL *Check sum pc5hhsize pc5hhsizec //Making variable that replaces those with missing pcg status with bio. mom's summ pc5hhsizeimm, meanonly gen pc5hhsizecimm = pc5hhsizeimm-r(mean) label var pc5hhsizecimm "Primary caregiver mean-centered household size" label values pc5ageimm missingL *Check sum pc5hhsizeimm pc5hhsizecimm //NUMBER OF KIDS IN HOUSEHOLD\\ //mother most recent data for kids in household gen m5kids = cm5kids replace m5kids = cm4kids if m5hhsize>=. & cm4kids<. replace m5kids = cm3kids if m5hhsize>=. & cm3kids<. *Check sum m5kids *father most recent data for kids in household gen f5kids = cf5kids replace f5kids = cf4kids if f5hhsize>=. & cf4kids<. replace f5kids = cf3kids if f5hhsize>=. & cf3kids<. *Check sum f5kids //non-parental primary caregiver *first, code if the person is a child foreach var in n5d3c_1 n5d3c_2 n5d3c_3 n5d3c_4 n5d3c_5 /// n5d3c_6 n5d3c_7 n5d3c_8 { gen `var'k = `var'<18 tab `var' `var'k, m } *now adding up n5d3c_1-8 egen np5kids = rowtotal(n5d3c_1k n5d3c_2k n5d3c_3k /// n5d3c_4k n5d3c_5k n5d3c_6k /// n5d3c_7k n5d3c_8k), miss replace np5kids = 1 if n5d2a==0 // just pcg and child *Check sum np5kids //Making # kids in pcg household gen pc5kids = m5kids if pc5statgen==1 replace pc5kids = f5kids if pc5statgen==2 replace pc5kids = np5kids if pc5statgen==3 replace pc5kids = pc5statgen if pc5statgen>=. label var pc5kids "Primary caregiver number of children in household" label values pc5kids missingL *Check bysort pc5statgen: sum pc5kids //Making # of kids in pcg household imputed mother gen pc5kidsimm = pc5kids replace pc5kidsimm = m5kids if pc5statgen==.n label var pc5kidsimm "Primary caregiver number of children in household" label values pc5kidsimm missingL *Check bysort pc5statgen: sum pc5kidsimm *replacing number of kids to 1 if it is 0 recode pc5kids 0=1 recode pc5kidsimm 0=1 //making missing household size dummies gen pc5kidsm1=pc5kids>=. label var pc5kidsm1 "Missing primary caregiver's number of children in household" gen pc5kidsm2=pc5kidsimm>=. label var pc5kidsm2 "Missing primary caregiver's number of children in household" *Check tab pc5kidsm1 tab pc5kidsm2 order pc5hhsizec* pc5hhsize* pc5hhsizem* pc5kidsm* pc5kidsimm pc5kids, /// after(pc5intact15) /******************************************************************************* 5: PRIMARY CAREGIVER HEALTH AND BEHAVIOR *******************************************************************************/ /************************************** DEPRESSION ***************************************/ //Liberal Depression Definition //Mother depression imputing back to survey 2 gen m5deplib = cm5md_case_lib replace m5deplib = cm4md_case_lib if m5deplib>=. replace m5deplib = cm3md_case_lib if m5deplib>=. replace m5deplib = cm2md_case_lib if m5deplib>=. *Check tab m5deplib //Father depression imputing back to survey 2 gen f5deplib = cf5md_case_lib replace f5deplib = cf4md_case_lib if f5deplib>=. replace f5deplib = cf3md_case_lib if f5deplib>=. replace f5deplib = cf2md_case_lib if f5deplib>=. *Check tab f5deplib //Non-parental primary caregiver depression gen np5deplib = cn5md_case_lib *Check tab np5deplib, m //Making PCG depression variable gen pc5deplib = m5deplib if pc5statgen==1 replace pc5deplib = f5deplib if pc5statgen==2 replace pc5deplib = np5deplib if pc5statgen==3 replace pc5deplib = pc5statgen if pc5statgen>=. label var pc5deplib "Primary caregiver depression liberal" label values pc5deplib missingL *Check bysort pc5statgen: tab pc5deplib, m //Making variable that replaces those with missing pcg status with bio. mom's gen pc5deplibimm = pc5deplib replace pc5deplibimm = m5deplib if pc5statgen==.n label var pc5deplibimm "Primary caregiver depression liberal" label values pc5deplibimm missingL *Check bysort pc5statgen: tab pc5deplibimm, m //making missing depression dummies gen pc5depm1=pc5deplib>=. label var pc5depm1 "Primary caregiver's depression status is missing" gen pc5depm2=pc5deplibimm>=. label var pc5depm2 "Primary caregiver's depression status is missing" *Check tab pc5depm1 pc5deplib, m col tab pc5depm2 pc5deplibimm, m col order pc5depm* pc5deplibimm pc5deplib, after(pc5kids) /********************* DRINKING *********************/ //mother is primary caregiver or pcg not present in wave 5 imputing back to survey 3 //Mother drinks back to survey 3 gen m5drinks = m5g19 replace m5drinks= m4j20 if m5drinks>=. replace m5drinks= m3j28 if m5drinks>=. *Check tab m5drinks, m //Father drinks gen f5drinks= f5g19 replace f5drinks= f4j20 if f5drinks>=. replace f5drinks= f3j33 if f5drinks>=. *Check tab f5drinks, m //Making PCG drinks variable gen pc5drinks = m5drinks if pc5statgen==1 replace pc5drinks = f5drinks if pc5statgen==2 replace pc5drinks = .i if pc5statgen==3 replace pc5drinks = pc5statgen if pc5statgen>=. label var pc5drinks "Primary caregiver highest # drinks in one day last 12 months" //Making variable that replaces those with missing pcg status with bio. mom's gen pc5drinksimm = pc5drinks replace pc5drinksimm = m5drinks if pc5statgen==.n label var pc5drinksimm "Primary caregiver highest # drinks in one day last 12 months" //recoding so 10+ is all one category recode pc5drinks 4=3 recode pc5drinksimm 4=3 label define pc5drinksL 0 "0" 1 "1-3" 2 "4-10" 3 "10+" /// .n "Not in Wave" .z "Missing" .i "N/A" /// .r "Refuse" .d "Don't Know" label values pc5drinks pc5drinksL label values pc5drinksimm pc5drinksL *Check bysort pc5statgen: tab pc5drinks, m bysort pc5statgen: tab pc5drinksimm, m //making missing drinking dummies gen pc5drinksm1=pc5drinks>=. label var pc5drinksm1 "Missing primary caregiver's drinking" gen pc5drinksm2=pc5drinksimm>=. label var pc5drinksm2 "Missing primary caregiver's drinking" *Check tab pc5drinksm1 pc5drinks, m col tab pc5drinksm2 pc5drinksimm, m col order pc5drinksm* pc5drinksimm pc5drinks, after(pc5deplib) /************************************** DELINQUENCY/IMPULSIVITY ***************************************/ //if the respondent responds agree or strongly agree with any of the following //variables, they are coded as yes //do things without considering consequences, get in trouble bc/ don't think, //trouble with law, lie/cheat, fight, don't feel guilty //mother delinquency egen m5delinqd=anymatch( /// m4j25a1 m4j25a2 m4j25b1 m4j25b2 m4j25b3 m4j25b4 /// m3j44c m3j44d) /// , values (1 2) replace m5delinqd=m4j25b1 if m4j25b1>=. //m4j25b1 has lowest missing of all these vars *Check tab m5delinqd, m //father delinquency egen f5delinqd=anymatch( /// f4j25a1 f4j25a2 f4j25b1 f4j25b2 f4j25b3 f4j25b4 //// f2j23 f2j24) /// , values (1 2) replace f5delinqd=f4j25b1 if f4j25b1>=. //f4j25b1 has lowest missing of all these vars *Check tab f5delinqd, m //making the pcg delinquency var gen pc5delinqd=m5delinqd if pc5statgen==1 replace pc5delinqd=f5delinqd if pc5statgen==2 replace pc5delinqd=.i if pc5statgen==3 replace pc5delinqd= pc5statgen if pc5statgen>=. label values pc5delinqd missingL label var pc5delinqd "Primary caregiver delinquency dummy" //Making variable that replaces those with missing pcg status with bio. mom's gen pc5delinqdimm= pc5delinqd replace pc5delinqdimm= m5delinqd if pc5statgen==.n label values pc5delinqdimm missingL label var pc5delinqdimm "Primary caregiver delinquency dummy" *Check bysort pc5statgen: tab pc5delinqd, m bysort pc5statgen: tab pc5delinqdimm, m //making a missing delinquency dummy gen pc5delinqdm1=pc5delinqd>=. label var pc5delinqdm1 "Missing primary caregiver's delinquency" gen pc5delinqdm2=pc5delinqdimm>=. label var pc5delinqdm2 "Missing primary caregiver's delinquency" *Check tab pc5delinqdm1 pc5delinqd, m col tab pc5delinqdm2 pc5delinqdimm, m col order pc5delinqdm* pc5delinqdimm pc5delinqd, after(pc5drinks) /****************************************************************************** 6: PRIMARY CAREGIVER EMPLOYMENT AND ECONOMIC INDICATORS *******************************************************************************/ /************************************** ECONOMIC HARDSHIP ***************************************/ *number of hardships and a dummy for any hardship //note: the variables below were selected to maintain as much consistency as // possible the non-parental survey 5 (ng) *hungry and telephone local hardship m2h19a m2h19c m2h19d m2h19e m2h19f m2h19g m2h19i m2h19j m2h19k /// m2h19l /// m5f23a m5f23b m5f23c m5f23d m5f23e m5f23f m5f23g m5f23h m5f23i /// m5f23j /// f2h17a f2h17c f2h17d f2h17e f2h17f f2h17g f2h17i f2h17j f2h17k /// f2h17l /// f5f23a f5f23b f5f23c f5f23d f5f23e f5f23f f5f23g f5f23h f5f23i /// f5f23j /// n5g1a n5g1b n5g1c n5g1d n5g1d n5g1e n5g1f n5g1g n5g1h n5g1i n5g1j *dropping the ones from survey 2 that don't align with the others drop f2h17b m2h19b f2h17h m2h19h //first, have to recode them so 0 is no, and 1 is yes, and recode missings foreach var in `hardship' { recode `var' 2=0 -9/-1 =. } //Mother hardship *first, mother economic hardship in survey 5 egen m5hardship = rowtotal(m5f23a-m5f23j), m tab m5hardship, m *next, mother economic hardship in survey 2 egen m2hardship = rowtotal(m2h19a-m2h19l), m tab m2hardship, m *replacing survey 5 hardship with previous hardship if missing replace m5hardship=m2hardship if m5hardship==. tab m5hardship, m //Father hardship *first, father economic hardship in survey 5 egen f5hardship = rowtotal(f5f23a-f5f23j), m tab f5hardship, m *next, father economic hardship in survey 2 egen f2hardship = rowtotal(f2h17a-f2h17l), m tab f2hardship, m *replacing survey 5 hardship with previous hardship if survey 5 is missing replace f5hardship=f2hardship if f5hardship==. tab f5hardship, m //Non-parental primary caregiver hardship *first, father economic hardship in survey 5 egen np5hardship = rowtotal(n5g1a-n5g1j), m tab np5hardship, m //Primary Caregiver Hardship- count variable gen pc5hardship=m5hardship if pc5statgen==1 replace pc5hardship=f5hardship if pc5statgen==2 //father replace pc5hardship=np5hardship if pc5statgen==3 //non-parental pcg replace pc5hardship = pc5statgen if pc5statgen>=. label var pc5hardship "Primary caregiver number economic hardships" label values pc5hardship misisngL *Check bysort pc5statgen: tab pc5hardship, m //Making variable that replaces those with missing pcg status with bio. mom's gen pc5hardshipimm=pc5hardship replace pc5hardshipimm=m5hardship if pc5statgen==.n label var pc5hardshipimm "Primary caregiver number economic hardships" label values pc5hardshipimm misisngL *Check bysort pc5statgen: tab pc5hardshipimm, m //Dummy variables for economic hardship gen pc5hardshipd=pc5hardship>0 if pc5hardship<. label var pc5hardshipd "Primary caregiver experienced economic hardship dummy" label values pc5hardshipd misisngL *Check bysort pc5statgen: tab pc5hardshipd, m gen pc5hardshipdimm=pc5hardshipimm>0 if pc5hardshipimm<. label var pc5hardshipdimm "Primary caregiver experienced economic hardship dummy" label values pc5hardshipdimm misisngL *Check bysort pc5statgen: tab pc5hardshipdimm, m //making missing hardhip dummies gen pc5hardshipm1=pc5hardship>=. label var pc5hardshipm1 "Missing primary caregiver's economic hardship" gen pc5hardshipm2=pc5hardshipimm>=. label var pc5hardshipm2 "Missing primary caregiver's economic hardship" *Check tab pc5hardshipm1 pc5hardshipd, m col tab pc5hardshipm2 pc5hardshipdimm, m col order pc5hardshipm* pc5hardshipd* pc5hardshipimm pc5hardship, after(pc5delinqd) /************************************** CURRENT EMPLOYMENT STATUS ***************************************/ //mother employment imputed back to survey 2 gen m5emp = m5i4 replace m5emp = m4k4 if m5emp>=. replace m5emp = m3k4 if m5emp>=. replace m5emp = m2k5 if m5emp>=. //father employment imputed back to survey 2 gen f5emp = f5i4 replace f5emp = f4k4 if f5emp>=. replace f5emp = f3k4 if f5emp>=. replace f5emp = f2k8 if f5emp>=. //recoding so that 1 is yes and 0 is no recode m5emp 2=0 recode f5emp 2=0 *Check tab m5emp, m tab f5emp, m //non-parental primary caregiver gen np5emp = n5e2 *recoding unemployed to 0 & employed to 1 for the non-parental caregiver recode np5emp 1/2=1 4/101=0 //Making the PCG employed variable gen pc5emp= m5emp if pc5statgen==1 replace pc5emp= f5emp if pc5statgen==2 replace pc5emp= np5emp if pc5statgen==3 replace pc5emp= pc5statgen if pc5statgen>=. label var pc5emp "Primary caregiver employed" label values pc5emp missingL *Check bysort pc5statgen: tab pc5emp, m //Making variable that replaces those with missing pcg status with bio. mom's gen pc5empimm= pc5emp replace pc5empimm= m5emp if pc5statgen==.n label var pc5empimm "Primary caregiver employed" label values pc5empimm missingL *Check bysort pc5statgen: tab pc5empimm, m //making missing employment status dummies gen pc5empm1=pc5emp>=. label var pc5empm1 "Missing primary caregiver's employment status" gen pc5empm2=pc5empimm>=. label var pc5empm2 "Missing primary caregiver's employment status" *Check tab pc5empm1 pc5emp, m col tab pc5empm2 pc5empimm, m col order pc5empm* pc5empimm pc5emp, after(pc5hardship) /************************************** SELF-EMPLOYMENT STATUS ***************************************/ //Mother self-employment gen m5selfemp = m5i11 *imputing back to survey 2 replace m5selfemp = m4k11 if m5selfemp>=. replace m5selfemp = m3k11 if m5selfemp>=. replace m5selfemp = m2k9a if m5selfemp>=. *Check tab m5selfemp, m //father self-employment gen f5selfemp = f5i11 *imputing back to survey 2 replace f5selfemp = f4k11 if f5selfemp>=. replace f5selfemp = f3k11 if f5selfemp>=. replace f5selfemp = f2k9a if f5selfemp>=. *Check tab f5selfemp, m //Making the PCG self-employment var gen pc5selfemp = m5selfemp if pc5statgen==1 replace pc5selfemp = f5selfemp if pc5statgen==2 replace pc5selfemp = .i if pc5statgen==3 replace pc5selfemp = pc5statgen if pc5statgen>=. label var pc5selfemp "Primary caregiver is self-employed" label values pc5selfemp missingL //recoding so that 1 is yes and 0 is no recode pc5selfemp 2=0 *Check bysort pc5statgen: tab pc5selfemp, m //Making variable that replaces those with missing pcg status with bio. mom's gen pc5selfempimm = pc5selfemp replace pc5selfempimm = m5selfemp if pc5statgen==.n //recoding so that 1 is yes and 0 is no recode pc5selfempimm 2=0 label var pc5selfempimm "Primary caregiver is self-employed" label values pc5selfempimm missingL *Check bysort pc5statgen: tab pc5selfempimm, m //making a missing self-employment dummy gen pc5selfempm1=pc5selfemp>=. label var pc5selfempm1 "Missing primary caregiver's self-employment status" gen pc5selfempm2=pc5selfempimm>=. label var pc5selfempm2 "Missing primary caregiver's self-employment status" *Check tab pc5selfempm1 pc5selfemp, m col tab pc5selfempm2 pc5selfempimm, m col order pc5selfempm* pc5selfempimm pc5selfemp, after(pc5emp) /************************************************* INDUSTRY- MANUFACTURING, BLUE COLLAR & SERVICE **************************************************/ *only ask in wave 5 //Making indsutry gen m5ind=m5i12 *imputing back to survey 2 replace m5ind=m4k12 if m5ind>=. replace m5ind=m3k12 if m5ind>=. replace m5ind=m2k10bc if m5ind>=. *Check tab m5ind //Father industry gen f5ind=f5i12 *imputing back to survey 2 replace f5ind=f4k12 replace f5ind=f3k12 if f5ind>=. replace f5ind=f2k15bc if f5ind>=. *Check tab f5ind //Making PCG industry var gen pc5ind=m5ind if pc5statgen==1 replace pc5ind=f5ind if pc5statgen==2 replace pc5ind = .i if pc5statgen==3 replace pc5ind = pc5statgen if pc5statgen>=. label var pc5ind "Primary caregiver industry" *Check tab pc5ind //Making variable that replaces those with missing pcg status with bio. mom's gen pc5indimm= pc5ind replace pc5indimm= m5ind if pc5statgen==.n label var pc5indimm "Primary caregiver industry" *Check tab pc5indimm //Manufacturing Industry gen pc5indmanuf = pc5ind recode pc5indmanuf 1/105 107/1000=0 //non-manufacturing jobs are 0 recode pc5indmanuf 106=1 //manufacturing jobs are 1 label var pc5indmanuf "Primary caregiver works in the manufacturing industry" label values pc5indmanuf missingL *Check bysort pc5statgen: tab pc5indmanuf, m //Manufacturing Industry mother imputed gen pc5indmanufimm = pc5indimm recode pc5indmanufimm 1/105 107/1000=0 //non-manufacturing jobs are 0 recode pc5indmanufimm 106=1 //manufacturing jobs are 1 label var pc5indmanufimm "Primary caregiver works in the manufacturing industry" label values pc5indmanufimm missingL *Check bysort pc5statgen: tab pc5indmanufimm, m //Blue-collar industry gen pc5indbc = pc5ind recode pc5indbc 1/104 106 109/1000=0 //non-blue-collar jobs are 0 recode pc5indbc 105 107 108 =1 //blue collar jobs are 1 //blue collar: precision craft, helpers, laborers, and transportation //recoding missing for non-parental pcg replace pc5indbc = .i if pc5statgen==3 label var pc5indbc "Primary caregiver works in a blue-collar job" label values pc5indbc missingL *Check bysort pc5statgen: tab pc5indbc, m //Blue Collar Industry mother imputed gen pc5indbcimm = pc5indimm recode pc5indbcimm 1/104 106 109/1000=0 //non-blue collar jobs are 0 recode pc5indbcimm 105 107 108 =1 //blue collar jobs are 1 label var pc5indbcimm "Primary caregiver works in a blue collar industry" label values pc5indbcimm missingL *Check bysort pc5statgen: tab pc5indmanufimm, m //Service Industry gen pc5indserv = pc5ind recode pc5indserv 1/108 110/1000=0 //non-service jobs are 0 recode pc5indserv 109=1 //service jobs are 1 //recoding missing for non-parental pcg replace pc5indserv = .i if pc5statgen==3 label var pc5indserv "Primary caregiver works in the service industry" label values pc5indserv missingL *Check bysort pc5statgen: tab pc5indserv, m //Blue collar or manufacturing industry gen pc5indbcmanuf = pc5ind recode pc5indbcmanuf 1/104 109/1000=0 //non-blue collar or manuf jobs are 0 recode pc5indbcmanuf 105 106 107 108 =1 //blue collar or manuf jobs are 1 //recoding missing for non-parental pcg replace pc5indbcmanuf = .i if pc5statgen==3 label var pc5indbcmanuf "Primary caregiver works in the blue collar or manufacturing industry" label values pc5indbcmanuf missingL *Check bysort pc5statgen: tab pc5indbcmanuf, m //Blue collar or manufacturing industry mother imputed gen pc5indbcmanufimm = pc5indimm recode pc5indbcmanufimm 1/104 109/1000=0 //non-blue collar or manuf jobs are 0 recode pc5indbcmanufimm 105 106 107 108 =1 //blue collar or manuf jobs are 1 label var pc5indbcmanufimm "Primary caregiver works in blue collar or manufacturing industry" label values pc5indbcmanufimm missingL *Check bysort pc5statgen: tab pc5indbcmanufimm, m //making missing industry dummies gen pc5indm1=pc5ind>=. label var pc5indm1 "Missing primary caregiver's industry" gen pc5indm2=pc5indimm>=. label var pc5indm2 "Missing primary caregiver's industry" *Check tab pc5indm1 pc5ind, m col tab pc5indm2 pc5indimm, m col order pc5indm* pc5indmanuf* pc5indserv pc5indbc* pc5indimm pc5ind, after(pc5selfemp) /************************************** FULL-TIME ***************************************/ //Mother full-time work gen m5wrkft = m5i10 *imputing back to survey 2 replace m5wrkft = m4k10 if m5wrkft>=. replace m5wrkft = m3k10 if m5wrkft>=. replace m5wrkft = m2k9 if m5wrkft>=. *Check tab m5wrkft, m //Father full-time work gen f5wrkft = f5i10 *imputing back to survey 2 replace f5wrkft = f4k10 if f5wrkft>=. replace f5wrkft = f3k10 if f5wrkft>=. replace f5wrkft = f2k9 if f5wrkft>=. *Check tab m5wrkft, m tab f5wrkft, m //Recoding >=35=1 and <35=0 recode m5wrkft 0/34=0 35/1000=1 recode f5wrkft 0/34=0 35/1000=1 *Check tab f5wrkft, m //Non-parental primary caregiver gen np5wrkft = n5e2 *recoding so that working full-time is 1 and all others are 0 recode np5wrkft 2/101=0 *Check tab np5wrkft, m //Making PCG full-time var gen pc5wrkft = m5wrkft if pc5statgen==1 replace pc5wrkft = f5wrkft if pc5statgen==2 replace pc5wrkft = np5wrkft if pc5statgen==3 replace pc5wrkft = pc5statgen if pc5statgen>=. label var pc5wrkft "Primary caregiver works full-time" label values pc5wrkft missingL *Check bysort pc5statgen: tab pc5wrkft, m tab m5i10 pc5wrkft if pc5statgen==1, m //Making variable that replaces those with missing pcg status with bio. mom's gen pc5wrkftimm = pc5wrkft replace pc5wrkftimm = m5wrkft if pc5statgen==.n label var pc5wrkftimm "Primary caregiver works full-time" label values pc5wrkftimm missingL *Check bysort pc5statgen: tab pc5wrkftimm, m //making missing full-time work dummies gen pc5wrkftm1=pc5wrkft>=. label var pc5wrkftm1 "Missing primary caregiver's hours worked" gen pc5wrkftm2=pc5wrkftimm>=. label var pc5wrkftm2 "Missing primary caregiver's hours worked" *Check tab pc5wrkftm1 pc5wrkft, m col tab pc5wrkftm2 pc5wrkftimm, m col order pc5wrkftm* pc5wrkftimm pc5wrkft, after(pc5ind) /************************************** UNION MEMBERSHIP STATUS ***************************************/ //Mother is primary caregiver or pcg is not iin wave 5 gen m5union = m5i14c *imputing with survey 3 union replace m5union = m3i0d if m5union>=. *Check tab m5union //Father is primary caregiver gen f5union = f5i14c *imputing with survey 3 union replace f5union = f3i0d if f5union>=. *Check tab f5union //Recoding 2=0 recode m5union 2=0 recode f5union 2=0 *Check tab m5union tab f5union //Making PCG full-time var gen pc5union = m5union if pc5statgen==1 replace pc5union = f5union if pc5statgen==2 replace pc5union = .i if pc5statgen==3 replace pc5union = pc5statgen if pc5statgen>=. label var pc5union "Primary caregiver belongs to union" label values pc5union missingL *Check bysort pc5statgen: tab pc5union, m tab m5i14c pc5union if pc5statgen==1, m tab f5i14c pc5union if pc5statgen==2, m //Making variable that replaces those with missing pcg status with bio. mom's gen pc5unionimm = pc5union replace pc5unionimm = m5union if pc5statgen==.n label var pc5unionimm "Primary caregiver belongs to union" label values pc5unionimm missingL *Check bysort pc5statgen: tab pc5unionimm, m //making a missing union dummy gen pc5unionm1=pc5union>=. label var pc5unionm1 "Missing primary caregiver's union status" gen pc5unionm2=pc5unionimm>=. label var pc5unionm2 "Missing primary caregiver's union status" *Check tab pc5unionm1 pc5union, m col tab pc5unionm2 pc5unionimm, m col order pc5unionm* pc5unionimm pc5union, after(pc5wrkft) /////dropping all but those variables needed for analysis\\\\\ drop cm1age-f5union /****************************************************************************** 7: SINGLE IMPUTATION *******************************************************************************/ //NOTE: hs and black are omitted *imputing: parents' high ed, intact family, household size, number of kids, * depression, drinks, delinquency, hardship, employment, self-employment * industry, working full time, union local missm2 /// pc5femalem2 pc5racem2 pc5agem2 pc5marcohm2 pc5highedm2 pc5parhighedm2 /// pc5citizenm2 pc5intact15m2 pc5hhsizem2 pc5kidsm2 pc5depm2 pc5drinksm2 /// pc5delinqdm2 pc5hardshipm2 pc5empm2 pc5selfempm2 pc5indm2 pc5wrkftm2 /// pc5unionm2 sum `missm2' //Running imputation-hs and black are omitted categories for categorical vars //imputing those with >3% missing local imputeregimm /// pc5femaleimm pc5whiteimm pc5hispimm pc5otherimm pc5ageimm /// pc5marcohimm pc5lhsimm pc52yrimm pc54yrimm pc5citizenimm local toimputeimm /// pc5parhighedimm pc5intact15imm pc5kidsimm /// pc5deplibimm pc5drinksimm pc5delinqdimm pc5hardshipdimm /// pc5empimm pc5selfempimm pc5indmanufimm pc5indbcimm /// pc5wrkftimm pc5unionimm foreach var in `toimputeimm' { impute `var' `imputeregimm', gen(`var'i) sum `var' `var'i } //making a "layoff missing" variable gen layoffm1= layoff>=. label var layoffm1 "Missing layoff" tab layoff /****************************************************************************** SAVE DATASET *******************************************************************************/ save "data/cleaned.dta", replace