/**--------------------------------------------------------------------**/
/** Program created by: Thierry Kruten                                 **/
/** Updated on : 31 MAY 2013                                           **/
/** Updated on : 15 SEPTEMBER 2021 FOR NEW BOTTOM AND TOP CODING METHODOLOGY (Heba Omar) **/
/** Font used to get a nice layout - Courier New (SAS default font)    **/
/**--------------------------------------------------------------------**/

TITLE " " ;

/**---------------------------------------------------------------------**/
/** PART I: MACRO DECLARATIONS                                          **/
/**---------------------------------------------------------------------**/

/**---------------------**/
/**  DATA PREPARATION   **/
/**---------------------**/

/* prep: builds the working datasets used by every other macro.          */
/*   Input : household file referenced by the macro variable whose name  */
/*           is the concatenation of cc, yy and the letter h.            */
/*   Output: dataset START (weights, log income, top-coded dhi) and a    */
/*           sort of dataset PREPKF by did and ey.                       */
/*   Side effect: creates macro variables b and t holding the weighted   */
/*           25th and 75th percentiles of dhi_log.                       */
/* NOTE(review): part of this macro is missing from this copy of the     */
/*   file (see the flagged statement below). The steps that create       */
/*   PREPKF, ey, the poor flags, sex and kidsm are not visible here.     */
%MACRO prep ;

  *-----------------------------------------------------------------------;
  * Clean the dataset and create equivalized income and weights           ;
  *-----------------------------------------------------------------------;
  DATA start ;
    SET &&&cc.&yy.h (KEEP=did hid hwgt nhhmem dhi nhhmem17 nhhmem65 hpartner);

    * Create person weight as hweight times household member ;
    wt = hwgt * nhhmem ;

    * create children weight as hweight times number of persons;
    ct = hwgt * nhhmem17;

    * create dummy variable "elderly" as existence of at least one elderly person within the household ;
    elderly = 0 ;
    IF (nhhmem65>0) THEN elderly = 1 ;

    * create elderly weight as hweight times number of persons ;
    et = hwgt * nhhmem65;

    ***Bottom and top coding / outlier detection*** ;
    * Select only records when DHI non-missing;
    IF dhi=. THEN DELETE;

    /* FIX: the comment below used to lack its closing semicolon, so it  */
    /* swallowed the IF statement and the recode was never executed.     */
    * recode negative DHI into zero ;
    IF (dhi<0) THEN dhi=0;

    * keep negatives and 0 in the overall distribution of non-missing dhi;
    /* dhi is non-missing and non-negative here, so the only case where  */
    /* the log is undefined is dhi = 0. Assigning 0 directly gives the   */
    /* same dhi_log as computing log and patching the missing result,    */
    /* without the invalid-argument notes in the SAS log.                */
    IF (dhi>0) THEN dhi_log=log(dhi);
    ELSE dhi_log=0;
  RUN;

  * detect interquartile range;
  * NOTE: Defining percentiles requires that the dataset is sorted by equivalized income ;
  PROC SORT DATA=start ;
    BY did dhi_log;
  RUN ;

  /* Household-weighted quartiles of log income. */
  PROC UNIVARIATE DATA=start NOPRINT;
    VAR dhi_log ;
    WEIGHT hwgt;
    OUTPUT OUT= temp P25=q25 P75=q75;
  RUN ;

  /* SYMPUTX converts the numeric quartiles with a wider field than      */
  /* SYMPUT and strips blanks, so no precision is lost and no            */
  /* numeric-to-character conversion note is written.                    */
  DATA _NULL_;
    SET temp;
    CALL SYMPUTX("b",q25);
    CALL SYMPUTX("t",q75);
  RUN;

  DATA start ;
    SET start ;
    /* Parentheses keep the arithmetic valid if a quartile is negative. */
    iqr=(&t)-(&b);

    * detect upper bound for extreme values;
    upper_bound=(&t) + (iqr * 3) ;
    lower_bound=(&b) - (iqr * 3);

    * top code income at upper bound for extreme values ;
    IF dhi>exp(upper_bound) THEN dhi=exp(upper_bound) ;

    /* NOTE(review): the next statement is corrupted in this copy of the */
    /* program. Text between a less-than sign and a later greater-than   */
    /* sign appears to have been lost. It presumably began with the      */
    /* bottom coding of dhi at exp(lower_bound) and ended with the       */
    /* condition that sets kidsm. Restore it from the original program   */
    /* before running. It is left exactly as found.                      */
    IF dhi0)) THEN kidsm = 1 ;
  RUN ;

  * NOTE: Gini and quintiles require that the dataset is sorted by equivalized income;
  PROC SORT DATA=prepkf ;
    BY did ey ;
  RUN ;

%MEND prep ;

/**--------------------**/
/**  GINI COEFFICIENT  **/
/**--------------------**/

/* gini: writes a one-row dataset named cc yy _1 holding did and gini.   */
/*   Reads PREPKF twice in one DATA step: a first loop totals the        */
/*   weights (swt) and weighted income (swtey); the main pass then       */
/*   accumulates cumulative percentage shares of weight and income in    */
/*   the order the rows arrive.                                          */
/*   The accumulators are never reset, so the result is a single figure  */
/*   over all rows of PREPKF, output on the last row.                    */
%MACRO gini ;

  *---------------- METHOD 1 ----------------- ;
  DATA &cc.&yy._1 (KEEP=did gini) ;

    /* First pass: grand totals, computed once. */
    IF _N_ = 1 THEN DO UNTIL (last) ;
      SET prepkf END=last;
      swt + wt ;
      swtey + (wt*ey) ;
    END ;

    /* Second pass over the same data. */
    SET prepkf END=eof;
    BY did ;

    IF _N_ = 1 THEN DO ;
      prewt = 0 ;
      preey = 0 ;
      up = 0 ;
      sum = 0 ;
    END ;

    * NOTE: Optional - Apply a standard key figures format ;
    FORMAT gini 6.3 ;

    /* Cumulative weight and income, as percentages of the totals. */
    cwt + wt ;
    cwtey + (ey*wt) ;
    pcwt = cwt / swt * 100 ;
    pcwtey = cwtey / swtey * 100 ;

    /* Width in weight share times the sum of the current and previous   */
    /* cumulative income shares.                                         */
    up = (pcwt-prewt) * (pcwtey+preey) ;
    sum + up ;

    prewt = pcwt ;
    preey = pcwtey ;
    RETAIN prewt preey ;

    /* Both shares are on a 0-100 scale, hence the division by 10000. */
    IF eof THEN DO ;
      gini = 1 - (sum / 10000) ;
      OUTPUT ;
    END ;
  RUN;

  *---------------- METHOD 2 ----------------- ;
  /*
  DATA &cc.&yy._1 (KEEP=did gini) ;
    SET start END=eof;
    BY did ;
    RETAIN swt swtey swt2ey swteycw;
    * Initialise temp variables;
    IF _N_ = 1 THEN DO ;
      swt = 0;
      swtey = 0;
      swt2ey = 0;
      swteycw = 0;
    END ;
    * Calculation;
    swt = swt + wt ;
    swtey = swtey + (wt*ey) ;
    swt2ey = swt2ey + (wt*wt*ey) ;
    swteycw = swteycw + (swt*wt*ey);
    * Output Gini at the last observation;
    IF eof THEN DO ;
      gini = 100*((2*swteycw-swt2ey)/(swt * swtey)-1);
      OUTPUT ;
    END ;
  */

%MEND gini;

/**--------------------**/
/**
ATKINSON INDEXES  **/
/**--------------------**/

/* atkin: writes a one-row dataset named cc yy _2 with did, atk5, atk1.  */
/*   Step 1 (TEMPO): totals weight and weighted income over PREPKF, then */
/*   stores for every row the ratio yy = ey / weighted mean of ey.       */
/*   Step 2: accumulates weight-share-weighted sums of log(yy) and       */
/*   yy**0.5 and derives the two indexes on the last row.                */
%MACRO atkin ;

  DATA tempo ;
    /* Grand totals, computed once before the main pass. */
    IF _N_ = 1 THEN DO UNTIL (last) ;
      SET prepkf END=last;
      swt + wt ;
      swtey + (wt*ey) ;
    END ;

    SET prepkf ;
    BY did ;

    mwtey = swtey / swt ;
    yy = (ey/mwtey) ;
  RUN ;

  DATA &cc.&yy._2 (KEEP=did atk5 atk1);
    SET tempo END=eof;
    BY did ;

    * NOTE: Optional - Apply a standard key figures format ;
    FORMAT atk5 atk1 6.3 ;

    /* epsilon = 1: weighted mean of log(yy). */
    ratio1 = log(yy) ;
    cwtratio1 + (ratio1*(wt/swt)) ;

    /* epsilon = 0.5: weighted mean of yy**0.5. */
    ratio5 = yy**(1-0.5) ;
    cwtratio5 + (ratio5*(wt/swt)) ;

    IF eof THEN DO ;
      right5 = (cwtratio5)**(1/0.5);
      atk5 = 1 - right5 ;
      right1 = exp((cwtratio1)) ;
      atk1 = 1 - right1 ;
      OUTPUT ;
    END ;
  RUN;

%MEND atkin ;

/**--------------------**/
/**  QUINTILES RATIOS  **/
/**--------------------**/

/* quantile: writes a one-row dataset named cc yy _3 holding the         */
/*   person-weighted percentile ratios 90/10, 90/50 and 80/20 of ey.     */
%MACRO quantile ;

  PROC UNIVARIATE DATA=prepkf NOPRINT;
    BY did ;
    VAR ey ;
    WEIGHT wt ;
    OUTPUT OUT = tmp
           PCTLPTS = 10 20 50 80 90
           PCTLPRE = dec ;
  RUN ;

  DATA &cc.&yy._3 (KEEP=did d9010 d9050 d8020) ;
    /* One-to-one merge with no BY statement; only the first row is kept. */
    MERGE prepkf (KEEP=did) tmp ;

    * NOTE: Optional - Apply a standard key figures format ;
    FORMAT d9010 d9050 d8020 6.3 ;

    d9010 = dec90 / dec10 ;
    d9050 = dec90 / dec50 ;
    d8020 = dec80 / dec20 ;

    IF _N_ = 1 ;
  RUN;

%MEND quantile ;

/**-------------------------**/
/** RELATIVES POVERTY RATES **/
/**-------------------------**/

/* _kf_povmean: helper for the poverty macro.                            */
/*   idx    = numeric suffix of the output dataset (cc yy _idx)          */
/*   data   = input dataset (default PREPKF)                             */
/*   vars   = analysis variables passed to PROC MEANS                    */
/*   weight = weight variable                                            */
/*   means  = output variable names, one per entry in vars               */
/*   Computes the weighted means by did, then keeps a single row with    */
/*   each mean multiplied by 100. The one-to-one merge with START        */
/*   supplies did on the first row even when PROC MEANS returns no row.  */
%MACRO _kf_povmean(idx=, data=prepkf, vars=, weight=, means=) ;
  %LOCAL k v ;

  PROC MEANS DATA=&data MEAN NOPRINT ;
    BY did ;
    VAR &vars ;
    WEIGHT &weight ;
    OUTPUT OUT=&cc.&yy._&idx MEAN=&means ;
  RUN ;

  DATA &cc.&yy._&idx (KEEP=did &means) ;
    MERGE start (KEEP=did) &cc.&yy._&idx ;
    FORMAT &means 6.3 ;

    /* Generate one "name = name * 100" statement per output variable. */
    %LET k = 1 ;
    %LET v = %SCAN(&means, &k, %STR( )) ;
    %DO %WHILE (%LENGTH(&v) > 0) ;
      &v = &v * 100 ;
      %LET k = %EVAL(&k + 1) ;
      %LET v = %SCAN(&means, &k, %STR( )) ;
    %END ;

    IF _N_ = 1 ;
  RUN ;
%MEND _kf_povmean ;

/* poverty: writes datasets cc yy _4 to cc yy _10, each one row with did */
/*   and a set of percentages (weighted means of 0/1 flags times 100).   */
%MACRO poverty ;

  /***** TOTAL POPULATION *****/
  %_kf_povmean(idx=4, vars=poor4 poor5 poor6, weight=wt,
               means=poorall4 poorall5 poorall6)

  /***** CHILDREN *****/
  %_kf_povmean(idx=5, vars=poor4 poor5 poor6, weight=ct,
               means=poork4 poork5 poork6)

  /***** ELDERLY *****/
  %_kf_povmean(idx=6, vars=poor4 poor5 poor6, weight=et,
               means=poore4 poore5 poore6)

  /***** DIST CHILDREN *****/
  %_kf_povmean(idx=7, vars=poor57 poor715 poor15, weight=ct,
               means=d5075 d75150 d150)

  /***** POVERTY RATES BY CHILDREN FAMILY TYPE *****/

  * Two parents family ;
  DATA twoParen ;
    SET prepkf ;
    IF ((hpartner = 1) AND (nhhmem > 2) AND (nhhmem17 > 0)) ;
  RUN;
  %_kf_povmean(idx=8, data=twoParen, vars=poor5, weight=ct, means=poortp)

  * Single-mother family ;
  DATA singMoth ;
    SET prepkf ;
    IF ((hpartner = 0) AND (sex = 2) AND (nhhmem17>0));
  RUN;
  %_kf_povmean(idx=9, data=singMoth, vars=poor5, weight=ct, means=poorsm)

  /* Share of children living in a single-mother family. */
  %_kf_povmean(idx=10, vars=kidsm, weight=ct, means=pkidsm)

%MEND poverty;

/*------------------------*/
/* MACRO DISPLAY RESULTS  */
/*------------------------*/

/* _kf_table: helper for the show macro.                                 */
/*   vars  = analysis variables to print                                 */
/*   title = quoted text for the table box                               */
/*   Sets the macro variables that the tabulate macro reads, then calls  */
/*   it. Every table uses dataset cc yy _KF, class variable did and the  */
/*   same format, border characters, row-title width and key label.      */
%MACRO _kf_table(vars=, title=) ;
  %LET dataset  = &cc.&yy._KF ;
  %LET formgen  = 15.4 ;
  %LET bordure  = '|----|+|---+' ;
  %LET classe   = did ;
  %LET variable = &vars ;
  %LET table    = did, &vars ;
  %LET box      = &title ;
  %LET rts      = 40 ;
  %LET lab      = sum = ' ' ;
  %TABULATE
%MEND _kf_table ;

/* show: merges the ten result datasets by did into cc yy _kf, attaches  */
/*   labels and formats, and prints seven tables.                        */
%MACRO show ;
  %LOCAL i ;

  * Compile result in a temporary SAS dataset ;
  DATA &cc.&yy._kf ;
    MERGE %DO i = 1 %TO 10 ;
            &cc.&yy._&i
          %END ; ;
    BY did ;
    ATTRIB
      gini     label='Gini Coefficient '                                     format=10.4
      atk5     label='Atkinson (epsilon=0.5) '                               format=10.4
      atk1     label='Atkinson (epsilon=1.0) '                               format=10.4
      d9010    label='Percentile ratio(90/10)'                               format=10.4
      d9050    label='Percentile ratio(90/50)'                               format=10.4
      d8020    label='Percentile ratio(80/20)'                               format=10.4
      PoorAll4 label='Relative Poverty Rates - Total Population (40%)'       format=10.4
      PoorAll5 label='Relative Poverty Rates - Total Population (50%)'       format=10.4
      PoorAll6 label='Relative Poverty Rates - Total Population (60%)'       format=10.4
      PoorK4   label='Relative Poverty Rates - Children (40%)'               format=10.4
      PoorK5   label='Relative Poverty Rates - Children (50%)'               format=10.4
      PoorK6   label='Relative Poverty Rates - Children (60%)'               format=10.4
      PoorE4   label='Relative Poverty Rates - Elderly (40%)'                format=10.4
      PoorE5   label='Relative Poverty Rates - Elderly (50%)'                format=10.4
      PoorE6   label='Relative Poverty Rates - Elderly (60%)'                format=10.4
      D5075    label='Distribution of Children by income group (50-75%)'     format=10.4
      D75150   label='Distribution of Children by income group (75-150%)'    format=10.4
      D150     label='Distribution of Children by income group (above 150%)' format=10.4
      Poortp   label='Children Poverty Rates - Two Parents Family (50%)'     format=10.4
      Poorsm   label='Children Poverty Rates - Single Mother Family (50%)'   format=10.4
      Pkidsm   label='% Children living in Single Mother Family'             format=10.4
    ;
  RUN ;

  * Create the outputs ;
  %_kf_table(vars=gini atk5 atk1,
             title='Gini & Atkinson index')
  %_kf_table(vars=d9010 d9050 d8020,
             title='Percentile ratios')
  %_kf_table(vars=poorAll4 poorAll5 poorAll6,
             title='Relative Poverty Rates (Total)')
  %_kf_table(vars=poork4 poork5 poork6,
             title='Relative Poverty Rates (Children)')
  /* FIX: the title below used to read PovertyRates, without the space. */
  %_kf_table(vars=poore4 poore5 poore6,
             title='Relative Poverty Rates (Elderly)')
  %_kf_table(vars=d5075 d75150 d150,
             title='Distribution of Children Living in Different Income Household ')
  %_kf_table(vars=poortp poorsm pkidsm,
             title='Poverty Rates for Children by Family Type')

%MEND show ;

/*-----------------------*/
/* MACRO: PROC TABULATE  */
/*-----------------------*/

/* tabulate: prints one table with PROC TABULATE. It takes no            */
/*   parameters; the caller must set these macro variables first:        */
/*   dataset, formgen, bordure, classe, variable, table, box, rts, lab.  */
%MACRO tabulate ;
  PROC TABULATE DATA=&dataset FORMAT=&formgen FORMCHAR=&bordure NOSEPS ;
    CLASS &classe ;
    VAR &variable ;
    TABLE &table / CONDENSE PRINTMISS BOX="&box" RTS=&rts ;
    KEYLABEL &lab ;
  RUN ;
%MEND tabulate ;

/*************************************************************************/
/** PART II: RUN THE KEY FIGURE PROGRAM                                 **/
/*************************************************************************/

/*----------------------------------*/
/* JOB
SUBMISSION: DEFINE DATASET   */
/*----------------------------------*/

/* cc and yy are concatenated by the macros above to name the input      */
/* household file and every output dataset.                              */
/* NOTE(review): your-cc and your-yy look like placeholders to be        */
/* replaced before submission - confirm against the usage instructions.  */
%LET cc = your-cc;
%LET yy = your-yy;

/* Run order matters: prep builds the data the other macros read, and    */
/* show merges the datasets written by the four macros before it.        */
%PREP
%GINI
%ATKIN
%QUANTILE
%POVERTY
%SHOW