Professional Documents
Culture Documents
/*-------------------------------------------------------------------*/
/* Copyright(c) 2002 by SAS Institute Inc., Cary, NC, USA            */
/* SAS Publications order # 57496                                    */
/* ISBN 1-59047-080-X                                                */
/*-------------------------------------------------------------------*/
/*                                                                   */
/* There are no warranties, expressed or implied, as to              */
/* merchantability or fitness for a particular purpose regarding the */
/* materials or code contained herein. The Institute is not          */
/* responsible for errors in this material as it now exists or will  */
/* exist, nor does the Institute provide technical support for it.   */
/*                                                                   */
/*-------------------------------------------------------------------*/
/* Questions or problem reports concerning this material may be      */
/* addressed to the author:                                          */
/*                                                                   */
/*       SAS Institute Inc.                                          */
/*       Books by Users                                              */
/*       Attn: Daniel Zelterman                                      */
/*       SAS Campus Drive                                            */
/*       Cary, NC 27513                                              */
/*                                                                   */
/* If you prefer, you can send email to: sasbbu@sas.com              */
/* Use this for subject field:                                       */
/*                                                                   */
/*       Comments for Daniel Zelterman                               */
/*                                                                   */
/*-------------------------------------------------------------------*/
/* Program 1.1: */
title1 'Beetle mortality and pesticide dose';

/* Beetle dose-response data: y beetles killed out of n in each group,  */
/* read together with the dose given to that group.                     */
/* NOTE(review): the DATALINES statement and its data records are not   */
/* present in this copy of the program. They must be restored between   */
/* the LABEL statement and RUN before this step can read any data.      */
data beetle;
   input y n dose;
   label
      y = 'number killed in group'
      n = 'number in dose group'
      ;
run;
/* Binomial response y/n against dose with the identity link */
title2 'Fit a linear dose effect to the binomial data';
proc genmod;
   model y/n = dose
         / dist=binomial link=identity obstats;
run;

/* Same response with the binomial distribution and no LINK= option */
title2 'Logistic regression in GENMOD';
proc genmod;
   model y/n = dose
         / dist=binomial obstats;
run;

/* Same response with the probit link */
title2 'Probit regression in GENMOD';
proc genmod;
   model y/n = dose
         / dist=binomial link=probit obstats;
run;
/* Program 2.1 */
/* Send only the GENMOD model-fit table to the PostScript file and listing */
ods printer file='c:\Table2-1.ps' ps;
ods printer select Genmod.ModelFit;
ods listing select Genmod.Modelfit;

/* 2x2 table read as one cell per record: the cell count, the two 0/1   */
/* classification variables, and +1/-1 codes for row and column effects.*/
/* The INPUT statement has no trailing @@, so every record must be on   */
/* its own line after DATALINES.                                        */
data;
   input count expos tumor alpha beta;
   label
      tumor = 'mice with tumors'
      expos = 'exposure status'
      alpha = 'row effects'
      beta  = 'column effect'
      ;
   datalines;
4 1 1 1 1
5 0 1 1 -1
12 1 0 -1 1
79 0 0 -1 -1
run;

/* Poisson model with tumor and expos as main effects (class variables) */
proc genmod;
   class tumor expos;
   model count = tumor expos / dist = Poisson obstats;
run;

/* NOTE(review): the next two PROC GENMOD steps have lost their          */
/* statements in this copy (no MODEL statement, no RUN). They are kept   */
/* as found; restore their bodies from the printed program.              */
proc genmod;
proc genmod;
/* Program 2.2 */
/* Perinatal mortality: four classification variables and a cell count, */
/* several cells per line (trailing @@ on INPUT).                        */
/* NOTE(review): the data records below are damaged in this copy: some   */
/* classification digits are fused (e.g. "0110", "1010"), some counts    */
/* are missing, and the first records share a line with DATALINES.       */
/* Restore the records from the printed program before running.          */
title1 'Perinatal mortality'; data; input gest age cigs vita count @@; label gest = 'Gestational age 261+ days' age = 'Mother over 30 years' cigs = 'More than 6 cigarettes/day' vita = 'Live births' ; datalines; 0 0 0 0 50 0 0 1 1 40 0110 4 0 0 0 1 315 0 1 0 0 41 0 1 1 1 11 0010 9
0 1 0 1 147 1 0 0 0 24
1010
1 0 1 1 459 1110 1
1 1 0 1 1494
title2 'Log-linear model of mutual independence';
proc genmod;                                   /* Produces Output 2.6 */
   model count = gest age cigs vita
         / dist = Poisson;
run;

title2 'Log-linear model with all pairwise effects';
proc genmod;                                   /* Produces Output 2.7 */
   model count = gest | age | cigs | vita @2
         / dist = Poisson;
run;

title2 'Log-linear model with all 3-way interactions';
proc genmod;                                   /* Produces Output 2.8 */
   model count = gest | age | cigs | vita @3
         / dist = Poisson;
run;

title2 'A small log-linear model with 5 pairwise interactions';
proc genmod;                                   /* Produces Output 2.9 */
   model count = gest age cigs vita
                 gest*age age*cigs gest*vita age*vita cigs*vita
         / dist = Poisson;
run;

title2 'A log-linear model with 4 pairwise interactions';
proc genmod;                                   /* Produces Output 2.10 */
   model count = gest age cigs vita
                 gest*age age*cigs gest*vita age*vita
         / dist = Poisson;
run;
/* Program 2.3 */
/* Four main effects plus five pairwise interactions, with TYPE1 and */
/* TYPE3 analyses requested on the MODEL statement.                  */
proc genmod;
   model count = gest age cigs vita
                 gest*age age*cigs cigs*vita gest*vita age*vita
         / dist = Poisson type1 type3;
run;
/* Program 3.1 */
title1 'Coal miners: breathlessness and wheezing by age';
data;
   /* Read data as a 2x2 table for each age group.
      Produce a data set with one frequency per line. */
   input age a b c d;
   label
      age = '5 year interval'
      br  = 'breathlessness'
      wh  = 'wheeze'
      bwa = 'linear age times br-wh interaction'
      ;
   age = age/5 - 3;                  /* recode age categories: 1 to 9 */

   /* bwa is a log-odds ratio that is linear in age */
   br=1; wh=1; bwa= age; freq=a; output;
   br=1; wh=0; bwa=-age; freq=b; output;
   br=0; wh=1; bwa=-age; freq=c; output;
   br=0; wh=0; bwa= age; freq=d; output;
   drop a b c d;

   /* NOTE(review): in the damaged copy the records were scattered by    */
   /* column, and the ages 35, 40, 45 together with the first counts     */
   /* 121, 169, 269 of those rows were missing. They are restored here   */
   /* from the published coal-miner table; confirm against the book.     */
   datalines;
20   9   7  95 1841
25  23   9 105 1654
30  54  19 177 1863
35 121  48 257 2357
40 169  54 273 1778
45 269  88 324 1712
50 404 117 245 1324
55 406 152 225  967
60 372 106 132  526
run;

title2 'Log-linear model with all pairwise interactions';
proc genmod;
   /* age as a class variable ignores its ordering */
   class age br wh;
   model freq = br | wh | age @2 / dist = Poisson obstats;
run;

/* The PROC statement for this step was missing in the damaged copy; */
/* CLASS and MODEL are only valid inside a procedure step.           */
proc genmod;
   /* bwa models the interaction of br and wh as linear in age */
   class age br wh;
   model freq = br | wh | age @2 bwa / dist = Poisson obstats;
run;
/* Program 3.2 */
title1 'Trauma and outcome. Data from Agresti and Coull, 1998.';

/* One cell per record: treatment, outcome, frequency, and the score   */
/* used for the 'common' interaction term. Several records may share   */
/* a line because INPUT ends with @@. The data records must start on   */
/* the line after DATALINES.                                           */
data;
   input treat outcome freq common @@;
   label
      treat   = 'four treatments'
      outcome = 'five ordered outcomes'
      txo     = 'treatment and outcome interaction'
      common  = 'common class-style odds ratio'
      ;
   txo=treat*outcome;                  /* linear by linear interaction */
   datalines;
1 1 59  1   1 2 25  1   1 3 46  1   1 4 48  1   1 5 32 -4
2 1 48  1   2 2 21  1   2 3 44  1   2 4 47  1   2 5 30 -4
3 1 44  1   3 2 14  1   3 3 54  1   3 4 64  1   3 5 31 -4
4 1 43 -3   4 2  4 -3   4 3 49 -3   4 4 58 -3   4 5 41 12
run;

title2 'Independence of treatment and outcome';
proc genmod;                      /* This step produces Output 3.3 */
   class treat outcome;
   model freq = treat outcome / dist = Poisson;
run;

title2 'Interaction of a pair of class variables';
proc genmod;                      /* Output 3.4 is produced by this step */
   class treat outcome;
   model freq = treat outcome treat * outcome / dist = Poisson;
run;

title2 'Fit the common odds ratio interaction';
proc genmod;     /* The fitted values from this step appear in Table 3.5 */
   class treat outcome;
   model freq = treat outcome common / dist = Poisson;
run;

title2 'Fit the treat * outcome linear by linear interaction';
proc genmod;     /* The fitted values from this step appear in Table 3.6 */
   class treat outcome;
   model freq = treat outcome txo /dist = Poisson;
run;

title2 'Common odds and linear by linear interaction effects';
proc genmod;
   class treat outcome;
   model freq = treat outcome common txo / dist = Poisson;
run;
/* Program 4.1 */
title1 'Triangular stroke data';

/* One cell per record: the count followed by character codes for the  */
/* row and column category. Only 15 cells are listed.                   */
data;
   input count row $ col $;
   label
      count = 'number of patients'
      row   = 'admission status'
      col   = 'discharge status'
      ;
   datalines;
11 E A
23 E B
12 E C
15 E D
8 E E
9 D A
10 D B
4 D C
1 D D
6 C A
4 C B
4 C C
4 B A
5 B B
5 A A
run;

proc genmod;
   class row col;
   model count = row col      /* independence of rows and columns */
         / dist = Poisson obstats;
run;
/* Program 4.2 */
b= 1/(1+(col-5.5)**2 +(row-6)**2); /* center bump ob= 1/(1+(col-4.5)**2 +(row-6)**2); /* offset bump e=sqrt(col**2+row**2); rowc = row; colc = col; jr=row+normal(0)/10; label count = 'number of patients' colc rowc = 'left to right category' = 'front to back category'
e h b ob jr datalines;
= 'distance from center' = 'h interaction' = 'bump in the center' = 'offset bump' = 'jittered row' ;
0 1 4 1 1 5 0 1 6 0 1 7 1 2 3 1 2 4 1 2 5 0 2 6 0 2 7 4 2 8 3 3 2 1 3 3 0 3 4 1 3 5 0 3 6 2 3 7 9 3 8 11 3 9 1 4 1 4 4 2 3 4 3 0 4 4 1 4 5 0 4 6 3 4 7 11 4 8 15 4 9 11 4 10 2 5 1 2 5 2 3 5 3 3 5 4 0 5 5 1 5 6 0 5 7 1 5 8 8 5 9 13 5 10 13 6 1 7 6 2 2 6 3 5 6 4 6 6 5 3 6 6 1 6 7 0 6 8 9 6 9 12 6 10 15 7 1 9 7 2 4 7 3 4 7 5 0 7 6 8 7 8 7 7 9 9 7 10 6 8 1 8 8 2 5 8 3 1 8 4 0 8 5 1 8 6 1 8 7 8 8 8 8 8 9 7 8 10 4 9 2 4 9 3 2 9 4 1 9 5 2 9 6 5 9 7 7 9 8 7 9 9 6 10 2 4 10 3 2 10 4 1 10 5 2 10 6 5 10 7 8 10 8 7 10 9 3 11 3 1 11 4 1 11 5 2 11 6 5 11 7 5 11 8 run; proc genmod; class rowc colc; model count =rowc colc / dist=Poisson; run; /* Fit model for indep, h, e, ob and obtain residuals proc genmod; */ /* model of independence for rows and columns */
class rowc colc; model count = h e ob rowc colc / dist=Poisson obstats; ods output obstats=fitted; run; data origin; set stroke; drop rowc colc; run; data both; /* Merge observed and fitted values */ /* Drop class variables */ /* Capture residuals */
merge origin fitted; run; proc gplot; /* Plot chi-squared residuals by jittered row */ / vref=0 haxis=axis1 vaxis=axis2;
/* Program 4.3 */
title1 'Baseball games using the Bradley Terry model'; data bball; input wteam $ 1-10 wtmn ltmn freq; label wteam = 'name of winning team' wtmn = 'winning team number'
ltmn = 'losing team number' freq = 'number of wins' ; jitwin=wtmn+normal(0)/10; if ltmn < wtmn then delete; comps=&cons; /* jitter winning team number */ /* omit top half of table */
ph(wtmn)=+1; ph(ltmn)=-1; datalines; Milwaukee 1 2 7 Milwaukee 1 3 9 Milwaukee 1 4 7 Milwaukee 1 5 7 Milwaukee 1 6 9 Milwaukee 1 7 11 Detroit Detroit Detroit Detroit Detroit Detroit Toronto Toronto 2 1 6 2 3 7 2 4 5 2 5 11 2 6 9 2 7 9 3 1 4 3 2 6
3 4 7 3 5 7 3 6 8 3 7 12
New York 4 1 6 New York 4 2 8 New York 4 3 6 New York 4 5 6 New York 4 6 7 New York 4 7 10 Boston Boston Boston Boston Boston Boston 5 1 6 5 2 2 5 3 6 5 4 7 5 6 7 5 7 12
Cleveland 6 1 4 Cleveland 6 2 4 Cleveland 6 3 5 Cleveland 6 4 6 Cleveland 6 5 6 Cleveland 6 7 6 Baltimore 7 1 2 Baltimore 7 2 4 Baltimore 7 3 1 Baltimore 7 4 3
model freq / comps = phi1-phi7 / dist = binomial noint obstats; run; data two; merge fitted bball; output; /* combine fitted and raw data */ /* output lower half of table */ /* rebuild upper half of table */
j=wtmn; wtmn=ltmn; ltmn=j; reschi=-reschi; pred=13-pred; jitwin=wtmn+normal(0)/10; output; drop phi1-phi7; run; proc gplot; plot Streschi * jitwin; run; quit;
/* ======================================================== */
/* Program 5.1 */
title1 'Deaths from testicular cancer in Japan';

/* Each input record holds one age group with five (population, deaths) */
/* pairs. The step writes five observations per record, one per pair.   */
data tcancer;
   input age pop1 c1 pop2 c2 pop3 c3 pop4 c4 pop5 c5;
   array p(5) pop1-pop5;          /* populations at each year group */
   array d(5) c1-c5;              /* cancer cases for each year group */
   label
      agegrp = 'age in 5yr interval'
      logpop = 'log-population'
      ;

   /* Produce a separate line for each year/age combination */
   agegrp=age/5;                  /* age in five year intervals */
   do j=1 to 5;                   /* for each line read in . . */
      deaths=d(j);                /* number of cancer deaths */
      logpop=log(p(j));           /* log of population */
      year=j-1;                   /* recode the years as 0,...,4 */
      yearc=year;                 /* year category */
      cohort=agegrp - year + 4;   /* identify the diagonal cohort */
      output;                     /* produce five output lines for each read in */
   end;
   drop j pop1-pop5 c1-c5;        /* omit unneeded variables */
   datalines;
0 15501 17 26914 51 21027 65 20246 69 21596 74
5 14236 . 25380 6 26613 7 20885 8 20051 7
10 13270 . 23492 3 25324 3 26540 7 20718 11
15 12658 2 21881 6 23211 15 24931 25 26182 39
20 10696 5 20402 27 21263 39 22228 56 24033 83
25 7563 5 17242 40 19994 58 20606 97 21805 125
30 7074 7 12609 18 17128 54 19864 77 20750 129
35 7038 10 11712 13 12476 36 17001 70 19890 101
40 6418 9 11478 26 11450 32 12275 29 16794 67
45 5981 7 10274 16 11157 26 11147 34 11962 37
50 4944 7 9325 16 9828 27 10705 27 10741 29
55 3994 7 7562 17 8718 19 9206 32 10086 39
60 3098 6 5902 13 6796 21 7869 21 8399 31
65 2317 4 4244 12 4911 26 5728 29 6715 34
70 1513 7 2845 17 3197 22 3737 25 4448 33
75 688 5 1587 9 1812 10 2061 25 2482 31
80 264 2 583 6 787 6 904 14 1068 9
85 73 2 179 2 246 3 335 3 419 3
run;

title2 'Model (5.1): Cohort, age, year; No population information';
proc genmod;
   class yearc agegrp cohort;
   model deaths = yearc agegrp cohort / type1 type3 dist=Poisson;
run;

title2 'Model (5.2): Cohort, age, year, and offset log(Pop)';
proc genmod;
   class yearc agegrp cohort;
   ods listing exclude obstats;      /* turn off the obstats listing */
   output out=fitted reschi=reschi;  /* create an output dataset */
   model deaths = yearc agegrp cohort
         / obstats type1 type3 offset=logpop dist=Poisson;
run;

/* NOTE(review): the plotting statements of this step are missing in   */
/* this copy; only the PROC statement and its comment survive.         */
proc gplot data=fitted;              /* bubble plot of residuals */
/* =========================================================== */
/* Program 5.2 */
title1 'Species diversity on the Galapagos Islands'; data species; input name $ 1-13 species area d2neigh d2sc adjsp; loga=log(area); area=area/1000; adjsp=adjsp/100; d2sc=d2sc/100; d2neigh=d2neigh/100; isa=0; if name='Isabela' then isa=1; /* indicator for Isabela */ label species = '# of species on island' area loga = 'in sq-km' = 'log area' /* rescale variables */
adjsp = '# of species on adjacent island'; datalines; Baltra Bartolome Caldwell Champion Coamano 58 25.09 0.6 0.6 44 31 3 25 2 1.24 0.6 26.3 237 .21 2.8 58.7 .10 1.9 47.4 5 2
.05 1.9 1.9 444 .34 8.0 8.0 44 .08 6.0 12.0 18
Daphne Major 18 Daphne Minor 24 Darwin Eden Enderby Espanola Fernandina Gardner A Gardner B Genovesa Isabela Marchena Onslow Pinta Pinzon Las Plazas Rabida 10 8 2
2.33 34.1 290.2 21 .03 0.4 0.4 108 .18 2.6 50.2 25
97 58.27 1.1 88.3 58 93 634.49 4.3 95.3 347 58 5 .57 1.1 93.1 97 .78 4.6 62.2 3
40 17.35 47.4 92.2 51 347 4669.32 0.7 28.1 93 51 129.49 29.1 85.9 104 2 .01 3.3 45.9 25
104 59.56 29.1 119.6 51 108 17.95 10.7 10.7 12 70 .23 0.5 0.6 58 4.89 4.4 24.4 237 8
San Cristobal 280 551.62 45.2 66.6 58 San Salvador 237 572.33 0.2 19.8 70 Santa Cruz Santa Fe 444 903.82 0.6 0.0 8
Santa Maria 285 170.92 2.6 49.2 25 Seymour Tortuga Wolf run; proc print; run; title2 'Fit Poisson model with all pairwise interactions'; proc genmod; ods output obstats=fit; model species = loga | d2neigh | adjsp | d2sc @2 isa / dist=Poisson obstats type1 type3; run; proc plot data=fit; plot reschi * loga; run; title2 'Fit Negative Binomial model with all pairwise interactions'; proc genmod; ods output obstats=fitnb; model species = loga | d2neigh | adjsp | d2sc @2 isa / dist=nb obstats type1 type3; run; /* plot Poisson Pearson residuals */ 44 16 21 1.84 0.6 9.6 58 1.24 6.8 50.9 108 2.85 34.1 254.7 10
/* Program 6.1 */
/* Counts cross-classified by three 0/1 indicators (birth, death,      */
/* medic). Seven of the eight combinations are listed; there is no      */
/* record for 0 0 0. INPUT has no trailing @@, so each record is on     */
/* its own line. The record that appeared as "4011" in the damaged      */
/* copy is the fused cell "4 0 1 1", which is the only combination      */
/* missing from the otherwise complete 001..111 sequence.               */
data;
   input count birth death medic ;
   label
      birth = 'names on birth certificate'
      death = 'names on death certificate'
      medic = 'names on medical record'
      ;
   datalines;
60 0 0 1
49 0 1 0
4 0 1 1
247 1 0 0
112 1 0 1
142 1 1 0
12 1 1 1
run;

title2 'Log-linear model of mutual independence';
proc genmod;                                   /* m, d, b model */
   model count = medic birth death /dist = Poisson;
run;

title2 'Log-linear models with one interaction';
proc genmod;                                   /* d, b*m model */
   model count = medic birth death birth*medic / dist = Poisson;
run;

proc genmod;                                   /* m, b*d model */
   model count = medic birth death birth*death / dist = Poisson;
run;

proc genmod;          /* b, d*m model (6.1) with output in Table 6.4 */
   model count = medic death birth death*medic / dist = Poisson;
run;

title2 'Log-linear models with two interactions';
proc genmod;                                   /* b*m, d*b model */
   model count = medic birth death birth*medic birth*death /dist = Poisson;
run;

proc genmod;                                   /* b*d, m*d model */
   model count = medic birth death birth*death death*medic /dist = Poisson;
run;

proc genmod;                                   /* d*m, m*b model */
   model count = medic birth death death*medic birth*medic /dist = Poisson;
run;
*/
*/ */
dev= -lam + y*log(lam) - log(1-exp(-lam)); /* deviance end; dev = dev + lambda - y*log(lambda) + log(1-eml);
*/
%macro inv(ev);
   /* Interval bisection to find lambda from the expected value (ev).   */
   /* Generates DATA step statements that set LAM so that               */
   /* lam/(1-exp(-lam)) equals &ev; LAM is set to missing when          */
   /* &ev LE 1. Uses the work variables LAMLO, LAMHI and MAL.           */
   /* ev: name of a DATA step variable (or an expression) holding the   */
   /* expected value.                                                   */
   /* Every comment is closed before the next statement: a stray        */
   /* closing delimiter in front of an IF statement would turn that     */
   /* statement into a comment and stop the interval from shrinking.    */
   if &ev LE 1 then lam= . ;     /* expected value must be >1;          */
                                 /* lambda is not defined for ev LE 1   */
   else do;                      /* otherwise iterate to find lambda    */
      lamlo=&ev-1;               /* lambda is between exp value less one */
      lamhi=&ev;                 /* . . . and the expected value        */
      do until (abs(lamhi-lamlo)<1e-7);  /* convergence criteria        */
         lam=(lamhi+lamlo)/2;            /* examine midpoint            */
         mal= lam/(1-exp(-lam));         /* mean at midpoint            */
         if mal GE &ev then lamhi=lam;   /* lower upper endpoint        */
         if mal LE &ev then lamlo=lam;   /* raise lower endpoint        */
      end;
   end;
%mend inv;
/* Program 7.2 */
data skunk;
input count freq year sex $ @@; yr=year; sx=sex; label count ='count' freq = 'frequency' year = '1977 or 1978' sex = 'F M' datalines; 1 1 77 F 1 3 77 M 6 2 77 M 5 2 78 F run; proc genmod; %tpr; frequency freq; class sex year; model count = sex year year*sex / obstats type1 type3; ods listing exclude obstats; ods output obstats=fitted; run; data fit2; set fitted; drop sex year; run; data both; /* Combine two data sets */ /* omit class variables because these are defined */ /* in two different ways */ /* omit obstats output */ /* Create fitted value dataset */ /* invoke the %tpr macro */ 2 2 77 F 2 0 77 M 1 7 78 F 1 4 78 M 3 4 77 F 4 2 77 F 5 1 77 F 3 3 77 M 4 3 77 M 5 2 77 M 2 7 78 F 3 3 78 F 4 1 78 F 2 3 78 M 3 1 78 M ; /* make copies of year and sex variables */
/* Merge fitted & original values */ /* Estimate of lambda parameter */ /* Est of probability of zero category */ /* 95% CI for lambda */
lamup = exp(xbeta + 1.96*std); lamlow = exp(xbeta - 1.96*std); p0up = exp(-lamlow); p0low = exp(-lamup); se=std*lambda; sep0=std*lambda*p0; run; data; set both; by yr sx; if not ( first.yr | first.sx ) then delete; run; proc print noobs;
/* 95% CI for p0 */
/* Program 7.3 */
title1 'Truncated Poisson regression to model lottery winners'; data lottery; input town $ 1-14 winners popul area mill;
dens = popul/area; logpop = log(popul); label popul = 'population in 1000s' logpop = 'log population' area = 'in square miles' mill = 'property tax rate' dens = 'population density'; datalines; Ansonia Beacon Falls Branford Cheshire Clinton Derby East Haven Guilford Hamden Madison Milford N. Branford North Haven Old Saybrook Orange Oxford Seymour 9 3 1 6 9 5 10 2 12 1 6 3 11 6 2 6 9 17.9 5.3 28.0 26.2 12.8 12.0 26.5 20.3 52.0 16.0 49.5 13.1 21.6 9.3 12.5 9.1 14.5 6.2 9.8 27.9 33.0 17.2 5.3 28.9 25.0 22.6 27.1 27.9 29.6 37.1 28.6 34.1 22.3 30.8 26.9 23.4 15.3 23.8 29.0 40.5
33.0 14.7
7 14
36.0 33.0
31.4 23.5
12 1
54.0 8.0
10.6 19.3
title2 'Fit log population as a covariate';
proc genmod;
   %tpr;                          /* invoke the %tpr macros */
   model winners = mill area dens logpop
         / type1 type3
           maxiter=100            /* increase number of iterations */
           intercept=.3;          /* initial starting value for intercept */
run;

title2 'Fit log population using offset';
proc genmod;
   %tpr;                          /* invoke the %tpr macros */
   model winners = mill area dens
         / offset=logpop obstats type1 type3
           maxiter=100            /* increase number of iterations */
           intercept=.3;          /* initial starting value for intercept */
   /* The statement that creates FITTED was missing in the damaged copy */
   /* although its comment remained and FITTED is merged below. Without */
   /* it the merge would pick up a FITTED data set left by an earlier   */
   /* program, or fail.                                                 */
   ods output obstats=fitted;     /* create fitted value dataset */
   ods listing exclude obstats;   /* do not print the obstats data */
run;

data both;
   merge lottery fitted;          /* merge the fitted and original values */
   lambda=exp(xbeta);             /* estimated lambda parameter */
run;

title3 'Plot of residuals from the fitted model';
proc gplot;
   bubble reschi * pred = dens;
run;
/* Program 8.1 */
/* Build a 2x2 table: the nested loops read four counts in row-major   */
/* order and write one observation per cell. The counts must start on  */
/* the line after DATALINES.                                            */
data;
   do row=1 to 2;
      do col=1 to 2;
         input count @@;
         output;
      end;
   end;
   datalines;
4 5 12 74
run;

/* Cell counts are supplied through WEIGHT; ALL and EXACT OR request   */
/* the association statistics and the exact odds-ratio analysis.       */
proc freq;
   tables row * col / all;
   weight count;
   exact or;
run;
/* Program 8.2 */
title1 'Extended hypergeometric distribution in Genmod';
title2 'Oral contraceptive use and myocardial infarction, Shapiro 1979';

/* One 2x2 table per record, described by its age category, the count  */
/* being modeled (cwoc), two margins and the table total. INPUT has no */
/* trailing @@, so each record is on its own line.                     */
data mi;
   input age cwoc ocuse cases total;
   label
      cwoc  = 'cases with oc use'
      ocuse = '# in age stratum using oc''s'
      cases = '# of cases in age stratum'
      total = 'sample size in this age stratum'
      age   = 'age category'
      loga  = 'log-age category'
      ;
   loga=log(age);
   datalines;
1 4 66 6 292
2 9 42 21 444
3 4 30 37 393
4 6 15 71 442
5 6 11 99 405
run;

title3 'Model for log-odds ratio is linear in age';
proc genmod data=mi;
   %fithyp(cases,ocuse,total);      /* Provide table margins */
   model cwoc = age / obstats intercept=2 initial = -.30 to -.10 by .05 maxit=250000;
run;

title3 'Model for log-odds ratio is linear in log-age';
proc genmod data=mi;
   %fithyp(cases,ocuse,total);      /* Provide table margins */
   model cwoc = loga / obstats intercept=2 initial = -.80 to -.60 by .05 maxit=250000;
   ods output obstats=fitted;       /* Create fitted value dataset */
   /* Exclude OBSTATS from the LISTING destination only, as the other  */
   /* programs in this file do. The damaged copy had the words in the  */
   /* order "ods exclude listing obstats", which names no destination. */
   /* NOTE(review): confirm against the printed program.               */
   ods listing exclude obstats;
run;

data both;
   merge mi fitted;                 /* Merge fitted & original values */
   %hypmean(cases,ocuse,total,xbeta);
   mean=hypmean;
   sd=sqrt(hypvar);
run;

proc print data=both;
   var age loga mean sd total pred xbeta;
run;
/* Program 8.3 */
title1 'Extended hypergeometric regression in Genmod';
title2 'Four fungicide 2x2 tables';

/* NOTE(review): no DATALINES statement or data records are present    */
/* for this step in this copy of the program.                          */
data avadex;
   input expt exposed tumors total strain $ sex $;
   label
      expt    = 'exposed mice w/tumors'
      exposed = 'mice exposed to fungicide'
      tumors  = 'mice with tumors'
      total   = 'sample size in this 2x2 table'
      strain  = 'X or Y'
      sex     = 'M or F'
      ;

proc genmod;
   %fithyp(exposed,tumors,total);   /* Provide table margins */
   class strain sex;
   model expt = strain sex
         / obstats maxiter=250 type1 type3;
run;
%macro hyp(m,n,nn,x);
   /* Expands to an expression (no trailing semicolon) for the log of
      the two binomial coefficients in the numerator of the extended
      hypergeometric distribution.
         m  = row sum
         n  = col sum
         nn = sample size
         x  = count                                                   */
     lgamma(&m+1)
   - lgamma(&x+1)
   - lgamma(&m-&x+1)                 /* m choose x */
   + lgamma(&nn-&m+1)
   - lgamma(&n-&x+1)
   - lgamma(&nn-&n-&m+&x+1)          /* nn-m choose n-x */
   /* note omitted semicolon */
%mend hyp;
n=column sum; nn=sample size; lor=log-odds ratio */ den=0; hypmean=0; hypvar=0; do j=max(0,&m+&n-&nn) to min(&n,&m); /* loop over range */ dterm=exp(%hyp(&m,&n,&nn,j)+j*&lor); den=den+dterm; /* accumulate denominator */ /* accumulate mean */
hypmean=hypmean+j*dterm;
%macro hypinv(m,n,nn,expv); /* Find the hypergeometric parameter giving a distribution with expected value equal to expv. This value is called lamd upon completion. Interval bisection is used to find lamd. The initial interval for estimation of log-odds-ratio is +/- lorlimit. This log-odds parameter is needed for the deviance. lorlimit= 15; */
/* initial interval
*/
/* if expv is at extreme of its range then the model is degenerate */ if &expv LE max(0,&m+&n-&nn) then lamhi=lamlo; if &expv GE min(&n,&m) then lamlo=lamhi; /* expv low */
/* exvp at hi end */
/* mean at midpoint */
/* shrink interval: equate expected value with expv if hypmean GE &expv then lamhi=lamd; if hypmean LE &expv then lamlo=lamd; end; %mend hypinv;
%macro fithyp(n,m,nn); /* This macro provides GENMOD with the link, inverse link, variance, and deviance needed to perform regression on the log odds ratio parameter of the extended hypergeometric distribution. The parameters are: n=row sum; m=column sum; nn=2x2 table total. */
*/
/* The deviance requires the unconstrained maximum likelihood estimator. This estimate is the parameter value that equates the expected value with the observed value, _RESP_ */
*/
/*
*/
devi= 2*log(devnum/devden);
/*
*/
invlink ilink = mean0; fwdlink link = log(_MEAN_); deviance dev = devi; variance vari = var0;
/* Hypergeometric variance */
%mend fithyp;
/* Approximate power for chi-squared tests.

   INVOKE AS   %chipow(df=.,ncp=.,alpha=.);
   NOTE(review): the example above is the form given in the original
   header, but the macro is declared with positional parameters, so
   confirm the intended calling convention against the printed program.

   INPUT:
      df    = degrees of freedom, taken to be 1 if missing
              NOTE(review): no code handling a missing df is visible
              in this copy; &df is used as given.
      ncp   = non-centrality parameter for a sample of size n=1
      alpha = significance level under null hypothesis,
              taken to be .05 if missing
   OUTPUT variables:
      power = values .25 to .9 by .05 and .9 to .99 by .01
      n     = approximate sample size at respective power

   The macro generates DATA step statements and writes one observation
   per power value, so it must be invoked inside a DATA step.          */
%macro chipow(df,ncp,alpha);
   sig=&alpha;
   if sig=. then sig=.05;
   /* The invocation example had spilled out of the header comment into */
   /* the body in the damaged copy, where it would have called this     */
   /* macro recursively; it now lives in the header only.               */
   do j=1 to 23;
      /* This assignment was missing in the damaged copy and is         */
      /* restored from the identical power grid used by %nullpow.       */
      if j<15 then power=.2+j/20;          /* power is .25 by .05 to .9 */
      if j>=15 then power=.9+.01*(j-14);   /* ... and .9 by .01 to .99 */
      n=cnonct(cinv(1-sig,&df),&df,1-power)/&ncp;  /* approximate sample size */
      output;
   end;
   drop j sig;                             /* drop local variables */
%mend chipow;
/* Program 9.2 */
/* Power for the null situation where margins are known a priori in a
   2x2 table.

   INVOKE this macro as   %nullpow(n=100, p1=.5, p2=.5);

   INPUT:
      n     = known sample size, assumed to be 100 if missing
      p1,p2 = marginal probabilities, set to 1/2 if missing
   OUTPUT variables:
      power     = values .25 to .9 by .05 and .9 to .99 by .01
      psi1-psi3 = odds ratios that can be tested with specified power
                  and alpha=.01, .05, and .10, respectively.

   The macro generates DATA step statements and writes one observation
   per power value, so it must be invoked inside a DATA step.          */
%macro nullpow(n=100,p1=.5,p2=.5);
   %let nsig=3;                      /* number of significance levels */
   /* The ALPHA array is assigned and indexed below but its declaration */
   /* was absent in the damaged copy; it is declared here in the same   */
   /* form as THETA and PSI.                                            */
   array alpha(&nsig) alpha1-alpha&nsig;
   array theta(&nsig) theta1-theta&nsig;
   array psi(&nsig) psi1-psi&nsig;
   alpha(1)=.01; alpha(2)=.05; alpha(3)=.10;  /* specify significance levels */
   m= 1 / (&p1*&p2) + 1 / (&p1*(1-&p2))
    + 1 / ((1-&p1)*&p2) + 1 / ((1-&p1)*(1-&p2));  /* multiplier in (9.10) */
   do j=1 to 23;                                   /* range for power */
      if j<15 then power=.2 + j/20;        /* power is .25 by .05 to .9 */
      if j>=15 then power=.9+.01*(j-14);   /* and .9 by .01 to .99 */
      do k=1 to &nsig;           /* for every different significance level */
         t=cnonct(cinv(1-alpha(k),1),1,1-power);   /* 1 df crit val */
         theta(k)=sqrt(t/(m*&n));                  /* theta in Table 9.9, 10 */
         psi(k)=((1+4*theta(k))/(1-4*theta(k)))**2; /* odds ratio */
      end;
      output;                    /* every level of power on a different line */
   end;
%mend nullpow;

title1 'Approximate odds ratios for chi-squared tests';
title2 'Significance levels are .01, .05, and .10';
data initial;
   %nullpow(n=150,p1=.5,p2=.5);      /* specify N and marginal prob's */
run;
proc print noobs;
   var power psi1 psi2 psi3;
run;
*/
*/ */
dev= -lam + y*log(lam) - log(1-exp(-lam)); /* deviance end; dev = dev + lambda - y*log(lambda) + log(1-eml);
*/
invlink linv = lambda / (1-eml); /* Truncated Poisson mean /* Truncated Poisson variance */
*/
%macro inv(ev);
   /* Interval bisection to find lambda from the expected value (ev).   */
   /* Generates DATA step statements that set LAM so that               */
   /* lam/(1-exp(-lam)) equals &ev; LAM is set to missing when          */
   /* &ev LE 1. Uses the work variables LAMLO, LAMHI and MAL.           */
   /* ev: name of a DATA step variable (or an expression) holding the   */
   /* expected value.                                                   */
   /* Every comment is closed before the next statement: a stray        */
   /* closing delimiter in front of an IF statement would turn that     */
   /* statement into a comment and stop the interval from shrinking.    */
   if &ev LE 1 then lam= . ;     /* expected value must be >1;          */
                                 /* lambda is not defined for ev LE 1   */
   else do;                      /* otherwise iterate to find lambda    */
      lamlo=&ev-1;               /* lambda is between exp value less one */
      lamhi=&ev;                 /* . . . and the expected value        */
      do until (abs(lamhi-lamlo)<1e-7);  /* convergence criteria        */
         lam=(lamhi+lamlo)/2;            /* examine midpoint            */
         mal= lam/(1-exp(-lam));         /* mean at midpoint            */
         if mal GE &ev then lamhi=lam;   /* lower upper endpoint        */
         if mal LE &ev then lamlo=lam;   /* raise lower endpoint        */
      end;
   end;
%mend inv;