題目:以2000年人口資料為母體,依縣市比例系統抽樣家戶再從家戶中隨機抽樣一人
/* Read the fixed-column population file into WORK.raw.
   county and house are read as character fields; unit, sex, age, edu and
   martial are read as numeric fields from the listed column positions. */
data raw;
    infile "c:/population.txt";
    /* The last column spec had been split into "martial 5" / "9" by the
       80-column wrap of the forum post, leaving a stray "9" token that is a
       syntax error in column input. It is rejoined here as column 59.
       NOTE(review): column 59 is reconstructed from the wrap position -
       confirm against the record layout of population.txt. */
    input county$ 11-12 house$ 13-26 unit 30-33 sex 37 age 45-47 edu 61-62
          martial 59;
run;
/* Remove the offshore-island counties (county codes 16, 71 and 72). */
data raw;
    set raw;
    /* county is a 2-character variable (read with county$ 11-12), so it is
       compared against quoted codes. Comparing it to numeric literals made
       SAS convert every value to a number on the fly and write a
       "character values have been converted to numeric" note to the log. */
    if county in ('16', '71', '72') then delete;
run;
/* Keep only records of people aged 15 or older. */
data raw;
    set raw;
    /* Subsetting IF: any row failing the condition is dropped. A missing age
       compares lower than every number in SAS, so such rows are dropped too. */
    if age >= 15;
run;
/* Compute each county's share of the remaining records.
   OUT= stores the one-way frequency table as cntyP; its PERCENT column is
   used by the following step. */
proc freq data=raw;
    tables county / out=cntyP;
run;
/* Convert each county's share into the number of units to draw, for a
   target total sample of 200.
   _NSIZE_ is the variable name PROC SURVEYSELECT expects in a sample-size
   data set, so it must not be renamed.
   Known flaw: each county is rounded to the nearest integer independently,
   so the sizes may add up to slightly more or slightly less than 200.
   NOTE(review): a very small county could round to 0 - confirm that
   SURVEYSELECT accepts that for the data in use. */
data cntyP;
    set cntyP;
    _NSIZE_ = floor(0.5 + 200*PERCENT/100);
run;
/* Preparation for PROC SURVEYSELECT: order raw by household.
   Stratified selection requires the input to be sorted by the STRATA
   variable(s) first. The sort is done in place. */
proc sort data=raw;
    by house;
run;
/* Draw one person at random from every household: each household is its
   own stratum and simple random sampling takes a single record from it.
   The selected records are written to the data set house. */
proc surveyselect data=raw out=house method=srs n=1;
    strata house;
run;
/* Preparation for PROC SURVEYSELECT: order the one-person-per-household
   data by county, the STRATA variable of the next selection. In place. */
proc sort data=house;
    by county;
run;
/* Systematic sampling within each county.
   The sample size is given as a data set (cntyP), which must contain the
   STRATA variable (county) and the per-stratum size in _NSIZE_.
   NOTE(review): the input already carries the selection-probability and
   weight columns from the per-household draw - confirm how they should be
   combined with the ones produced here before using weights. */
proc surveyselect data=house
                  out=sample
                  method=sys
                  sampsize=cntyP;
    strata county;
run;
--
※ 發信站: 批踢踢實業坊(ptt.cc)
◆ From: 122.126.32.152