/*--------------------------------------------------------------------*/
/* Removes the data for probes that are effectively off.              */
/*                                                                    */
/* A spot is "off" when its signal is below the 95th percentile of    */
/* the negative-control signal for its factor1/factor2 combination    */
/* (per slide, per dye in the original description).  A probe is off  */
/* for a treatment when more than half of its spots in that treatment */
/* are off, and it is effectively off when it is off for every        */
/* treatment supplied.                                                */
/*                                                                    */
/* NOTE(review): the original header says "at least 50% of the time", */
/* but the surviving code compares with a strict "> .5".  The strict  */
/* comparison is kept; confirm which one is intended.                 */
/*                                                                    */
/* Entry point: %find_off.  %find_p95 and %per_off are helpers.       */
/*--------------------------------------------------------------------*/

/*--------------------------------------------------------------------*/
/* find_p95: flag every spot as on/off against the negative-control   */
/* 95th percentile.                                                   */
/*                                                                    */
/*   lib          not used by this macro (kept for a uniform          */
/*                signature with find_off)                            */
/*   infile       input data set                                      */
/*   probe_id     probe identifier variable (not used here)           */
/*   factor1/2    class variables defining the percentile groups      */
/*   signal       signal variable                                     */
/*   neg_control  variable that marks negative-control spots          */
/*   neg_con_ind  value of neg_control that means "negative control"  */
/*   trt          treatment variable (not used here)                  */
/*                                                                    */
/* Creates: &infile._1, &factor1&factor2._p95,                        */
/*          &factor1&factor2._p95_1, p95_merge, spot_off.             */
/*--------------------------------------------------------------------*/
%macro find_p95(lib,infile,probe_id,factor1,factor2,signal,
                neg_control,neg_con_ind,trt);

  /* Work on a copy so the sort below does not reorder the input. */
  data &infile._1;
    set &infile;
  run;

  /* p95 of the negative-control signal for each factor1/factor2 cell. */
  proc means data=&infile._1 p95 noprint;
    /* Use this WHERE when the negative-control variable is numeric. */
    where &neg_control =&neg_con_ind;
    /* Swap in this WHERE when the negative-control variable is character. */
    /*where &neg_control ="&neg_con_ind";*/
    class &factor1 &factor2;
    var &signal;
    output out=&factor1&factor2._p95 p95=p95;
  run;

  /* _type_=3 is the two-way factor1*factor2 level; the overall and */
  /* one-way summary rows are dropped.                               */
  data &factor1&factor2._p95_1;
    set &factor1&factor2._p95;
    where _type_=3;
  run;

  /* Sort the copy so it can be match-merged with the percentiles. */
  proc sort data=&infile._1;
    by &factor1 &factor2;
  run;

  /* Attach the p95 of its factor1/factor2 cell to every spot. */
  data p95_merge;
    merge &factor1&factor2._p95_1 &infile._1;
    by &factor1 &factor2;
  run;

  /* spot_off=1 when the signal is below the cell's p95, else 0. */
  data spot_off;
    set p95_merge;
    if &signal ge p95 then spot_off=0;
    else spot_off=1;
  run;
%mend find_p95;

/*--------------------------------------------------------------------*/
/* per_off: for one treatment level, compute the fraction of off      */
/* spots per probe and flag the probe as off for that treatment.      */
/*                                                                    */
/*   trt   treatment variable in spot_off                             */
/*   trtn  treatment level; also used as the suffix of the output     */
/*         names, so it must be valid inside a SAS name               */
/*                                                                    */
/* Reads spot_off and the macro variable probe_id from the calling    */
/* macro (find_off).  Creates po_&trtn with &probe_id,                */
/* percent_off_&trtn and off_&trtn.                                   */
/*                                                                    */
/* NOTE(review): the macro header and the summary step were missing   */
/* from the source (text between a "<" and a ">" was lost).  They are */
/* reconstructed from the surviving KEEP list, the ".5" threshold and */
/* the po_/off_ names that find_off uses.  Confirm against the        */
/* original program.                                                  */
/*--------------------------------------------------------------------*/
%macro per_off(trt,trtn);

  /* Mean of the 0/1 flag = fraction of off spots.  CLASS with NWAY   */
  /* gives one row per probe, ordered by probe, without a prior sort. */
  /* CATS makes the comparison work for numeric or character &trt.    */
  proc means data=spot_off noprint nway;
    where cats(&trt) = "&trtn";
    class &probe_id;
    var spot_off;
    output out=po_&trtn mean=percent_off_&trtn;
  run;

  /* Probe is off for this treatment when more than half its spots are off. */
  data po_&trtn;
    set po_&trtn;
    if percent_off_&trtn > .5 then off_&trtn=1;
    else off_&trtn=0;
    keep &probe_id percent_off_&trtn off_&trtn;
  run;
%mend per_off;

/*--------------------------------------------------------------------*/
/* find_off: drop probes that are off for every treatment.            */
/*                                                                    */
/*   lib, suffix   used to build the output data set names            */
/*   infile ... trt   passed through to find_p95 (see above)          */
/*   trt1-trt10    treatment levels; blank positions are skipped, so  */
/*                 fewer than ten levels may be supplied              */
/*                                                                    */
/* Creates: partial_off&suffix, gene_off, off_list_&lib._&suffix,     */
/*          anova_all, anova_nooff_&lib._&suffix.                     */
/* Side effect: &infile is sorted in place by &probe_id.              */
/*--------------------------------------------------------------------*/
%macro find_off(lib,infile,probe_id,factor1,factor2,signal,
                neg_control,neg_con_ind,trt,trt1,trt2,trt3,trt4,trt5,trt6,
                trt7,trt8,trt9,trt10,suffix);

  %local i level po_list off_cond;
  %let po_list=;
  %let off_cond=;

  /* Build the merge list and the "off at every treatment" condition */
  /* from the treatment levels that were actually supplied.          */
  %do i=1 %to 10;
    %let level=&&trt&i;
    %if %length(&level) %then %do;
      %let po_list=&po_list po_&level;
      %if %length(&off_cond) %then %let off_cond=&off_cond and off_&level=1;
      %else %let off_cond=off_&level=1;
    %end;
  %end;

  /* Nothing to merge or test without at least one treatment level. */
  %if %length(&po_list)=0 %then %do;
    %put ERROR: find_off requires at least one treatment level in trt1-trt10.;
    %return;
  %end;

  /* Flag every spot against the negative-control p95. */
  %find_p95(&lib,&infile,&probe_id,&factor1,&factor2,
            &signal,&neg_control,&neg_con_ind,&trt)

  /* Fraction of off spots per probe for each treatment level. */
  %do i=1 %to 10;
    %let level=&&trt&i;
    %if %length(&level) %then %do;
      %per_off(&trt,&level)
    %end;
  %end;

  /* Combine the per-treatment results, one row per probe. */
  data partial_off&suffix;
    merge &po_list;
    by &probe_id;
  run;

  /* A gene is off only if it is off at every treatment. */
  data gene_off;
    set partial_off&suffix;
    gene_off=0;
    if &off_cond then gene_off=1;
  run;

  proc sort data=gene_off;
    by &probe_id;
  run;

  /* List of off genes, one row per probe. */
  data off_list_&lib._&suffix;
    set gene_off;
    where gene_off=1;
    by &probe_id;
    if first.&probe_id;
  run;

  /* Sort the original file by probe so it can be merged with the calls. */
  proc sort data=&infile;
    by &probe_id;
  run;

  /* Attach the on/off call to every original record. */
  data anova_all;
    merge &infile gene_off;
    by &probe_id;
  run;

  /* Keep everything that is not flagged off (this includes records */
  /* with a missing call).                                           */
  data anova_nooff_&lib._&suffix;
    set anova_all;
    where gene_off ne 1;
  run;
%mend find_off;