/*--------------------------------------------------------------------*/
/* This set of macros flags p-values according to a false discovery   */
/* rate (FDR) step-up rule at three threshold levels (for example     */
/* 0.05, 0.1 and 0.2).  The levels are supplied as the alpha1-alpha3  */
/* parameters of %fdr_col and may be given in any order.              */
/*                                                                    */
/*   Probes that meet the strictest threshold are designated red.     */
/*   Probes that meet the mid-level threshold are designated orange.  */
/*   Probes that meet the laxest threshold are designated yellow.     */
/*   Probes that meet none of the thresholds are designated tan.      */
/*                                                                    */
/* The colors are stored in a column named after the p-value column,  */
/* preceded by "fdr_".                                                */
/*                                                                    */
/* The commented-out KEEP statement in the last data step can be      */
/* uncommented if just the individual color flags are desired.        */
/*--------------------------------------------------------------------*/

/*--------------------------------------------------------------------*/
/* %fdr - flag the p-values that are significant at one FDR level.    */
/*                                                                    */
/*   probe_id : variable the output data set is sorted by             */
/*   pvalue   : name of the p-value variable                          */
/*   color    : one-letter color code; used as the flag value and as  */
/*              a prefix/suffix of the work data set names            */
/*   level    : FDR level for this color                              */
/*                                                                    */
/* Reads  : &pvalue._max, which must contain &pvalue, &pvalue._rank   */
/*          and num_tests (built by %fdr_col).                        */
/* Writes : threshold_calc_&color, crossover_&color,                  */
/*          crossover_first_&color and &color._&pvalue.  The last one */
/*          is sorted by &probe_id and carries the flag variable      */
/*          &color._&pvalue ("&color" when significant, else "n").    */
/*--------------------------------------------------------------------*/
%macro fdr(probe_id, pvalue, color, level);

    /* Threshold for each p-value: level * rank / number of tests.    */
    /* The denominator is the count of non-missing p-values, not the  */
    /* largest rank: with TIES=LOW the largest rank is smaller than   */
    /* the number of tests whenever the largest p-values are tied.    */
    data threshold_calc_&color;
        set &pvalue._max;
        if num_tests > 0 then threshold = &level * &pvalue._rank / num_tests;
    run;

    /* Order from the largest rank down to the smallest. */
    proc sort data=threshold_calc_&color;
        by descending &pvalue._rank;
    run;

    /* Mark each row: "1" when the p-value is at or below its         */
    /* threshold, "0" otherwise.  Missing p-values are never marked.  */
    data crossover_&color;
        set threshold_calc_&color;
        if missing(&pvalue) or &pvalue > threshold then fdr_&pvalue = "0";
        else fdr_&pvalue = "1";
    run;

    /* Keep the rank of the first marked row in descending-rank       */
    /* order, i.e. the largest rank at or below its threshold.  The   */
    /* result has one row, or no rows when nothing is marked.         */
    data crossover_first_&color;
        set crossover_&color;
        where fdr_&pvalue = "1";
        &pvalue._rank_cross = &pvalue._rank;
        keep &pvalue._rank_cross;
        output;
        stop;
    run;

    /* Attach the crossover rank to every row and flag all p-values   */
    /* ranked at or below it.  The NOBS= guard keeps the step from    */
    /* ending immediately when crossover_first_&color is empty; in    */
    /* that case the crossover rank stays missing and nothing is      */
    /* flagged.                                                       */
    data &color._&pvalue;
        if _n_ = 1 and n_cross > 0 then
            set crossover_first_&color nobs=n_cross;
        set crossover_&color;
        if not missing(&pvalue._rank)
           and &pvalue._rank le &pvalue._rank_cross
            then &color._&pvalue = "&color";
        else &color._&pvalue = "n";
        drop threshold fdr_&pvalue &pvalue._rank_cross &pvalue._rank;
    run;

    /* Sort on &probe_id so the per-color results can be merged. */
    proc sort data=&color._&pvalue;
        by &probe_id;
    run;

%mend fdr;

/*--------------------------------------------------------------------*/
/* %fdr_col - add an FDR color column for one p-value variable.       */
/*                                                                    */
/*   infile   : input data set                                        */
/*   probe_id : identifier variable used to merge the per-color       */
/*              results (numeric or character)                        */
/*   pvalue   : name of the p-value variable                          */
/*   alpha1-3 : the three FDR levels, in any order; the largest is    */
/*              used for yellow, the middle one for orange and the    */
/*              smallest for red                                      */
/*                                                                    */
/* Writes : fdr_&pvalue, containing the input variables plus the      */
/*          character column fdr_&pvalue with the values "3_red",     */
/*          "2_orange", "1_yellow", "0_tan", or blank when the        */
/*          p-value is missing.  Intermediate data sets               */
/*          (&pvalue._rank, num_probes, &pvalue._max, colors_fdr and  */
/*          those written by %fdr) are left in place.                 */
/*--------------------------------------------------------------------*/
%macro fdr_col(infile, probe_id, pvalue, alpha1, alpha2, alpha3);

    /* Order the levels so that red is always the strictest color,    */
    /* whatever order the caller supplied them in.                    */
    %local lax_level mid_level strict_level;
    %let lax_level    = %sysfunc(max(&alpha1, &alpha2, &alpha3), best32.);
    %let mid_level    = %sysfunc(median(&alpha1, &alpha2, &alpha3), best32.);
    %let strict_level = %sysfunc(min(&alpha1, &alpha2, &alpha3), best32.);

    /* Rank the p-values and add the ranks to the original data. */
    proc rank data=&infile out=&pvalue._rank ties=low;
        var &pvalue;
        ranks &pvalue._rank;
    run;

    /* One-row summary: number of rows (num_probes, from _FREQ_),     */
    /* largest rank (max_rank) and number of non-missing p-values     */
    /* (num_tests).  Only the numeric rank is analysed, so the probe  */
    /* identifier may be a character variable.                        */
    proc means data=&pvalue._rank noprint;
        var &pvalue._rank;
        output out=num_probes(drop=_type_ rename=(_freq_=num_probes))
               max(&pvalue._rank)=max_rank
               n(&pvalue._rank)=num_tests;
    run;

    /* Attach the summary values to every row of the ranked data. */
    data &pvalue._max;
        if _n_ = 1 then set num_probes;
        set &pvalue._rank;
        label num_probes = num_probes;
    run;

    /* Flag the p-values at each of the three levels. */
    %fdr(&probe_id, &pvalue, y, &lax_level)
    %fdr(&probe_id, &pvalue, o, &mid_level)
    %fdr(&probe_id, &pvalue, r, &strict_level)

    /* Merge the results of the yellow, orange and red levels. */
    data colors_fdr;
        merge y_&pvalue o_&pvalue r_&pvalue;
        by &probe_id;
    run;

    /* Combine the three color flags into one column: the strictest   */
    /* color that applies wins, tan marks non-significant p-values    */
    /* and missing p-values get a blank.  Uncomment the KEEP          */
    /* statement for the color flags in separate columns.             */
    data fdr_&pvalue;
        length fdr_&pvalue $ 8;
        set colors_fdr;
        if missing(&pvalue) then fdr_&pvalue = "";
        else if r_&pvalue = "r" then fdr_&pvalue = "3_red";
        else if o_&pvalue = "o" then fdr_&pvalue = "2_orange";
        else if y_&pvalue = "y" then fdr_&pvalue = "1_yellow";
        else fdr_&pvalue = "0_tan";
        drop y_&pvalue o_&pvalue r_&pvalue max_rank num_probes num_tests;
        /*keep &probe_id fdr_&pvalue r_&pvalue y_&pvalue o_&pvalue;*/
    run;

%mend fdr_col;