/*
 * Quality control for cleaned Imagene quantified image files.
 *
 * This program takes cleaned Imagene quantified image files (files with
 * the rows above the header line removed) and outputs basic quality
 * control information. Some basic statistics (mean, median, min, max,
 * 1st quartile, 3rd quartile, and standard deviation) are calculated for
 * the probes that represent genes and negative controls for both dyes.
 * Also, it generates density plots of the genes and negative controls
 * for both dyes.
 *
 * In order to run this program, a few paths have to be adjusted within
 * the code. First, the libraries have to be directed to the correct
 * locations on the computer that the macro is being run on. Second, the
 * path to the location of the quantified results (inputPath) and the
 * location where you want to output the results (outputPath) must be
 * set. Third, it is assumed that the name of the quantified results file
 * follows this structure:
 *     <dye>_<slidename><slidenum>.txt
 * where <dye> is green or red. The structure can be altered if
 * necessary. There are comments above each of the places within the code
 * where the paths need to be altered.
 *
 * Running a macro within SAS is simple. First, the libraries need to be
 * assigned. This can be achieved by highlighting the libname statements
 * and either clicking on the run button on the toolbar or hitting F8 on
 * the keyboard. Second, the macros need to be loaded. Again, just
 * highlight the code from the "%macro get_slide" line until the
 * "%mend get_dyes" line and either click the run button or hit F8.
 * Third, to invoke the macro, enter the slide number and the slide name
 * in the "%get_dyes" line. For example, to run the red and green files
 * named green_clean1.txt and red_clean1.txt, enter the line
 *     %get_dyes(1,clean);
 * Then, just highlight that line and click run or hit F8.
 *
 * Several files can be run at once by entering several get_dyes
 * statements:
 *     %get_dyes(1,clean);
 *     %get_dyes(2,clean);
 *     %get_dyes(3,clean);
 *     ...and so on.
 *
 * A folder called plots must be inside of the output directory.
 *
 * NOTE: only block comments are used in this file. Macro triggers
 * (% and &) inside statement-style comments (asterisk ... semicolon)
 * are still acted on by the macro processor, so an example macro call
 * written in such a comment would really be executed.
 */

/*
 * adjust paths of libraries below
 *
 * probev3 - This is where existing SAS formatted annotation
 *           data exists.
 *
 * clean   - This is the path to your "cleaned" files. "cleaned"
 *           files are defined as having extra header / footer
 *           information removed from them. (The one row column label
 *           header should still be present).
 */
libname probev3 "z:\AAP Output\SAS anno";
libname clean "z:\mpopp\Glu-diallel\8-5-05\Results\qc_results";

/* adjust paths of input, output, and annotation folders */
%let inputPath=;
/* create a folder called plots in output folder */
%let outputPath=;
%let annotationPath=;

/*
 * get_slide: imports one quantified results file and renames columns.
 *
 *   dye       - dye prefix of the file name (get_dyes passes green / red)
 *   slidenum  - slide number used in the file and data set names
 *   slidename - slide name used in the file name
 *
 * Reads "&inputPath\<dye>_<slidename><slidenum>.txt" (tab delimited) into
 * WORK.<dye>_slide<slidenum>, then writes re_<dye>_slide<slidenum> holding
 * only row, column and the signal / background means, renamed so that both
 * dyes can later be merged side by side.
 */
%macro get_slide(dye,slidenum,slidename);

    /* adjust path of the quantified results */
    proc import out=WORK.&dye._slide&slidenum
        datafile="&inputPath\&dye._&slidename&slidenum..txt"
        dbms=TAB replace;
        getnames=YES;
        datarow=2;
        guessingrows=20;
    run;

    /* keep refers to the original names; rename is applied on output */
    data re_&dye._slide&slidenum;
        set &dye._slide&slidenum;
        rename signal_mean=&dye._slide&slidenum._signal_mean
               background_mean=&dye._slide&slidenum._bkg_mean;
        keep row column signal_mean background_mean;
    run;

%mend get_slide;

/*
 * dye_means: finds the mean, min, max, q1, q3, median, and standard
 * deviation of the signal mean for one dye, overall and per
 * our_neg_con_flag class level.
 *
 *   dye - dye prefix (get_dyes passes green / red)
 *
 * NOTE: slidenum is not a parameter of this macro. It is resolved from
 * the scope of the calling macro (get_dyes), so dye_means only works
 * when it is called from a macro or session where slidenum is defined.
 *
 * Output: re_means_<dye>, the PROC MEANS output with _type_ dropped and a
 * dye column added.
 */
%macro dye_means(dye);

    proc means data=slide&slidenum._anno;
        class our_neg_con_flag;
        var &dye._slide&slidenum._signal_mean;
        output out=means_&dye
            mean=mean
            min=min
            q1=quartile1
            median=median
            q3=quartile3
            max=max
            std=standard_deviation;
    run;

    data re_means_&dye;
        /* fixed length so stacking the dyes in any order cannot truncate */
        length dye $ 8;
        set means_&dye;
        dye="&dye";
        drop _type_;
    run;

%mend dye_means;

/*
 * dens: creates separate columns based on the our_neg_con_flag and
 * renames columns accordingly.
 *
 *   neg_con  - value of our_neg_con_flag to select (get_dyes passes 0 / 1)
 *   dye      - dye prefix (get_dyes passes green / red)
 *   slidenum - slide number, used to rebuild the analysed variable name
 *
 * Reads value_count and writes <dye><neg_con>_values with the two columns
 * <dye>percent<neg_con> and <dye>value<neg_con>.
 */
%macro dens(neg_con,dye,slidenum);

    data &dye&neg_con._values;
        set value_count;
        where our_neg_con_flag=&neg_con
          and varname="&dye._slide&slidenum._signal_mean";
        rename percent=&dye.percent&neg_con
               value=&dye.value&neg_con;
        keep percent value;
    run;

%mend dens;

/*
 * get_dyes: the master macro that calls on the above three macros.
 *
 *   slidenum  - slide number
 *   slidename - slide name
 *
 * Steps: import both dyes, sort by row and column, merge with
 * probev3.map_anno by row and column, export the basic statistics to
 * "&outputPath\<slidename><slidenum>_means.csv", and write the overlaid
 * density plot as HTML into "&outputPath\plots".
 */
%macro get_dyes(slidenum,slidename);

    /* Stop early with a clear message if the path placeholders at the */
    /* top of the file were never filled in.                           */
    %if %length(%superq(inputPath)) = 0 or %length(%superq(outputPath)) = 0 %then %do;
        %put ERROR: Set the inputPath and outputPath macro variables before calling get_dyes.;
        %return;
    %end;

    /* The get_slide macro is used to import the appropriate information */
    /* from the quantified results, once for each dye.                   */
    %get_slide(green,&slidenum,&slidename);
    %get_slide(red,&slidenum,&slidename);

    /* The data is sorted by row and column */
    proc sort data=re_green_slide&slidenum;
        by row column;
    proc sort data=re_red_slide&slidenum;
        by row column;

    /* The data from probev3.map_anno is sorted by row and column */
    /* (in place, i.e. the permanent data set itself is sorted).  */
    proc sort data=probev3.map_anno;
        by row column;
    run;

    /* The quantified results are merged with probev3.map_anno by row */
    /* and column.                                                    */
    data slide&slidenum._anno;
        merge re_green_slide&slidenum
              re_red_slide&slidenum
              probev3.map_anno;
        by row column;
    run;

    /* Basic statistics are calculated for each dye by the dye_means */
    /* macro (which picks up slidenum from this macro's scope).      */
    %dye_means(green);
    %dye_means(red);

    /* Stacking the basic stats and creating a variable called subset to  */
    /* designate which stats were calculated on the data overall or the   */
    /* subsets of the genes (where our_neg_con_flag=0) and the negative   */
    /* controls (where our_neg_con_flag=1).                               */
    data means_both_dyes;
        /* Without an explicit length the first assignment ("overall")  */
        /* would fix the length at 7 and "negative controls" would be   */
        /* truncated.                                                   */
        length subset $ 17;
        set re_means_green re_means_red;
        if our_neg_con_flag=. then subset="overall";
        else if our_neg_con_flag=0 then subset="genes";
        else if our_neg_con_flag=1 then subset="negative controls";
        drop our_neg_con_flag;
    run;

    /* adjust path to results folder */
    /* The stacked results of the basic stats are exported into the     */
    /* folder set in the output path in a .csv file named according to  */
    /* the slidename variable followed by "_means".                     */
    proc export data=WORK.means_both_dyes
        outfile="&outputPath\&slidename&slidenum._means.csv"
        dbms=CSV replace;
    run;

    /* the quantified results merged with probev3.map_anno is sorted by */
    /* our_neg_con_flag                                                 */
    proc sort data=slide&slidenum._anno;
        by our_neg_con_flag;
    run;

    /* the frequencies of the signal intensities are calculated for each */
    /* dye and our_neg_con_flag                                          */
    proc univariate data=slide&slidenum._anno freq;
        by our_neg_con_flag;
        var green_slide&slidenum._signal_mean
            red_slide&slidenum._signal_mean;
        ods output frequencies=freq_slide&slidenum;
    run;

    /* the densities are calculated */
    /* NOTE(review): no WEIGHT statement is used here, so value_count   */
    /* holds the percent of each value among the rows of the ODS        */
    /* Frequencies table, not weighted by the count that PROC           */
    /* UNIVARIATE reported - confirm this is the intended density.      */
    proc freq data=freq_slide&slidenum;
        by our_neg_con_flag varname;
        tables value/out=value_count;
    run;

    /* the dens macro is used for every dye and our_neg_con_flag */
    %dens(0,green,&slidenum);
    %dens(1,green,&slidenum);
    %dens(0,red,&slidenum);
    %dens(1,red,&slidenum);

    /* the densities are merged and relabeled */
    data flags_sbys;
        merge green0_values green1_values red0_values red1_values;
        label greenpercent0="Cy3 Genes"
              greenpercent1="Cy3 Negative Controls"
              redpercent0="Cy5 Genes"
              redpercent1="Cy5 Negative Controls";
    run;

    /* options for the plot are set */
    goptions device=GIF ftitle='Arial/bo' ftext='Arial' htitle=4 htext=2;
    symbol1 i=sm25 v=none l=2 c=green width=2;
    symbol2 i=sm25 v=none l=1 c=green width=2;
    symbol3 i=sm25 v=none l=2 c=red width=2;
    symbol4 i=sm25 v=none l=1 c=red width=2;
    axis1 order=(0 to 500 by 50) label=("signal intensity");
    axis2 label=(angle=90 "density");
    title "Density plots of &slidename &slidenum";
    legend1 label=none shape=symbol(4,2) position=(top center inside) mode=share;

    /* adjust path to results folder */
    /* The HTML destination is opened once, on the plots folder. */
    ods listing close;
    ods html path="&outputPath\plots" body="&slidename&slidenum..html";

    /* the densities of each of the dyes for both the genes and the */
    /* negative controls are plotted and overlaid                   */
    proc gplot data=flags_sbys uniform;
        plot greenpercent1*greenvalue1
             greenpercent0*greenvalue0
             redpercent1*redvalue1
             redpercent0*redvalue0
             / name="&slidenum"
               overlay
               haxis=axis1
               vaxis=axis2
               caxis=black
               ctext=black
               legend=legend1;
    run;
    quit;

    ods html close;
    ods listing;

%mend get_dyes;

/*
 * The macro get_dyes is invoked below; enter the slide number and the
 * slide name, in that order. The call has the form
 *     get_dyes(slidenumber,slidename)
 * prefixed with a percent sign and followed by a semicolon (see the
 * worked examples in the header at the top of this file).
 */