SAS Code-Chapter 3

 

 

This will be ordered/alphabetized when I no longer need it this semester.

 

 

 

 

Frequency Tables

 

 

 

*one-way frequency table of the categorical variable cat_var;
PROC FREQ DATA=filename;
    TABLES cat_var;
RUN;

 

*prints frequency table by cat_var;

 

 

 

 

 

 

*user-defined format length_fmt - maps numeric ranges to interval labels;
PROC FORMAT;
    VALUE length_fmt
        .         = 'Missing'
        LOW - 3   = '(-inf,3]'
        3< - 5    = '(3,5]'
        5< - 7    = '(5,7]'
        7< - 9    = '(7,9]'
        9< - HIGH = '(9,+inf)';
RUN;

*categorizes the quantitative values into intervals so a frequency table can be made for a quantitative variable;

*##< means the interval is open at ## (the value ## itself is excluded);

*the labels do not have to be intervals - any name can be used;

 

 

 

 

*frequency table of var_name with its values grouped by the length_fmt format;
PROC FREQ DATA=filename;
    TABLES var_name;
    FORMAT var_name length_fmt.;
RUN;

 

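*the period (.) following the format name is required - it marks length_fmt as a format rather than a variable (this one is a numeric format - character formats start with $);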

 

 

 

 

Sample output

 

 

 

 

 

 

 

 

 

 

 

Printing Data

 

 

 

*list every observation in the data set;
PROC PRINT DATA=filename;
RUN;

*prints all observations;

 

 

 

 

 

 

*list observations from the first one through observation number ##;
PROC PRINT DATA=filename(OBS=##);
RUN;

*prints observations first - ##;

 

 

 

 

 

 

*list observations from observation number ## through the last one;
PROC PRINT DATA=filename(FIRSTOBS=##);
RUN;

*prints observations ## - last;

 

 

 

 

 

 

*list observations ##1 through ##2 - OBS= is the number of the last observation to print;
PROC PRINT DATA=filename(OBS=##2 FIRSTOBS=##1);
RUN;

*prints observations ##1 - ##2;

 

 

 

 

 

 

 

 

 

 

Percentiles

 

 

 

*selected percentiles for each variable listed on the VAR statement;
PROC MEANS DATA=filename P1 P5 P10 P25 P75 P90 P99;
    VAR var_1 var_2 var_n;
RUN;

*PROC MEANS will only generate the commonly used percentiles (for example, not the 23rd percentile) - if the percentile keyword does not turn blue in the editor, SAS will not show it;

 

 

 

 

 

 

*compute the requested percentiles and save them in the data set new_filename;
PROC UNIVARIATE DATA=filename;
    VAR var_1 var_2 var_n;
    OUTPUT OUT=new_filename
        PCTLPTS  = ## ## ##               /* percentiles to compute - can be any values   */
        PCTLPRE  = var_1 var_2 var_n      /* first part of each output variable name      */
        PCTLNAME = pct## pct## pct##;     /* second part of each output variable name     */
RUN;

 

*display the saved percentiles;
PROC PRINT DATA=new_filename;
RUN;

 

 

 

*pctlpts= can be any percentiles;

*pctlpre= gives the first part of each output column name (title);

*pctlname= gives the second part of each output column name (title);

 

 

 

 

 

 

 

 

 

 

Summary Statistics

 

 

 

*full set of summary statistics and plots for the data set;
PROC UNIVARIATE DATA=filename ALL;
RUN;

*'all' gives all the possible summary statistics, stemplot, boxplot, and normal prob plot for each variable;

 

 

 

 

 

*trimmed means - 0.# is the trimming proportion;
PROC UNIVARIATE DATA=filename TRIM=0.#;
RUN;

*this trims that proportion of the data from EACH tail (0.# from the top and 0.# from the bottom) before computing the trimmed mean;

 

 

 

 

 

 

*range and interquartile range of each listed variable;
PROC MEANS DATA=filename RANGE QRANGE;
    VAR var_1 var_2 var_n;
RUN;

*qrange = IQR… you can put additional options here;

 

 

 

 

 

 

*correlation matrix plus the covariance matrix requested by COV;
PROC CORR DATA=filename COV;
RUN;

*cov includes the covariance matrix - without this option you will just get the correlation matrix;

 

 

 

 

 

 

 

 

 

 

PLOTS

 

 

Stemplots

 

 

 

*text plots (including the stemplot) for var_1 - PLOTSIZE= controls their size;
PROC UNIVARIATE DATA=filename PLOT PLOTSIZE=#;
    VAR var_1;
RUN;

*# changes the number of stems;

 

 

 

 

 

 

 

Histograms

 

 

 

*histogram of var_1 with bins defined by their endpoints (0 to 2.5 in steps of 0.5);
*noprint suppresses the tables of statistics so only the graph is produced;
proc univariate data=filename noprint;
      histogram var_1 / cfill=blue endpoints=0 to 2.5 by 0.5;
run;

 

*uses endpoints for histogram;

 

 

 

 

 

 

*histogram of var_1 with bins defined by their midpoints and a count-scaled vertical axis;
PROC UNIVARIATE DATA=filename NOPRINT;
    HISTOGRAM var_1 /
        MIDPOINTS = 0.0625 to 2.4375 by 0.25
        VSCALE    = count
        CFILL     = red;
RUN;

 

*vscale=count uses counts rather than pcts;

*uses midpoints rather than endpoints;

 

 

 

 

 

 

Boxplots

 

 

 

*text-based plots (including the boxplot) for var_1;
PROC UNIVARIATE DATA=filename PLOT;
    VAR var_1;
RUN;

*text boxplot;

 

 

 

 

 

 

*proc boxplot expects the data ordered by the group variable, so sort a copy first;
*(the original data set filename is left untouched);
proc sort data=filename out=filename_sorted;
      by cat_var;
run;

*one set of side-by-side boxplots per analysis variable, one box per level of cat_var;
proc boxplot data=filename_sorted;
      plot (var_1 var_2 var_3 var_n)* cat_var;
run;

*nicer boxplots separated by cat_var;

 

 

 

 

 

 

PieCharts

 

 

 

*pie chart of cat_var drawn by PROC CHART;
PROC CHART DATA=filename;
    PIE cat_var;
RUN;

 

 

 

 

 

 

 

*pie chart of cat_var drawn by PROC GCHART;
proc gchart data=filename;
      pie cat_var;
run;
*gchart supports run-group processing and stays active after run - quit ends it;
quit;

 

 

 

 

 

 

 

C.D.F Charts

 

 

 

*cumulative distribution function plot of var_1 - NOPRINT suppresses the printed tables;
PROC CAPABILITY DATA=filename NOPRINT;
    CDF var_1;
RUN;

*cdf is SUPPOSED to be red!!!!!!;

 

 

 

 

 

 

*alternative CDF plot through PROC PROBIT;
*NOTE(review) - no MODEL statement is given here, which may be why this does not run - confirm;
PROC PROBIT DATA=filename;
    CDFPLOT VAR=var_1;
RUN;

*works in other SAS packages, not at KSU;

 

 

 

 

 

 

Matrix Plot

 

 

 

*scatterplot matrix from PROC CORR - needs ODS Graphics on and the HTML destination open;
ODS HTML;
ODS GRAPHICS ON;

PROC CORR DATA=filename PLOTS=MATRIX;
    VAR var_1 var_2 var_3 var_n;
RUN;

*switch ODS Graphics back off and close the HTML destination;
ODS GRAPHICS OFF;
ODS HTML CLOSE;

*this will show up in ‘Results Viewer’ along with simple statistics and correlation matrix;

 

 

 

 

 

 

*interactive scatterplot matrix - every listed variable against every listed variable, points labelled by class;
PROC INSIGHT DATA=filename;
    SCATTER var_1 var_2 var_3 var_n * var_1 var_2 var_3 var_n / LABEL=class;
RUN;

*interactive graph - similar to 'solution analysis';

 

 

 

 

 

 

Scatterplots

 

 

 

*scatterplot with var_1 on the vertical axis and var_2 on the horizontal axis;
proc gplot data=filename;
      plot var_1*var_2;
run;
*gplot supports run-group processing and stays active after run - quit ends it;
quit;

 

 

 

 

 

 

 

*scatterplot of var_1 (vertical) against var_2 (horizontal) with one symbol per level of class;
proc gplot data=filename;
      *symbol1 - symbol3 set the marker shape and color used for the class levels;
      symbol1 value=dot color=black;
      symbol2 value=circle color=green;
      symbol3 value=plus color=red;
      plot var_1*var_2=class;
run;
*gplot supports run-group processing and stays active after run - quit ends it;
quit;

*uses colored and/or symbol class labels;

 

 

 

 

 

 

3-D

 

 

data filename_symbol;

      set filename;

      length colorval $8 shapeval $8;

            if cat_var='name 1' then