/* Day 1 */
* DATA step basics;
* The DATA statement names the SAS data set being created;
* A one-level name such as customers is saved in the temporary WORK library;
data customers;
    set AIR.ORGANICS;
run;
* Getting data into SAS by converting raw values into a SAS data set;
* This step reads internal (in-stream) data into the data set uspresidents;
*   DATA      names the data set to create;
*   INPUT     describes how the raw values are arranged and which SAS
              variables receive them (a trailing $ marks a character variable);
*   DATALINES introduces the in-stream records and must be the last
              statement of the DATA step, directly before the data;
*   RUN       marks the end of the DATA step;
data uspresidents;
    input President $
          Party $
          Number;
    datalines;
Adams F 2
Lincoln R 16
Grant R 18
Kennedy D 35
;
run;
/* Rebuild uspresidents from an external raw file instead of in-stream data.
   INFILE points at the file and DLM=' ' names the blank as the field
   delimiter; INPUT reads the same three fields with list input. */
data uspresidents;
    infile '/home/olabodeoa0/LSBDATA&CODE/President.dat' dlm=' ';
    input President $
          Party $
          Number;
run;
/* Exercise: create a SAS data set named toads by reading the raw file
   ToadJump.dat with list input.
   The original template left the data set name and the file path blank,
   which makes the step fail (an empty INFILE path is an error and a bare
   DATA statement would not create a data set called toads). */
/* NOTE(review): the folder is assumed to be the same course folder that
   holds President.dat - confirm the location of ToadJump.dat before running. */
data toads;
    infile '/home/olabodeoa0/LSBDATA&CODE/ToadJump.dat';
    /* ToadName is character ($); the weight and the three jumps are numeric. */
    input ToadName $ Weight Jump1 Jump2 Jump3;
run;
/* Creating a new SAS data set from an existing SAS data set.
   Begin the DATA step with the DATA statement, which names the data set
   being created; that data set can be temporary or permanent.
   The SET statement names the existing SAS data set to read as input.

   A block comment is used here on purpose: a comment statement that starts
   with an asterisk must not contain an unmatched quotation mark, because SAS
   treats the apostrophe as the start of a quoted string and swallows the
   code that follows. */
data NIGERIA;
    set MAPSSAS.ALGERIA;
run;
/* Read the data set stored at the quoted physical path and save a copy in
   the pairview library under the name production_product. */
data pairview.production_product;
    set '/home/olabodeoa0/Data Class/production_product.sas7bdat';
run;
* Build Selecting_Variables with only the listed columns;
* KEEP names the variables that are written to the output data set;
data Selecting_Variables;
    set pairview.production_product;
    keep Name
         StandardCost;
run;
* Rebuild Selecting_Variables with every column except the listed ones;
* DROP names the variables that are left out of the output data set;
data Selecting_Variables;
    set pairview.production_product;
    drop Name
         StandardCost;
run;
* Using the FORMAT statement in a DATA step: a FORMAT statement in a DATA
step permanently associates formats with variables.
Syntax - FORMAT variable(s) format;
*SAS Formats - used to enhance the appearance of variable values in
your reports;
*Formats - format only affects the displayed value. The stored value
is not affected by a format. ;
*The Sum function ignores missing values;
*SAS Functions ;
/*Functions performs calculation on, or a transformation of, the
arguments given in parentheses following the
function name*/
*Data to use ODBC - ADWDW - Dimcustomer;
*Input function - use input function to convert character variables to
numeric which includes dates and currency;
*Put Function - Use the Put function to convert numeric variables to
character variables;
/* Demonstrates common SAS functions on the customer dimension table and
   keeps only the derived variables in the output data set SAS_functions.

   INPUT  converts character values to numeric (here: text to SAS dates).
   PUT    converts numeric values to character using a format.
   MONTH, DAY, YEAR extract date parts.
   The concatenation operator and CAT join values as stored, without
   removing trailing blanks.
   YRDIF with the AGE basis returns the elapsed time in years.

   DOB_Year is listed in KEEP= so that the computed year is not silently
   dropped from the output. */
data SAS_functions (keep=New_BirthDate New_DOF_purchase Charac_Birthdate
                         DOB_Month DOB_Day DOB_Year
                         Concat_Names Concat_Names2 Upcase_FirstName
                         Age Tenuretodate Agedays);
    /* Attach a readable date format to the two new date variables. */
    format New_BirthDate YYMMDD10. New_DOF_purchase YYMMDD10.;
    set PAIRVIEW.dbo_dimcustomer;

    /* NOTE(review): assumes BirthDate and DateFirstPurchase arrive as
       character values laid out as yyyy-mm-dd - confirm against the table. */
    New_BirthDate    = input(BirthDate, YYMMDD10.);
    New_DOF_purchase = input(DateFirstPurchase, YYMMDD10.);

    /* Numeric date rendered as text, month name plus two-digit year. */
    Charac_Birthdate = put(New_BirthDate, MONYY5.);

    DOB_Month = month(New_BirthDate);
    DOB_Day   = day(New_BirthDate);
    DOB_Year  = year(New_BirthDate);

    Concat_Names     = FirstName || MiddleName || LastName;
    Concat_Names2    = cat(FirstName, MiddleName, LastName);
    Upcase_FirstName = upcase(FirstName);

    /* Age in (fractional) years as of the day the step runs. */
    Age = yrdif(New_BirthDate, today(), 'Age');

    /* SAS dates are day counts, so subtracting gives the exact age in days
       instead of the years * 365.25 approximation. */
    Agedays = today() - New_BirthDate;

    /* Whole years since the first purchase. */
    Tenuretodate = int(yrdif(New_DOF_purchase, today(), 'Age'));
run;
* Day 2;
* PROC PRINT reading a data set by its physical location on the SAS server;
* The OBS=10 data set option stops the listing after the first 10 observations;
proc print data='/home/olabodeoa0/dbo_dimemployee.sas7bdat' (obs=10);
run;
* PROC PRINT with no DATA= option prints the most recently created data set;
proc print;
run;
*PROC SORT - Use this procedure to sort your data: to organise data
for a report, before merging or combining
data sets, or before using a BY statement in another PROC or DATA
step;
*BY statement specifies the variable to use to form BY groups. The
variables in the BY statement are called BY variables;
*Put the DESCENDING keyword in the BY statement immediately before each
variable that should be sorted from high to low (one or more variables
can be listed).
If DESCENDING is not specified, SAS sorts in ascending order by
default;
*PROC SORT replaces the original data set unless you specify an output
data set in the OUT= option;
/* PROC SORT DATA= input-SAS-data-set
<OUT=output-SAS-data-set>;
BY<DESCENDING> by-variable(s);
RUN; */
* Sort the employee table in place, highest BaseRate first;
* No OUT= is given, so the sorted rows replace pairview.dbo_dimemployee;
proc sort data=pairview.dbo_dimemployee;
    by descending BaseRate;
run;
* OUT= writes the sorted result to a new data set and leaves the input unchanged;
* NODUPKEY keeps one observation per distinct BY value and drops the duplicates;
proc sort data=pairview.dbo_dimemployee
          out=work.dbo_dimemployee
          nodupkey;
    by descending Title;
run;
/* Per-customer sales summary: one row per Customerkey holding the average
   and the total SalesAmount from the internet sales fact table.
   The result is written to the one-level table name Stay.
   The stray comma that followed the GROUP BY column was a syntax error
   and has been removed. */
proc sql;
    create table Stay as
    select
        Customerkey,
        avg(SalesAmount) as AvgCust_Sales,
        sum(SalesAmount) as TotalCust_Sales
        /* min(Orderdate) as Admit_Date,
           max(Orderdate) as Discharge_Date,
           max(Orderdate) - min(Orderdate) as Length_of_stay */
    from pairview.DBO_FACTINTERNETSALES
    group by Customerkey
    ;
quit;
/* Per-customer profile and sales summary, saved as pairview.Stay.
   Joins the internet sales fact table (A) to the customer dimension (B)
   on Customerkey and returns, per customer:
     MaritalStatus
     Nextbday         first birthday anniversary strictly after today
     Age              completed years of age as of today
     AvgCust_Sales    average SalesAmount
     TotalCust_Sales  total SalesAmount

   INTCK uses the CONTINUOUS method so that years are counted from the
   birth date itself; the default method counts calendar-year boundaries
   and overstates the age before the birthday has occurred in the year.
   Nextbday advances the birth date by Age + 1 years with SAME alignment
   and carries a date format so that it does not display as a raw number.

   NOTE(review): assumes Birthdate is character text laid out as
   yyyy-mm-dd - confirm against the table.
   MaritalStatus is left unqualified because this file does not show which
   of the two tables carries it. */
proc sql;
    create table pairview.Stay as
    select
        A.Customerkey,
        MaritalStatus,
        /*FORMAT YYMMDD10. input(Birthdate,YYMMDD10.) as BDate,*/
        /*DATEDIFF (day, input(Birthdate,YYMMDD10.) , today() ),*/
        intnx('year',
              input(B.Birthdate, YYMMDD10.),
              intck('year', input(B.Birthdate, YYMMDD10.), today(), 'continuous') + 1,
              'same') as Nextbday format=YYMMDD10.,
        intck('year', input(B.Birthdate, YYMMDD10.), today(), 'continuous') as Age,
        avg(A.SalesAmount) as AvgCust_Sales,
        sum(A.SalesAmount) as TotalCust_Sales
    from pairview.DBO_FACTINTERNETSALES as A
    inner join pairview.DBO_DIMCUSTOMER as B
        on A.Customerkey = B.Customerkey
    group by
        A.Customerkey,
        MaritalStatus,
        B.Birthdate;
quit;