##########################
##### R Tutorial     #####
##### 2010-01-28     #####
##### PubH 7440      #####
##### Laura Hatfield #####
##########################

#####################
##### Running R #####
#####################

## Comments begin with a hash/pound sign
getwd()  # Shows the current working directory
dir()    # Displays its contents
ls()     # Lists the R objects in the current workspace
source("test.R")
# Runs a code file (stops with an error if test.R is not in the
# working directory)
ls()
library(MASS)
# Loads a previously installed package
install.packages("BRugs")
# Installs a new package (downloads it from a package repository)

#### Basic Data Structures ####

## Basic data structures
family <- list()
family$members <- data.frame(
  first = c("Jeff", "Martha", "Laura", "Lydia", "Rachel"),
  year = c(1958, 1959, 1983, 1985, 1987)
)
# Creates a two-column data frame
# Each column is named and stores a different type of data
# c() combines its arguments into a vector
family[["members"]]
# Extracts a list item by name; same as family$members

family$awesomeness <- rep(1e20, nrow(family$members))
# Creates a vector list item
# by repeating the same value
# 1e20 is R's scientific notation
# the number of rows of the members data frame is
# how many times 1e20 will be repeated
family$awesomeness

family$correl <- matrix(
  c(
    1, .2, .8, .3, .7,
    .2, 1, .4, .7, .3,
    .8, .4, 1, .1, .6,
    .3, .7, .1, 1, .3,
    .7, .3, .6, .3, 1
  ),
  nrow = 5, ncol = 5
)
# This list item is a matrix
# R fills a matrix with leftmost index moving fastest
# (i.e. column by column)
# The first argument is a vector of values,
# second is number of rows,
# third is number of columns
isSymmetric(family$correl)
# A matrix is a true mathematical object
solve(family$correl)
# Inverts the matrix
rownames(family$correl) <- colnames(family$correl) <- family$members$first
family$correl["Jeff", "Martha"]
# Matrices may have row and column names
# for convenient extraction

aspirin <- read.table(
  "http://www.biostat.umn.edu/~brad/data/aspirin_data.txt",
  sep = "\t", header = FALSE
)
# Load a data table from the class website
# Various options allow one to read in a variety of plain text
# data sets
aspirin
class(aspirin)
attributes(aspirin)
# Displays the contents, class, and attributes of the object
names(aspirin) <- c("patient", "before", "after")
# Assigns strings to the 'names' attribute

#################################
##### Statistical Functions #####
#################################

Z <- rnorm(1000, mean = 5, sd = 10)
# Draws a random sample of normals
hist(Z, freq = FALSE)
# Plots them in a histogram (density scale rather than counts)
dens <- dnorm(0, mean = 5, sd = 10)
abline(h = dens, col = "red")
# Computes and adds lines to denote the value
# of the normal density at a given point
abline(v = 0, col = "red")
text(-20, dens, paste("f(0)=", round(dens, 4)), pos = 1, col = "red")
# Prints the rounded value of the density and labels it on the plot

z_grid <- seq(-30, 40, length.out = 100)
lines(z_grid, dnorm(z_grid, 5, 10), col = "blue")
# Adds the pdf curve to an existing plot
# Note: seq() creates a vector of numbers
# here, from -30 to 40 with length 100
text(20, .03, "f(Z)", pos = 1, col = "blue")

left_tail <- seq(-30, 0, length.out = 100)
polygon(
  x = c(left_tail, seq(0, -30, length.out = 100)),
  y = c(dnorm(left_tail, 5, 10), rep(0, 100)),
  density = 10, col = "green"
)
# Draws a shaded area: along the density curve from -30 to 0,
# then back along the horizontal axis from 0 to -30
text(-20, .01, paste("F(0)=", round(pnorm(0, 5, 10), 4)), col = "green")
# Prints the (labeled) value of the CDF that was shaded

q975 <- qnorm(.975, 5, 10)
arrows(q975, .015, q975, 0, code = 2, col = "orange")
text(q975, .015, paste("q(.975)=", round(q975, 2)), col = "orange", pos = 4)
# Plots the location of the 97.5th normal percentile

##############################
##### Simple Programming #####
##############################

# Prints one message per element of x: a fever notice when the value
# exceeds 100, otherwise a "go to class" message.
# x: vector of values; the position of each value is used as the
#    subject number.
# Uses logic and looping to print out information
# based on an input vector of values
myfunction <- function(x) {
  # seq_along() gives zero iterations for an empty x, whereas
  # 1:length(x) would iterate over c(1, 0)
  for (i in seq_along(x)) {
    if (x[i] > 100) {
      print(paste0("Subject ", i, ", you have a fever of ", x[i], "."))
    } else {
      print(paste0("Go to class, subject ", i, "!"))
    }
  }
}

myfunction(aspirin[, 2])
myfunction(aspirin[, 3])
# Applies the function to the aspirin data before and after

#######################################
##### Simple Statistical Analysis #####
#######################################

reg.results <- lm(aspirin[, 2] ~ 1)
# Fits an intercept-only linear model to the second column ('before')
reg.results
attributes(reg.results)
summary(reg.results)