################
# Friday, October 16, 2009
# Stata comparison example
# In R
################
#
# Plots the Hillary feeling-thermometer variable (`hillary`) against
# 7-point party identification (`partyid7`), overall and by `gender`,
# using the data read from "nes2000.dta".

library(foreign)

# ---- Load and inspect the data ----
nes <- read.dta("nes2000.dta")
summary(nes[, c("hillary", "partyid7", "gender")])

# attach() puts the columns of `nes` on the search path so they can be
# referenced by bare name. The assignments to `partyid7` and `gender` below
# create copies in the global environment that mask the attached columns;
# `nes` itself is not modified.
attach(nes)

table(partyid7)
table(as.numeric(partyid7))
length(levels(partyid7))

# ---- Recode the factors ----
# Rebuild partyid7 from its integer codes: only codes 1-7 are kept as levels
# (relabelled below); any other code becomes NA.
partyid7 <- factor(as.numeric(partyid7), exclude = 8:10, levels = 1:7,
                   labels = c("Strong Democrat", "Weak Democrat",
                              "Leaning Democrat", "Independent",
                              "Leaning Republican", "Weak Republican",
                              "Strong Republican"))
# NOTE(review): assumes gender has exactly two levels, in the order
# male, female -- confirm against the data.
levels(gender) <- c("Male", "Female")

# ---- Box plots of hillary by party identification ----
# A formula with a factor on the right-hand side produces box plots.
plot(hillary ~ partyid7)

# Same plot with the default axes suppressed so they can be drawn by hand.
plot(hillary ~ partyid7,
     xlab = "Party Identification",
     ylab = "Average Feelings Toward Hillary",
     axes = FALSE)
axis(2, las = 1)
# Two-line tick labels ("\n") so all seven categories fit under the axis.
partyid7.labels <- c("Strong\nDemocrat", "Weak\nDemocrat",
                     "Leaning\nDemocrat", "Independent",
                     "Leaning\nRepublican", "Weak\nRepublican",
                     "Strong\nRepublican")
axis(1, at = 1:7, labels = partyid7.labels, padj = 0.3)
box()

# ---- Group means: the easy way (split + sapply) ----
hillary.pid7 <- split(hillary, partyid7)
summary(hillary.pid7)
boxplot(hillary.pid7)
sapply(hillary.pid7, length)
# mean() returns NA for any group that contains a missing value; see
# help(mean) for the na.rm argument used in the next step.
sapply(hillary.pid7, mean)
help(mean)
hillary.mean <- sapply(hillary.pid7, mean, na.rm = TRUE)

plot(hillary.mean, type = "l")
plot(hillary.mean, type = "l", axes = FALSE, lwd = 1.5,
     ylim = c(0, 100), ylab = "Average Feelings Toward Hillary",
     xlab = "Party Identification")
axis(side = 2, las = 1)
axis(side = 1, at = 1:7, labels = partyid7.labels, padj = 0.2)

# Harder way
# Same group means computed with an explicit loop over the 7 level codes.
hillary.mean.hard <- rep(NA, 7)
for (i in 1:7) {
  hillary.mean.hard[i] <- mean(hillary[as.numeric(partyid7) == i],
                               na.rm = TRUE)
}
plot(1:7, hillary.mean.hard, type = "l", axes = FALSE, lwd = 1.5,
     ylim = c(0, 100), ylab = "Average Feelings Toward Hillary",
     xlab = "Party Identification")
axis(side = 2, las = 1)
axis(side = 1, at = 1:7, labels = partyid7.labels, padj = 0.2)

# Bar Plot
barplot(hillary.mean)
barplot(hillary.mean, names.arg = partyid7.labels,
        ylim = c(0, 100), ylab = "Average Feelings Toward Hillary",
        xlab = "Party Identification", padj = 0.2,
        las = 1, cex.lab = 1.2)
box()

# Harder way
# Group means by party identification (rows) and gender (columns).
hillary.mean.hard.mf <- matrix(NA, 7, 2)
for (i in 1:7) {
  for (j in 1:2) {
    hillary.mean.hard.mf[i, j] <- mean(
      hillary[as.numeric(partyid7) == i & as.numeric(gender) == j],
      na.rm = TRUE
    )
  }
}
rownames(hillary.mean.hard.mf) <- levels(partyid7)
colnames(hillary.mean.hard.mf) <- levels(gender)

# Column 1 as a solid line, column 2 as a dashed line on the same axes.
plot(1:7, hillary.mean.hard.mf[, 1], type = "l", axes = FALSE, lwd = 1.5,
     ylim = c(0, 100), ylab = "Average Feelings Toward Hillary",
     xlab = "Party Identification")
lines(1:7, hillary.mean.hard.mf[, 2], lty = 2, lwd = 1.5)
axis(side = 2, las = 1)
axis(side = 1, at = 1:7, labels = partyid7.labels, padj = 0.2)
legend(6, 95, legend = c("Male", "Female"), lty = 1:2)

# I prefer
# A hand-drawn key anchored at the same position as the legend() call above;
# if both are run on the same plot they are drawn on top of each other.
segments(6, 95, 6.3, 95)
text(6.35, 95, "Male", pos = 4)
segments(6, 90, 6.3, 90, lty = 2)
text(6.35, 90, "Female", pos = 4)