Intro to Statistical methods using RStudio
Page 1: Data handling and descriptive statistics,
Page 2: Probability,
Page 3: Intervals and sample size,
Page 4: Hypothesis Testing,
Page 5: Contingency tables,
Page 6: Linear Regression.
Page 1 | Page 2 | Page 3 | Page 5 | Page 6 |
Page 4: Hypothesis Testing,
1. Hypothesis Testing for Proportions:
# Z test for proportions: creating a function
# One-sample z test for a proportion, computed from summary counts.
#
# Arguments:
#   x      number of successes observed in the sample.
#   n      sample size.
#   p      proportion stated by the null hypothesis.
#   tails  1 for a one-tailed test, 2 for a two-tailed test.
#
# Prints the z statistic and the p-value (rounded to 4 decimals) and returns
# both invisibly as list(statistic, p.value). Stops with an error when `tails`
# is neither 1 nor 2.
#
# Note: the one-tailed p-value is the area beyond |z|, i.e. it assumes the
# alternative points in the direction in which the sample deviates from p.
z.test.prop.pvalue <- function(x, n, p, tails) {
  if (!(length(tails) == 1 && tails %in% c(1, 2))) {
    stop("`tails` must be 1 or 2", call. = FALSE)
  }

  # Change the printing options only for the duration of this call.
  old_opts <- options(scipen = 999, digits = 4)
  on.exit(options(old_opts), add = TRUE)

  z <- (x / n - p) / sqrt(p * (1 - p) / n)
  upper_tail <- pnorm(abs(z), lower.tail = FALSE)
  p_value <- round(tails * upper_tail, 4)

  cat("z test stat", z, fill = TRUE)
  cat("p-value", p_value, fill = TRUE)
  invisible(list(statistic = z, p.value = p_value))
}
# z-test example: x = 130 successes, n = 200 trials, p = 0.60, one tail.
z.test.prop.pvalue(x = 130, n = 200, p = 0.60, tails = 1)
# Same test using the built-in R function prop.test():
one_prop <- prop.test(130, 200, p = 0.60, alternative = "greater", correct = FALSE)
one_prop
# Taking the square root of the X-squared statistic retrieves |z|.
# Use the stored statistic rather than a hand-rounded value, so the result
# agrees with the z statistic reported by z.test.prop.pvalue().
sqrt(unname(one_prop$statistic)) # = 1.443
# For two proportions, the general form of the call is (template, not run):
# prop.test(x = c(x1, x2), n = c(n1, n2), alternative = "two.sided", correct = FALSE)
# Example: compare two proportions. Are they equal? 35/195 and 53/201
prop.test(x = c(35, 53), n = c(195, 201), alternative = "two.sided", correct = FALSE)
2. Hypothesis Testing for means, assume sigma known:
# writing a function:
# One-sample z test for a mean when the population sigma is known,
# computed from summary statistics.
#
# Arguments:
#   xbar   sample mean.
#   mu     population mean stated by the null hypothesis.
#   sigma  known population standard deviation.
#   n      sample size.
#   tails  1 for a one-tailed test, 2 for a two-tailed test.
#
# Prints the z statistic and the p-value (rounded to 4 decimals) and returns
# both invisibly as list(statistic, p.value). Stops with an error when `tails`
# is neither 1 nor 2.
#
# Note: the one-tailed p-value is the area beyond |z|, i.e. it assumes the
# alternative points in the direction in which xbar deviates from mu.
z.test.pvalue <- function(xbar, mu, sigma, n, tails) {
  if (!(length(tails) == 1 && tails %in% c(1, 2))) {
    stop("`tails` must be 1 or 2", call. = FALSE)
  }

  # Change the printing options only for the duration of this call.
  old_opts <- options(scipen = 999, digits = 4)
  on.exit(options(old_opts), add = TRUE)

  z <- (xbar - mu) * sqrt(n) / sigma
  upper_tail <- pnorm(abs(z), lower.tail = FALSE)
  p_value <- round(tails * upper_tail, 4)

  cat("z test stat", z, fill = TRUE)
  cat("p-value", p_value, fill = TRUE)
  invisible(list(statistic = z, p.value = p_value))
}
# Example: H0: mu = 21.1 vs H1: mu != 21.1, with sigma = 1.8;
# a sample of n = 30 yields a mean of 20.3.
z.test.pvalue(xbar = 20.3, mu = 21.1, sigma = 1.8, n = 30, tails = 2)
3. Hypothesis Testing for means, assume sigma unknown:
# writing a function:
# One-sample t test for a mean when sigma is unknown, computed from summary
# statistics.
#
# Arguments:
#   xbar   sample mean.
#   mu     population mean stated by the null hypothesis.
#   s      sample standard deviation.
#   n      sample size (the test uses n - 1 degrees of freedom).
#   tails  1 for a one-tailed test, 2 for a two-tailed test.
#
# Prints the t statistic and the p-value (rounded to 4 decimals) and returns
# both invisibly as list(statistic, p.value). Stops with an error when `tails`
# is neither 1 nor 2.
#
# Note: the one-tailed p-value is the area beyond |t|, i.e. it assumes the
# alternative points in the direction in which xbar deviates from mu.
t.test.pvalue <- function(xbar, mu, s, n, tails) {
  if (!(length(tails) == 1 && tails %in% c(1, 2))) {
    stop("`tails` must be 1 or 2", call. = FALSE)
  }

  # Change the printing options only for the duration of this call.
  old_opts <- options(scipen = 999, digits = 4)
  on.exit(options(old_opts), add = TRUE)

  # Named t_stat so that the local variable does not mask base::t().
  t_stat <- (xbar - mu) * sqrt(n) / s
  upper_tail <- pt(abs(t_stat), df = n - 1, lower.tail = FALSE)
  p_value <- round(tails * upper_tail, 4)

  cat("t test stat", t_stat, fill = TRUE)
  cat("p-value", p_value, fill = TRUE)
  invisible(list(statistic = t_stat, p.value = p_value))
}
# Example: with xbar = 20, mu = 21.1, s = 1.8, n = 20, two-tailed test
t.test.pvalue(xbar = 20, mu = 21.1, s = 1.8, n = 20, tails = 2) # t test from summary statistics
4. Hypothesis Testing for means, raw data available:
# General form of the call (shown for reference, not meant to be run as is):
# t.test(x, y = NULL, alternative = c("two.sided", "less", "greater"), mu = 0,
#        paired = FALSE, var.equal = FALSE, conf.level = 0.95)
# one sample:
x <- c(1, 1.1, 1.11, 1.21, 1.3, 1.05, 1.3, 1.31, 1.27, 1.33, 1.29, 1.16, 0.99, 2.3)
# testing that the population mean = 1.1
t.test(x, mu = 1.1, alternative = "two.sided", conf.level = 0.99)
#t test for two independent samples in r
sample1 <- c(8, 8, 9, 9, 9, 11, 12, 13, 13, 14, 11, 15, 18)
sample2 <- c(11, 12, 13, 13, 14, 14, 14, 15, 16, 18, 18, 17, 14)
# perform two sample t-test (pooled variance, two-sided)
t.test(sample1, sample2, var.equal = TRUE, alternative = "two.sided", conf.level = 0.95)
#paired samples
A paired sample t-test is used to compare the means of two related groups to determine whether there is a significant difference between them.
# paired t test
before <- c(39, 43, 41, 32, 37, 40, 42, 40, 37, 38)
after <- c(42, 45, 42, 43, 40, 44, 40, 43, 41, 40)
# Perform the paired t-test (dependent samples).
# Say that after a given treatment the experimental units improve, so the
# alternative hypothesis is mean_after > mean_before. t.test() works on the
# differences x - y, so `after` is passed as x for alternative = "greater"
# to express that hypothesis.
t.test(x = after, y = before, paired = TRUE, alternative = "greater")
# Two independent samples stored in a data frame, using a formula: obs ~ factor
# Compare mpg between the two transmission groups in mtcars (automatic vs manual).
help("mtcars") # Transmission (0 = automatic, 1 = manual)
boxplot(mpg ~ am, data = mtcars) # visualize
aggregate(mpg ~ am, data = mtcars, FUN = summary) # summary statistics per group
t.test(
  mpg ~ am,
  data = mtcars,
  alternative = "two.sided",
  var.equal = TRUE,
  conf.level = 0.95
)