In this section we will see how some of our problems can be solved with base R.
# Simulate 1000 draws from a normal distribution with mean 10 and sd 2.
# No seed is set, so the numbers recorded in the output below are one
# particular realisation.
x <- rnorm(n = 1000, mean = 10, sd = 2)
# Histogram: project routine first, then the base R call.
# breaks = 50 is a suggestion for the number of cells, not a strict count.
hplot(x, n = 50)
hist(x, breaks = 50)
# Boxplot of a single sample: project routine, then base R.
bplot(x)
boxplot(x)
# Put the columns of `mothers` on the search path so that Length and Status
# can be referred to by bare name in the calls below.
attach(mothers)
# Side-by-side boxplots of Length for each level of Status:
# project routine first, then base R's formula interface.
bplot(Length, Status)
boxplot(Length ~ Status)
# Make the columns of `wine` available by bare name.
attach(wine)
# Scatterplot: project routine first, then base R.
# Note the argument order: plot() takes the horizontal variable first,
# so Wine.Consumption is on the x-axis and Heart.Disease.Deaths on the y-axis.
splot(Heart.Disease.Deaths, Wine.Consumption)
plot(x = Wine.Consumption, y = Heart.Disease.Deaths)
# Five-number summary of x: project routine first, then base R.
fivenumber(x, ndigit = 2)
## Minimum Q1 Median Q3 Maximum
## 4 8.76 9.89 11.16 16.77
## IQR = 2.4
# quantile() with its default probabilities (0, 0.25, 0.5, 0.75, 1) returns
# the minimum, first quartile, median, third quartile and maximum in a single
# call. Unlike a hand-assembled c(min(x), quantile(x, 0.25), ...), every
# element of the result carries a label, so the printed header lines up with
# all five values.
round(quantile(x), 2)
## 0% 25% 50% 75% 100%
## 4.00 8.76 9.89 11.16 16.77
# Sample size, mean and standard deviation: project routine, then base R.
stat.table(x, ndigit = 2)
## Sample Size Mean Standard Deviation
## x 1000 9.89 1.94
# All three numbers share one numeric vector, which is why the sample size
# is printed with two decimals as well.
round(c(length(x), mean(x), sd(x)), digits = 2)
## [1] 1000.00 9.89 1.94
# 90% confidence interval for the population mean.
# The project routine takes the level as a percentage (90); base R's
# t.test() takes it as a probability (0.9).
one.sample.t(x, conf.level = 90, ndigit = 3)
## A 90% confidence interval for the population mean is (9.794, 9.996)
# mu and alternative are written out at their default values; t.test()
# always reports the test of mu = 0 alongside the interval.
t.test(x, mu = 0, alternative = "two.sided", conf.level = 0.9)
##
## One Sample t-test
##
## data: x
## t = 161.47, df = 999, p-value < 0.00000000000000022
## alternative hypothesis: true mean is not equal to 0
## 90 percent confidence interval:
## 9.794011 9.995797
## sample estimates:
## mean of x
## 9.894904
# One-sided test of H0: mu = 10 against Ha: mu > 10,
# project routine first, then base R.
one.sample.t(x, mu.null = 10, alternative = "greater", ndigit = 3)
## p value of test H0: mu=10 vs. Ha: mu > 10: 0.0433
# NOTE(review): the p-value printed above (0.0433) is 1 - 0.9567, i.e. the
# complement of what t.test() reports below for the same "greater"
# alternative; with t = -1.715 it corresponds to a lower-tailed test.
# Confirm which alternative one.sample.t actually evaluated.
t.test(x, mu = 10, alternative = "greater")
##
## One Sample t-test
##
## data: x
## t = -1.715, df = 999, p-value = 0.9567
## alternative hypothesis: true mean is greater than 10
## 95 percent confidence interval:
## 9.794011 Inf
## sample estimates:
## mean of x
## 9.894904
The t.ps command does not exist in base R.
# 90% confidence interval for a proportion, 60 successes in 100 trials.
# The project routine takes the level as a percentage; prop.test() takes a
# probability.
one.sample.prop(60, 100, conf.level = 90, ndigit = 3)
## A 90% confidence interval for the population proportion is (0.513, 0.682)
# x = number of successes, n = number of trials. The continuity correction
# is on by default, as the output header states.
prop.test(x = 60, n = 100, conf.level = 0.9)
##
## 1-sample proportions test with continuity correction
##
## data: 60 out of 100, null probability 0.5
## X-squared = 3.61, df = 1, p-value = 0.05743
## alternative hypothesis: true p is not equal to 0.5
## 90 percent confidence interval:
## 0.5127842 0.6816248
## sample estimates:
## p
## 0.6
# One-sided test of H0: pi = 0.5 against Ha: pi > 0.5,
# project routine first, then base R.
one.sample.prop(60, 100, pi.null = 0.5, alternative = "greater", ndigit = 3)
## p value of test H0: pi=0.5 vs. Ha: pi > 0.5: 0.0287
# x = number of successes, n = number of trials, p = null proportion.
prop.test(x = 60, n = 100, p = 0.5, alternative = "greater")
##
## 1-sample proportions test with continuity correction
##
## data: 60 out of 100, null probability 0.5
## X-squared = 3.61, df = 1, p-value = 0.02872
## alternative hypothesis: true p is greater than 0.5
## 95 percent confidence interval:
## 0.5127842 1.0000000
## sample estimates:
## p
## 0.6
The prop.ps command does not exist in base R.
# Make the columns of `draft` available by bare name.
attach(draft)
# 90% confidence interval for the correlation coefficient:
# project routine first, then base R.
pearson.cor(Draft.Number, Day.of.Year, conf.level = 90)
## A 90% confidence interval for the
## population correlation coefficient is ( -0.306, -0.143 )
# cor.test() prints the interval and the test of rho = 0 in one output.
cor.test(x = Draft.Number, y = Day.of.Year, conf.level = 0.9)
##
## Pearson's product-moment correlation
##
## data: Draft.Number and Day.of.Year
## t = -4.4272, df = 364, p-value = 0.00001264
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
## -0.3061994 -0.1427007
## sample estimates:
## cor
## -0.2260414
# Test of H0: rho = 0 with the project routine. The base R counterpart is
# the p-value already shown in the cor.test() output above.
pearson.cor(Draft.Number, Day.of.Year, rho.null = 0)
## p value of test H0: rho=0 vs. Ha: rho <> 0: 0.000
# Simple linear regression of Draft.Number on Day.of.Year:
# project routine first, then base R.
slr(Draft.Number, Day.of.Year)
## The least squares regression equation is:
## Draft.Number = 225.009 - 0.226 Day.of.Year
## R^2 = 5.11%
# The coefficients are in the "Estimate" column and R^2 is reported as
# "Multiple R-squared" in the summary below.
summary(lm(Draft.Number ~ Day.of.Year))
##
## Call:
## lm(formula = Draft.Number ~ Day.of.Year)
##
## Residuals:
## Min 1Q Median 3Q Max
## -210.837 -85.629 -0.519 84.612 196.157
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 225.00922 10.81197 20.811 < 0.0000000000000002
## Day.of.Year -0.22606 0.05106 -4.427 0.0000126
##
## Residual standard error: 103.2 on 364 degrees of freedom
## Multiple R-squared: 0.05109, Adjusted R-squared: 0.04849
## F-statistic: 19.6 on 1 and 364 DF, p-value: 0.00001264
# Make the columns of `houseprice` available by bare name.
attach(houseprice)
# Multiple regression of Price on all remaining columns.
# houseprice[, -1] drops the first column, leaving the predictors.
mlr(Price, houseprice[, -1])
## The least squares regression equation is:
## Price = -67.62 + 0.086 Sqfeet - 26.493 Floors - 9.286 Bedrooms + 37.381 Baths
## R^2 = 88.6%
# In the formula, `.` stands for every column of `data` other than the
# response.
summary(lm(Price ~ ., data = houseprice))
##
## Call:
## lm(formula = Price ~ ., data = houseprice)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.018 -5.943 1.860 5.947 30.955
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -67.61984 17.70818 -3.819 0.000882
## Sqfeet 0.08571 0.01076 7.966 0.0000000462
## Floors -26.49306 9.48952 -2.792 0.010363
## Bedrooms -9.28622 6.82985 -1.360 0.187121
## Baths 37.38067 12.26436 3.048 0.005709
##
## Residual standard error: 13.71 on 23 degrees of freedom
## Multiple R-squared: 0.8862, Adjusted R-squared: 0.8665
## F-statistic: 44.8 on 4 and 23 DF, p-value: 0.0000000001558
library(leaps)
# Best-subset selection by Mallows' Cp: project routine first, then the
# leaps() function from the leaps package.
mallows(Price, houseprice[, -1])
## Number of Variables Cp Sqfeet Floors Bedrooms Baths
## 1 8.83 X
## 2 8.81 X X
## 3 4.85 X X X
## 4 5 X X X X
# leaps() takes the predictors first and the response second, the reverse of
# the project routine. Each row of $which is one candidate model and $Cp
# holds its Cp value in the same order; the smallest Cp within each model
# size reproduces the table above.
leaps(x = houseprice[, -1], y = Price)
## $which
## 1 2 3 4
## 1 TRUE FALSE FALSE FALSE
## 1 FALSE FALSE FALSE TRUE
## 1 FALSE FALSE TRUE FALSE
## 1 FALSE TRUE FALSE FALSE
## 2 TRUE FALSE FALSE TRUE
## 2 TRUE TRUE FALSE FALSE
## 2 TRUE FALSE TRUE FALSE
## 2 FALSE FALSE TRUE TRUE
## 2 FALSE TRUE FALSE TRUE
## 2 FALSE TRUE TRUE FALSE
## 3 TRUE TRUE FALSE TRUE
## 3 TRUE FALSE TRUE TRUE
## 3 TRUE TRUE TRUE FALSE
## 3 FALSE TRUE TRUE TRUE
## 4 TRUE TRUE TRUE TRUE
##
## $label
## [1] "(Intercept)" "1" "2" "3" "4"
##
## $size
## [1] 2 2 2 2 3 3 3 3 3 3 4 4 4 4 5
##
## $Cp
## [1] 8.834171 92.088525 104.303380 161.057329 8.812489 10.306028
## [7] 10.812154 66.886236 77.214388 87.881962 4.848657 10.794275
## [13] 12.289752 66.450032 5.000000
# One-way ANOVA of Length by Status: project routine first, then base R.
# Length and Status are looked up on the search path (presumably from the
# `mothers` data attached earlier in the script).
oneway(Length, Status)
## p value of test of equal means: p = 0.000
## Smallest sd: 2.5 Largest sd : 3.6
# The p-value of the test of equal means is the Pr(>F) entry of the Status row.
summary(aov(Length ~ Status))
## Df Sum Sq Mean Sq F value Pr(>F)
## Status 2 181.4 90.69 9.319 0.000208
## Residuals 91 885.6 9.73
# Make the columns of `gasoline` available by bare name.
attach(gasoline)
# Two-way ANOVA of MPG on Gasoline and Automobile with interaction,
# using the project routine.
twoway(MPG, Gasoline, Automobile)
## Df Sum Sq Mean Sq F value Pr(>F)
## x 3 25.405 8.468 90.464 0.000000000000321
## z 2 0.527 0.263 2.813 0.0799
## x:z 6 0.909 0.151 1.618 0.1854
## Residuals 24 2.247 0.094
## [,1]
## Gasoline p = 0.0000
## Automobile p = 0.0799
## Interaction p = 0.1854
# Base R version: aov() needs both grouping variables as factors so that
# they are treated as categorical rather than numeric predictors.
G <- as.factor(Gasoline)
A <- as.factor(Automobile)
# Main effects plus the G:A interaction, written out term by term.
summary(aov(MPG ~ G + A + G:A))
## Df Sum Sq Mean Sq F value Pr(>F)
## G 3 25.405 8.468 90.464 0.000000000000321
## A 2 0.527 0.263 2.813 0.0799
## G:A 6 0.909 0.151 1.618 0.1854
## Residuals 24 2.247 0.094
# The same analysis without the interaction term, project routine first.
twoway(MPG, Gasoline, Automobile, with.interaction = FALSE)
## Df Sum Sq Mean Sq F value Pr(>F)
## x 3 25.405 8.468 80.510 0.0000000000000189
## z 2 0.527 0.263 2.504 0.0987
## Residuals 30 3.156 0.105
## [,1]
## Gasoline p = 0.0000
## Automobile p = 0.0987
# Additive model in base R: main effects only.
summary(aov(MPG ~ G + A))
## Df Sum Sq Mean Sq F value Pr(>F)
## G 3 25.405 8.468 80.510 0.0000000000000189
## A 2 0.527 0.263 2.504 0.0987
## Residuals 30 3.156 0.105