-
Notifications
You must be signed in to change notification settings - Fork 0
/
Project-1.R
86 lines (57 loc) · 2.43 KB
/
Project-1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
library(dplyr)
library('rstatix')
library(ggplot2)
library(FSA)
library(gridExtra)
library(grid)
library(xtable)
# ---- Load data ----
# The file is read through an explicit path rather than via setwd(): the same
# CSV is loaded, but the session's working directory is no longer changed as a
# side effect. Adjust `data_dir` when running on another machine.
data_dir <- "E:/Subjects/ICS"
# read.csv() already returns a data frame, so no extra data.frame() wrapper.
data <- read.csv(file = file.path(data_dir, "ImmoDataRuhr.csv"))

# Summary of data set (per-column overview)
summary(data)

# Group by summary: per-regio2 count and location/spread statistics of
# sqmPrice. Missing sqmPrice values are dropped from each statistic; `count`
# is the number of rows in the group, including rows with missing sqmPrice.
data %>%
  group_by(regio2) %>%
  summarise(
    count = n(),
    mean = mean(sqmPrice, na.rm = TRUE),
    median = median(sqmPrice, na.rm = TRUE),
    sd = sd(sqmPrice, na.rm = TRUE),
    var = var(sqmPrice, na.rm = TRUE)
  )
# ---- Task 1: one-way ANOVA of sqmPrice across the regio2 groups ----
# Null hypothesis: the mean sqmPrice is the same in every regio2 group.
# Alternative hypothesis: at least one group mean is not equal to the others.
#
# The model is fitted BEFORE the assumption checks because the diagnostic
# plots below are drawn from the fitted object; calling plot(res.aov, ...)
# ahead of this assignment fails with "object 'res.aov' not found".
res.aov <- aov(sqmPrice ~ regio2, data = data)

################## ASSUMPTIONS #################
# Independence of observations: listed as an assumption; nothing in this
# script tests it.

# Homogeneity of variances: residuals vs fitted values (the group means).
plot(res.aov, 1)
# Author's reading: there is no evident relationship between residuals and
# fitted values, so homogeneity of variances can be assumed.

# Normality: normal Q-Q plot of the model residuals.
plot(res.aov, 2)
# Author's reading: the points fall approximately along the reference line,
# so normality can be assumed.

# Homogeneity, second view: box plot of sqmPrice for each regio2 group.
ggplot(data, aes(x = regio2, y = sqmPrice, fill = regio2)) +
  geom_boxplot() +
  scale_x_discrete() +
  xlab("regio2") +
  ylab("sqmPrice")

# Normality for individual groups: one Q-Q panel per regio2 group.
ggplot(data, aes(sample = sqmPrice)) +
  stat_qq() +
  stat_qq_line(col = "red") +
  facet_wrap(~ regio2)

# Summary of the analysis (ANOVA table). The object keeps its original name
# `anova`; note that this shadows the name of stats::anova in the workspace.
anova <- summary(res.aov)
anova
# Author's reported result: as the p-value (0.00351) is less than the
# significance level 0.05, the null hypothesis is rejected and we conclude
# that there are significant differences between the regions.
# ---- Task 2: pairwise t-tests between the regio2 groups ----
# A t-test is a statistical method used to determine whether there is a
# significant difference between the means of two groups, based on a sample
# of data.
# t-test assumptions:
#   - continuous variable
#   - random sample
#   - normal distribution
#   - homogeneity of variances
# H0: the difference between the mean sqmPrice of two regions is 0
# H1: the difference between the mean sqmPrice of two regions is NOT 0
#
# pool.sd = TRUE uses one standard deviation pooled over all groups. In that
# mode pairwise.t.test() does not call t.test(), so the former
# `var.equal = TRUE` argument (forwarded through `...` only when
# pool.sd = FALSE) had no effect and is omitted. Results are unchanged.

# Unadjusted p-values for every pair of groups.
two_sample_test_without_adj <- pairwise.t.test(
  data$sqmPrice,
  data$regio2,
  p.adjust.method = "none",
  pool.sd = TRUE
)
two_sample_test_without_adj

# Bonferroni-adjusted p-values; the method name is spelled out instead of
# relying on partial matching of "bon".
two_sample_test_with_adj <- pairwise.t.test(
  data$sqmPrice,
  data$regio2,
  p.adjust.method = "bonferroni",
  pool.sd = TRUE
)
two_sample_test_with_adj