forked from XiaCatQ/Spatial_Project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Workflow_Confirm.Rmd
277 lines (209 loc) · 6.73 KB
/
Workflow_Confirm.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
---
title: "Workflow_Confirm"
author: "ChenZiYing (Sophie)"
date: "2023-04-28"
output:
html_document:
css: tutorial.css
fig_caption: yes
highlight: textmate
theme: flatly
toc: yes
toc_float: yes
---
```{r setup, include=FALSE,warning=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
1. Load all packages we need. Please refer to **Statistical Packages**.
```{r}
# Load required packages
library(sp)
library(dplyr)
library(sf)
library(spatstat)
library(maptools)
library(rgdal)
```
2. Data processing:
```{r}
# Load Oreoscoptes data
df <- read.csv("BC_data.csv", stringsAsFactors = FALSE)
# Generate a dataframe with longitude and latitude in BC_data
coord <- df[,c('decimalLatitude','decimalLongitude')]
# Clean the data by removing the observations with NA
coord <- na.omit(coord)
# Visualize the data
plot(coord$decimalLatitude, coord$decimalLongitude)
# Convert to CRS
coordinates(coord)<- ~decimalLongitude+decimalLatitude
proj4string(coord) <- CRS("+proj=longlat +datum=WGS84")
coord_conv <- spTransform(coord,CRS("+proj=aea +lat_0=45 +lon_0=-126 +lat_1=50 +lat_2=58.5 +x_0=1000000 +y_0=0 +datum=NAD83 +units=m +no_defs"))
# Load the covariates data
load("BC_Covariates.Rda")
# Define the observation area, window
win = as.owin(DATA$Window)
# create ppp object with coordination and the window
ppp_birds <- ppp(coord_conv@coords[,1],
coord_conv@coords[,2],
window=win)
```
3. Inspecting and Exploring data
```{r, warning=FALSE}
# Check the spread of the bird `ppp_birds`
plot(ppp_birds)
# Confirm inhomogeneity
Q <- quadratcount(ppp_birds, nx = 10, ny = 10)
quadrat.test(Q)
# Hot spot analysis to identify areas of elevated intensity
## Estimate R
R <- bw.ppl(ppp_birds)
## Calculate test statistic
LR <- scanLRTS(ppp_birds, r = R)
## Plot the output
plot(LR)
# Identify the class of each potential covariate we have
sapply(DATA, class)
# Visualise each potential covariate using methods appropriate to each object class.
potential <- c("Window", "Elevation", "Forest", "HFI", "Dist_Water")
for (i in potential){
plot(DATA[[i]], main = i)
points(ppp_birds$x, ppp_birds$y, pch = 19, col = "white", cex = 0.6)
points(ppp_birds$x, ppp_birds$y, pch = 19, col = "black", cex = 0.4)
}
# Check relationships between potential covariates via kernel estimation
rho_elev <- rhohat(ppp_birds, DATA$Elevation)
rho_fore <- rhohat(ppp_birds, DATA$Forest)
rho_hfi <- rhohat(ppp_birds, DATA$HFI)
rho_dist_water <- rhohat(ppp_birds, DATA$Dist_Water)
plot(rho_elev, xlim = c(0, max(DATA$Elevation)))
plot(rho_fore, xlim = c(0, max(DATA$Forest)))
plot(rho_hfi, xlim = c(0, max(DATA$HFI)))
plot(rho_dist_water, xlim = c(0, max(DATA$Dist_Water)))
```
4. Model Building - Regression
```{r}
# Scale the covariates by their mean and variance
mu_elev <- mean(DATA$Elevation)
stdev_elev <- sd(DATA$Elevation)
DATA$Elevation_scaled <- eval.im((Elevation - mu_elev)/stdev_elev, DATA)
mu_water <- mean(DATA$Dist_Water)
stdev_water <- sd(DATA$Dist_Water)
DATA$Dist_Water_scaled <- eval.im((Dist_Water - mu_water)/stdev_water, DATA)
# Establish formula1:
formula1 <- ppp_birds ~ Elevation_scaled + I(Elevation_scaled^2) + Dist_Water_scaled + I(Dist_Water_scaled^2)
# Fit the model with formula1 by
fit1 <- ppm(formula1, data = DATA).
# Because the model did not converge, we simplified our formula by excluding *I(Elevation_scaled^2)*.
formula2 <- ppp_birds ~ Elevation_scaled + Dist_Water_scaled + I(Dist_Water_scaled^2)
# Fit the model with formula2 by
fit2 <- ppm(formula2, data = DATA).
# Test the statistical significance of coefficients by looking at the Ztest from`fit2` table.
fit2
# Identify the formula with coefficients
co <- coef(fit2)
```
5. Model Validation
```{r}
# Model visualisation
plot(fit2,
se = FALSE,
superimpose = FALSE)
#Overlay the B. pendula locations
plot(ppp_birds,
pch = 16,
cex = 0.7,
cols = "white",
add = TRUE)
plot(ppp_birds,
pch = 16,
cex = 0.5,
cols = "black",
add = TRUE)
#Mean disw
#E_disw <- mean(DATA$Dist_Water_scaled) # just 0
#Elevational effect on lambda at mean disw
elev_effect <- effectfun(fit2, "Elevation_scaled", Dist_Water_scaled = 0, se.fit = T)
disw_effect <- effectfun(fit2, "Dist_Water_scaled", Elevation_scaled = 0, se.fit = T)
#Side by side plotting
par(mfrow = c(1,2))
#Plot the elevation effect
plot(elev_effect,
legend = FALSE,
main = "Elevation effect at mean distance to water ")
plot(disw_effect,
legend = FALSE,
main = "Distance to Water effect at mean elevation ")
```
# Quardratic Counting by `quadrat.test(fit2, nx = 2, ny = 4)`
# Partial Residual Plots
```
## Model Validation
### Quadrat counting
```{r}
#Run the quadrat test
quadrat.test(fit2, nx = 2, ny = 4)
```
The small p value tells us that there’s a significant deviation from our model’s predictions. Let us dig out how to improve it.
### Partial residual plots
```{r}
#Calculate the partial residuals as a function of elevation
par_res_elev <- parres(fit2, "Elevation_scaled")
#Calculate the relative intensity as a function of gradient
par_res_disw <- parres(fit2, "Dist_Water_scaled")
#Side by side plotting
par(mfrow = c(1,2))
plot(par_res_elev,
legend = FALSE,
lwd = 2,
main = "",
xlab = "Elevation_scaled")
plot(par_res_disw,
legend = FALSE,
lwd = 2,
main = "",
xlab = "Dist_Water_scaled)")
```
From these figures we can see that the quadratic terms are not capturing the patterns in our data particularly well. As an improvement, we could try adding higher-order polynomials, but polynomials can be unstable. In this situation, it may be worth switching from a linear modelling framework, to an additive modelling framework (i.e., GAMs).
### GAM Not Valid,still 'df' was too small
```{r}
library(splines)
#Fit the PPP model
fit_smooth <- ppm(ppp_birds ~ bs(Elevation_scaled,2) + bs(Dist_Water_scaled, 3), data = DATA, use.gam = TRUE)
fit_smooth
```
```{r}
#Calculate the partial residuals as a function of elevation
par_res_elev <- parres(fit_smooth, "Elevation_scaled")
#Calculate the relative intensity as a function of gradient
par_res_disw <- parres(fit_smooth, "Dist_Water_scaled")
#Side by side plotting
par(mfrow = c(1,2))
plot(par_res_elev,
legend = FALSE,
lwd = 2,
main = "",
xlab = "Elevation_scaled")
plot(par_res_disw,
legend = FALSE,
lwd = 2,
main = "",
xlab = "Dist_Water_scaled)")
```
```{r}
#Plot the model predictions
plot(fit_smooth,
se = FALSE,
superimpose = FALSE)
#Overlay the B. pendula locations
plot(ppp_birds,
pch = 16,
cex = 0.7,
cols = "white",
add = TRUE)
plot(ppp_birds,
pch = 16,
cex = 0.5,
cols = "black",
add = TRUE)
```
- For a `GAM`, model, it looks like we do not have enough data to fit.