Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
ISATEC-2017-R
Multivariate
Commits
78bb1d72
Commit
78bb1d72
authored
Dec 20, 2017
by
sonitaR
Browse files
added Wednesday files
parent
2c02295f
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
201 additions
and
0 deletions
+201
-0
3a. WednesdayClass-RDA.pdf
3a. WednesdayClass-RDA.pdf
+0
-0
3b. WednesdayCode-RDA.R
3b. WednesdayCode-RDA.R
+201
-0
3c. Chapter-10-InterpretationEcologicalStructures.pdf
3c. Chapter-10-InterpretationEcologicalStructures.pdf
+0
-0
3d. Chapter-11-CanonicalAnalysisIncludingRDA.pdf
3d. Chapter-11-CanonicalAnalysisIncludingRDA.pdf
+0
-0
No files found.
3a. WednesdayClass-RDA.pdf
0 → 100644
View file @
78bb1d72
File added
3b. WednesdayCode-RDA.R
0 → 100644
View file @
78bb1d72
#####################################################
# MULTIVARIATE STATISTICS #
#####################################################
#####################################################
# PART 3. INTERPRETATION OF ECOLOGICAL STRUCTURES #
#####################################################
#####################################################
# E3. REDUNDANCY ANALYSIS #
#####################################################
# preparation steps
rm
(
list
=
ls
())
require
(
reshape
)
require
(
ade4
)
require
(
vegan
)
require
(
packfor
)
require
(
MASS
)
require
(
ellipse
)
require
(
FactoMineR
)
# PREPARING THE BIOLOGICAL DATASET FOR MULTIVARIATE ANALYSIS ###################################
# A REPEAT OF YESTERDAY'S code
setwd
(
"C://multivar"
)
biomass
<-
read.table
(
"fish_biomass.txt"
,
h
=
T
,
sep
=
"\t"
)
biomass.wide
<-
cast
(
biomass
,
site
+
transect
~
species
,
sum
)
biomass.melted
<-
melt
(
biomass.wide
)
row.names
(
biomass.melted
)
<-
1
:
length
(
biomass.melted
[,
1
])
names
(
biomass.melted
)
<-
c
(
names
(
biomass.melted
)[
1
:
2
],
'biomass'
,
names
(
biomass.melted
)[
4
])
biomass.melted
<-
subset
(
biomass.melted
,
select
=
c
(
1
:
2
,
4
,
3
))
mean.biomass
<-
aggregate
(
biomass
~
site
+
species
,
FUN
=
mean
,
data
=
biomass.melted
)
meanbiomass.wide
<-
cast
(
biomass
,
site
~
species
,
sum
)
meanbiomass.wide.df
<-
data.frame
(
meanbiomass.wide
[,
-1
],
row.names
=
meanbiomass.wide
[,
1
])
# Load some graphic functions designed by Brocard et al (2011) we will need:
source
(
"evplot.R"
)
source
(
"cleanplot.pca.R"
)
# Hellinger-transform the species dataset
spe.hel
<-
decostand
(
meanbiomass.wide.df
,
"hellinger"
)
# Load the environmental dataset:
env.data
<-
read.table
(
"environmental_quant.txt"
,
h
=
T
,
sep
=
"\t"
)
# remove the label of sites to make it a numbers-only matrix to
# later be able to standardise
env.data
<-
data.frame
(
env.data
[,
-1
],
row.names
=
env.data
[,
1
])
# Check for collinearity of explanatory:
# We do this in non-standardised variables
# Write a function to compute correlation coefficients:
# source you.tube class: https://www.youtube.com/watch?v=wXFDIgaTdLw
panel.cor
<-
function
(
x
,
y
,
digits
=
2
,
prefix
=
""
,
cex.cor
)
{
usr
<-
par
(
"usr"
);
on.exit
(
par
(
usr
))
par
(
usr
=
c
(
0
,
1
,
0
,
1
))
r
<-
abs
(
cor
(
x
,
y
,
use
=
"complete.obs"
))
txt
<-
format
(
c
(
r
,
0.123456789
),
digits
=
digits
)[
1
]
txt
<-
paste
(
prefix
,
txt
,
sep
=
""
)
if
(
missing
(
cex.cor
))
cex
<-
0.8
/
strwidth
(
txt
)
test
<-
cor.test
(
x
,
y
)
Signif
<-
symnum
(
test
$
p.value
,
corr
=
FALSE
,
na
=
FALSE
,
cutpoints
=
c
(
0
,
0.05
,
0.1
,
1
),
symbols
=
c
(
"*"
,
"."
,
" "
))
text
(
0.5
,
0.5
,
txt
,
cex
=
cex
*
r
)
text
(
.8
,
.8
,
Signif
,
cex
=
cex
,
col
=
2
)
}
# plot every pair of variables, asking for the plot to include correlation coefficients
pairs
(
env.data
[
,
1
:
5
],
lower.panel
=
panel.smooth
,
upper.panel
=
panel.cor
)
# There are high levels of collinearity between pairs of variables:
# Between:
# Wave exposure and productivity
# Rugosity and algal cover
# Rugosity and coral cover
# Algal cover and coral cover
# We could therefore make some arbitrary decisions to move forward:
# Retain wave exposure exclude productivity (but consider that WE is informative of productivity)
# Retain rugosity, exclude coral cover (but consider that high rugosity = high coral.cover = low algal.cover)
# As we have retained rugosity, we must exclude algal.cover
# So our environmental matrix should contain:
# Wave exposure and rugosity
# NOTE: To make these decisions you need to make sure you
# base them in your ecological knowledge on the dataset
# A less arbitrary way to check for collinearity is to
# examine the variance inflation values (VIFS).
# These numbers indicate the proportion by which the variance of
# a regresion coefficient is inflated by the presence of another one
# to compute the VIFs:
require
(
faraway
)
sort
(
vif
(
env.data
))
# According to Borcard et al (2011) VIFs > 10 should be avoided,
# so based on this test we could opt for droping just
# algal cover and coral cover:
# checking out the names of the variables of our env.data
names
(
env.data
)
# Reduce this matrix to include just columns 1 and 3:
subset.env
<-
env.data
[
c
(
TRUE
,
TRUE
,
TRUE
,
FALSE
,
FALSE
)]
# and check the new VIFs:
sort
(
vif
(
subset.env
))
# all good, no value >10
# standardise the reduced environmental dataset:
stand.env
<-
scale
(
subset.env
)
stand.env
<-
as.data.frame
(
stand.env
)
# Run the simple RDA
formulaRDA
<-
rda
(
spe.hel
~
.
,
data
=
stand.env
)
summary
(
formulaRDA
)
# Some explanations of the output:
# (1) Partitioning of variance: the overall variance is partitioned
# into "constrained" and "unconstrained" fractions.
# The "constrained" fraction is the amount of variance of the Y matrix explained
# by the X explanatory matrix. Expressed as a proportion,it is equivalent to an R2
# in univariate multiple regression.
# For this analysis the amount of explained variance is (complete):
# R/ 33%
# This means that the remaining "unconstrained" variance (complete)
# R/ 67 %
# remains unexplained by the variables included in the model
# (2) Eigenvalues and their contribution to the variance:
# This analysis yielded (how many?) canonical axes labelled RDA1 to....?
# R/ 3 - to RDA3
# and an aditional (how many?) canonical axes labelled PC1 to ...?
# R/ 8 - to PC8
# Toghether the RDA1, RDA2, RDA3 added up = 33%
# Values for each RDA are the proportions contributed by each to the 33%,
# not to the 100%
# Toghether the PC1-PC8 added up = 68%
# Values of "proportion explained" under PC1-PC8 represent
# the amount of variance represented by residual PCs but NOT EXPLAINED
# by the RDA model
# (3) Note that the amount of variance explained by PC1 (which is residual
# i.e. not attributable to wave exposure or to rugosity) is larger, than
# the amount of variability explained by RDA3, in this case, the researcher
# should exploit this information and propose a hypothesis about the
# causes for this (outside the scope of this course but fyi).
# (4) Species scores, they are the coordinates of the tips of the
# species arrows
# (5) Site scores, they are the coordinates of the site dots in the
# reduced space generated with the response (site x species) matrix
# (6) Site constraints (linear combinations of constraining variables):
# coordinates of the sites in the reduced space generated with the
# matrix of explanatory (sites x environmental) variables
# (7) Scores for each of the explanatory (environmental) variables
# to be represented as lines in the RDA ordination plot
########################################################################
# NOW GET ALL THAT INFO OUT TO MAKE THE RDA PLOT
########################################################################
(
R2
<-
RsquareAdj
(
formulaRDA
)
$
r.squared
)
(
R2adj
<-
RsquareAdj
(
formulaRDA
)
$
adj.r.squared
)
# The second value is the unbiased amount of variation explained
# by the model
# As you can see this is very small
# only 8% of the variation is explained by these variables
# In this case plotting an RDA is not meaningful
# given that the species-environment relationship is so poor
# but we will do it anyway to learn how
#########################################################################
plot
(
formulaRDA
,
scaling
=
1
,
main
=
"RDA analysis of Palau fish biomass"
)
spe.sc
<-
scores
(
formulaRDA
,
choices
=
1
:
2
,
scaling
=
1
,
display
=
"sp"
)
arrows
(
0
,
0
,
spe.sc
[,
1
],
spe.sc
[,
2
],
length
=
0
,
lty
=
1
,
col
=
"red"
)
# Test of significance of the RDA results through permutation
anova.cca
(
formulaRDA
,
step
=
1000
)
anova.cca
(
formulaRDA
,
by
=
"axis"
,
step
=
1000
)
# Here we see that the RDA1 is significant
#########################################################################
# TRY NOW TO RUN THE MODEL EXCLUDING OTHER EXPLANATORY VARIABLES
#########################################################################
#########################################################################
# Tips for the report
# The questions you need to ask of this analysis:
# Do environmental characteristics play an important role in driving the
# spatial patters of fish community structure in Palau?
# Are there some environmental characteristics that play a more
# important role than others?
# Is there any remaining variation in community composition that appears
# unexplained by the variables the researcher quantified?
# Method:
# Say which data transformations you used (briefly justify why you need the
# transformation)
# Say which method you used (RDA)
# Results: Report the plot, use the output to interpret what it means
########################################################################
3c. Chapter-10-InterpretationEcologicalStructures.pdf
0 → 100644
View file @
78bb1d72
File added
3d. Chapter-11-CanonicalAnalysisIncludingRDA.pdf
0 → 100644
View file @
78bb1d72
File added
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment