#bring in GRC data grcdata <- read.delim("C:/Users/jbeech/Desktop/crabs&snails.txt")
#subset to consider only snails attacked attacked <- subset(grcdata, Killed.or.Injured>0) #subset by prey species cerion <- subset(attacked, Species=='Cerion spp.') h.varians <- subset(attacked, Species=='H. varians')
#PCA Analysis (both species) #setup pca1 = prcomp(attacked[,c(4,5,6,11,15)], scale. = TRUE) scores = as.data.frame(pca1$x) #biplot biplot(pca1,scale=0, cex=.7, main = "Predator-Prey Body Size Relationships") #flip that biplot around to make it comparable to the others pca1$rotation = -pca1$rotation pca1$x <- -pca1$x biplot(pca1,scale=0, cex=.7, main = "Predator-Prey Body Size Relationships in Terrestrial Snails")
#PCA Analysis (Cerion spp.) #setup pca2 = prcomp(cerion[,c(4,5,6,11,15)], scale. = TRUE) scores2 = as.data.frame(pca2$x) #biplot biplot(pca2,scale=0, cex=.7, main = "Predator-Prey Body Size Relationships in Cerion spp.")
#PCA Analysis (H. varians) #setup pca3 = prcomp(h.varians[,c(4,5,11,15)], scale. = TRUE) scores3 = as.data.frame(pca3$x) #biplot biplot(pca3,scale=0, cex=.7, main = "Predator-Prey Body Size Relationships in H. varians")
#correlagrams #developing correlation matrices using pairs() #set up lattices and panels require(lattice) panel.hist <- function(x, ...) { usr <- par("usr"); on.exit(par(usr)) par(usr = c(usr[1:2], 0, 1.5) ) h <- hist(x, plot = FALSE) breaks <- h$breaks; nB <- length(breaks) y <- h$counts; y <- y/max(y) rect(breaks[-nB], 0, breaks[-1], y, col = "blue", ...) } panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) { usr <- par("usr"); on.exit(par(usr)) par(usr = c(0, 1, 0, 1)) r <- abs(cor(x, y)) txt <- format(c(r, 0.123456789), digits = digits)[1] txt <- paste0(prefix, txt) if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt) text(0.5, 0.5, txt, cex = cex.cor * r) }
#correlagram for all snails pairs(attacked[,c(4,5,6,11,15)], diag.panel=panel.hist,upper.panel=panel.cor, main="Land Crab Predation of Land Snails") #correlagram for Cerion spp. pairs(cerion[,c(4,5,6,11,15)], diag.panel=panel.hist,upper.panel=panel.cor, main="Land Crab Predation of Cerion Snails") #correlegram for H. varians pairs(h.varians[,c(4,5,6,11,15)], diag.panel=panel.hist,upper.panel=panel.cor, main="Land Crab Predation of H. varians")
#further correlagrams #SQRT of largest fragment ratio (attempting to create normal distribuion) for H. varians pairs(h.varians[,c(4,5,6,11,16)], diag.panel=panel.hist,upper.panel=panel.cor, main="Land Crab Predation of H. varians") #inverse log of largest fragment ratio (attempting to create normal distribuion) for H. varians pairs(h.varians[,c(4,5,6,11,17)], diag.panel=panel.hist,upper.panel=panel.cor, main="Land Crab Predation of H. varians") #log of largest fragment ratio pairs(h.varians[,c(4,5,6,11,18)], diag.panel=panel.hist,upper.panel=panel.cor, main="Land Crab Predation of H. varians") #natural log pairs(h.varians[,c(4,5,6,11,19)], diag.panel=panel.hist,upper.panel=panel.cor, main="Land Crab Predation of H. varians") #inverse natural log pairs(h.varians[,c(4,5,6,11,20)], diag.panel=panel.hist,upper.panel=panel.cor, main="Land Crab Predation of H. varians") #power(3) pairs(h.varians[,c(4,5,6,11,21)], diag.panel=panel.hist,upper.panel=panel.cor, main="Land Crab Predation of H. varians")
#All snails, all variables (gibberish) #basic d <- vegdist (attacked[,c(4,5,6,11,15)]) par(mfrow=c(3,1)) par(mar=c(3,4,1,1)+.1) csin <- hclust(d, method="single") csin plot(csin) plot(csin, hang=-1) ccom <- hclust(d, method="complete") plot(ccom, hang=-1) caver <- hclust(d, method="aver") plot(caver, hang=-1, main="Cluster Dendrogram of Variation in Bahamian Land Snails Attacked") rect.hclust(caver, 3) # boxes the groups at level 3
#hierarchical (less gibberish?) d1 <- dist(attacked[,c(4,5,6,11,15)], method = "euclidean") # distance matrix fit <- hclust(d1, method="ward.D") plot(fit,cex=0.6, main="Hierarchical Cluster Dendrogram of Variation in Bahamian Land Snails Attacked") ) # display dendogram groups <- cutree(fit, k=3) # cut tree into 3 clusters # draw dendogram with red borders around the 3 clusters rect.hclust(fit, k=3, border="red")
#K means # K-Means Clustering with 4 clusters fit <- kmeans(attacked[,c(4,5,6,11,15)], 4) fit # check out centroid means, membership vector clusplot(attacked[,c(4,5,6,11,15)], fit$cluster, color=TRUE, shade=TRUE, labels=2, lines=0, cex=0.4, main="Cluster Analysis of Variation in Bahamian Land Snails Attacked")
#height and length only (cluster into species?) #basic e <- vegdist (attacked[,c(4,5)]) par(mfrow=c(3,1)) par(mar=c(3,4,1,1)+.1) dsin <- hclust(e, method="single") dsin plot(dsin) plot(dsin, hang=-1) dcom <- hclust(e, method="complete") plot(dcom, hang=-1)
#Cerion snails (all variables) #basic d2 <- vegdist (cerion[,c(4,5,6,11,15)]) par(mfrow=c(3,1)) par(mar=c(3,4,1,1)+.1) csin2 <- hclust(d2, method="single") csin2 plot(csin2) plot(csin2, hang=-1) ccom2 <- hclust(d2, method="complete") plot(ccom2, hang=-1) caver2 <- hclust(d2, method="aver") plot(caver2, hang=-1, main="Cluster Dendrogram of Variation in Cerion spp. Snails Attacked") rect.hclust(caver2, 3) # boxes the groups at level 3
#hierarchical d2.1 <- dist(cerion[,c(4,5,6,11,15)], method = "euclidean") # distance matrix fit <- hclust(d2.1, method="ward.D") plot(fit,cex=0.6, main="Hierarchical Cluster Dendrogram of Variation in Cerion spp. Snails Attacked") # display dendogram groups <- cutree(fit, k=3) # cut tree into 5 clusters # draw dendogram with red borders around the 5 clusters rect.hclust(fit, k=3, border="red")
#K means # K-Means Clustering with 3 clusters fit2 <- kmeans(cerion[,c(4,5,6,11,15)], 3) fit2 # check out centroid means, membership vector clusplot(cerion[,c(4,5,6,11,15)], fit2$cluster, color=TRUE, shade=TRUE, labels=2, lines=0, cex=0.4, main="Cluster Analysis of Variation in Cerion spp. Snails Attacked")
#H. varians snails (all variables) #basic d3 <- vegdist (h.varians[,c(4,5,11,15)]) par(mfrow=c(3,1)) par(mar=c(3,4,1,1)+.1) csin3 <- hclust(d3, method="single") csin3 plot(csin3) plot(csin3, hang=-1) ccom3 <- hclust(d3, method="complete") plot(ccom3, hang=-1) caver3 <- hclust(d3, method="aver") plot(caver3, hang=-1, main="Cluster Dendrogram of Variation in H. varians Snails Attacked") rect.hclust(caver3, 3) # boxes the groups at level 3
#hierarchical d3.1 <- dist(h.varians[,c(4,5,11,15)], method = "euclidean") # distance matrix fit <- hclust(d3.1, method="ward.D") plot(fit,cex=0.6, main="Hierarchical Cluster Dendrogram of Variation in H. varians Snails Attacked") # display dendogram groups <- cutree(fit, k=2) # cut tree into 5 clusters # draw dendogram with red borders around the 5 clusters rect.hclust(fit, k=2, border="red")
#K means # K-Means Clustering with 2 clusters fit3 <- kmeans(h.varians[,c(4,5,11,15)], 2) fit3 # check out centroid means, membership vector clusplot(h.varians[,c(4,5,11,15)], fit3$cluster, color=TRUE, shade=TRUE, labels=2, lines=0, cex=0.4, main="Cluster Analysis of Variation in H. varians Snails Attacked")
#DISCRIMINANT ANALYSIS #setup #NOTE: MASS and vegan will not run in the same session #Attribution: stolen from Little Book of R install.packages("MASS") require(MASS) #write the function calclda calclda <- function(variables,loadings) { # find the number of samples in the data set as.data.frame(grcdata[,c(4,5,6,11,15)]) numsamples <- nrow(variables) # make a vector to store the discriminant function ld <- numeric(numsamples) # find the number of variables numvariables <- length(variables) # calculate the value of the discriminant function for each sample for (i in 1:numsamples) { valuei <- 0 for (j in 1:numvariables) { valueij <- variables[i,j] loadingj <- loadings[j] valuei <- valuei + (valueij * loadingj) } ld[i] <- valuei } # standardise the discriminant function so that its mean value is 0: ld <- as.data.frame(scale(ld, center=TRUE, scale=FALSE)) ld <- ld[[1]] return(ld) }
#trying it again with attack type grc.lda3 <- lda(grcdata$Killed.or.Injured ~ grcdata$Shell.Length + grcdata$Shell.Width + grcdata$Shell.Thickness + grcdata$Predator.Carapace.Width + grcdata$Largest.Fragment.Ratio) head(grc.lda3) grc.lda3$scaling[,1] calclda(attacked[,c(4,5,11,15)], grc.lda3$scaling[,1]) grc.lda.values3 <- predict(grc.lda3, grcdata[,c(4,5,6,11,15)]) grc.lda.values3$x[,1] plot(grc.lda.values3$x[,1], grc.lda.values3$x[,2], main="Attack Type Discrimination") text(grc.lda.values3$x[,1],grc.lda.values3$x[,2],attacked$Killed.or.Injured,cex=0.7,pos=4,col="red")
#trying it again with species #NOT WORKING grc.lda4 <- lda(grcdata$Species ~ grcdata$Shell.Length + grcdata$Shell.Width + grcdata$Shell.Thickness + grcdata$Predator.Carapace.Width + grcdata$Largest.Fragment.Ratio) head(grc.lda4) grc.lda4$scaling[,1] calclda(attacked[,c(4,5,11,15)], grc.lda4$scaling[,1]) grc.lda.values4 <- predict(grc.lda4, grcdata[,c(4,5,6,11,15)]) grc.lda.values4$x[,1] plot(grc.lda.values4$x[,1], grc.lda.values4$x[,2], main="Prey Species Discrimination") text(grc.lda.values4$x[,1],grc.lda.values4$x[,2],attacked$Species,cex=0.7,pos=4,col="red") #its guesses about what species each data point should be grc.lda.values4$class
#wouldn't run (no second scaling factor), so then I tried using shell thickness as a species proxy (runs, but results unhelpful) grc.lda5 <- lda(grcdata$Shell.Thickness ~ grcdata$Shell.Length + grcdata$Shell.Width + grcdata$Shell.Thickness + grcdata$Predator.Carapace.Width + grcdata$Largest.Fragment.Ratio) head(grc.lda5) grc.lda5$scaling[,1] calclda(attacked[,c(4,5,11,15)], grc.lda5$scaling[,1]) grc.lda.values5 <- predict(grc.lda5, grcdata[,c(4,5,6,11,15)]) grc.lda.values5$x[,1] plot(grc.lda.values5$x[,1], grc.lda.values5$x[,2], main="Prey Species Discrimination by a Shell Thickness Proxy") text(grc.lda.values5$x[,1],grc.lda.values5$x[,2],attacked$Species,cex=0.7,pos=4,col="red")