-
Notifications
You must be signed in to change notification settings - Fork 3
/
EPA_PhylogeneticSignal.R
85 lines (62 loc) · 3.07 KB
/
EPA_PhylogeneticSignal.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# This script generates performs the Mantel correlation to determine if there is a phylogenetic signal in our system ###
# RED 2017; [email protected]
setwd("/home/rdanczak/EPA_16S_Analysis/No_Pynast_bNTI")
# setwd("/Users/danczak.6/Documents/Wilkins Lab/Ohio EPA Project/Sequencing Data/16S Data/")
library(vegan)
library(picante)
is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) abs(x - round(x)) < tol # Need this to fix the fact that you can't exactly have half of a read created from my averaging step
### Phylogenetic Signal ###
tree = read.tree("Raw_EPA_OTU_Tree.tre")
data = read.delim("Raw_EPA_no_pynast.txt", row.names = 1)
factors = read.csv("OTU_table_factors.csv", row.names = 1) # Importing factors to make the averaging of replicates possible
table = read.csv("ORP_Table.csv", row.names = 1)
# Doing some data cleaning
data = data[,-which(colnames(data) %in% c("YB04","BL01","BL02"))]
factors = factors[-which(row.names(factors) %in% c("YB04","BL01","BL02")),]
### Averaging my duplicate results
x = as.character(unique(factors[,1])) # Setting the unique factors to a variable
data.avg = matrix(nrow = length(data[,1]), ncol = length(x)) # Creating an empty matrix to spit the averaged data into
for(i in 1:length(x)){
w = which(factors[,1] %in% x[i])
if(length(w)==1){
data.avg[,i] = data[,w]
} else {
data.avg[,i] = apply(data[,w], 1, mean)
}
} # This loop goes through by column and averages the two columns with matching factors
colnames(data.avg) = x
row.names(data.avg) = row.names(data)
data = as.data.frame(t(data.avg))
rm('data.avg')
### Removing the 0.1um filter data from the dataset
w = grep(" 0.1", row.names(data))
data = data[-w,]
table = as.matrix(table)
table = as.data.frame(table[-w,])
### Need to rarefy or else cophenetic isn't happy
data = as.matrix(data)
data[which(is.wholenumber(data) == FALSE)] = data[which(is.wholenumber(data) == FALSE)]+0.5 # Rounding the 0.5s up as half a sequence is a sequence - it was present at least once
data = as.data.frame(rrarefy(data, 8000))
# Determining OTUs which have zero abundance after rarefaction
zero.members = apply(data, 2, max) # Finding the maximum number of sequence per OTU
data = data[,-which(zero.members==0)] # Removing those OTUs
# Matching the tree to the newly rarefied OTU dataset
phylo = match.phylo.data(tree,t(data))
data = t(phylo$data)
tree = phylo$phy
print("Finished rarefying...hopefully")
### Figuring out the abundance-weighted water table mean, this part was picked up from Stegen 2012
r = apply(data, 2, function(x) x*table) # Weighting ORP by abudances
x = matrix(unlist(r), ncol = length(r), byrow = F)
y = apply(x, 2, mean, na.rm = T) # Determing the mean of the column (i.e. the mean of abundance-weighted depths)
n = abs(outer(y, y, '-')) # Pairwise comparison of the abundance-weighted depths
### Reading the tree for phylogenetic distance information
print("Starting cophenetic")
x = cophenetic.phylo(tree)
### Trying to plot the Mantel Correlogram
print("We've gotten this far! Starting Mantel!")
q = mantel.correlog(n,x)
pdf("PhyloSignal.pdf")
plot(q)
dev.off()
print("Done")