-
Notifications
You must be signed in to change notification settings - Fork 2
/
Generate_MCD.R
60 lines (42 loc) · 1.89 KB
/
Generate_MCD.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
### Generating the molecular characteristics dendrogram
# RED 2020; [email protected]; [email protected]
options(digits = 10)
require(phangorn) # For tree based functions
require(ggtree) # For tree visualization
require(vegan) # For vegdist
# ################## #
#### Load in data ####
# ################## #
# Set sample name
Sample_Name = "Dataset_Name"
# Load in data
setwd("/path/to/ICR_data")
mol = read.csv(list.files(pattern = "*Mol.csv"), row.names = 1) # Load in molecular data
### Ensuring that isotopic peaks are removed
if(length(which(colnames(data) %in% "C13")) > 0){
w = row.names(data)[which(data$C13 > 0)]
if(length(w) > 0){
data = data[-which(row.names(data) %in% w),]
}
rm("w")
}
# Removing peaks that have no formula assignments
mol = mol[-which(mol$MolForm %in% NA),]
# Setting objects for useful parameters
Mol.Info = mol[,c("C", "H", "O", "N", "S", "P", "DBE", "AI_Mod", "kdefect"), drop = F]
Mol.Ratio = mol[,c("OtoC_ratio", "HtoC_ratio", "NtoC_ratio", "PtoC_ratio", "NtoP_ratio")]
# ##################### #
#### Generating tree ####
# ##################### #
# Pairwise distance between peaks
Mol.Info = as.data.frame(apply(Mol.Info, 2, scale), row.names = row.names(Mol.Info)) # Generating a distance matrix based upon the provided parameters
# Create tree
tree = as.phylo(hclust(vegdist(Mol.Info, "euclidean"), "average")) # Converting the distance matrix into a tree
# Quick visualization (with Van Krevelen compound classes)
mol = cbind(row.names(mol), mol) # Peak names need to be the first column for ggtree to work
col = colorRampPalette(c("dodgerblue4", "goldenrod3", "firebrick3"))(length(unique(mol$bs1_class)))
ggtree(tree, layout = "circular") %<+% mol + geom_tippoint(aes(color = bs1_class), na.rm = T) +
scale_color_manual(values = col)+
theme(legend.position = "right")
# Writing tre
write.tree(tree, paste(Sample_Name, "_MCD_UPGMA.tre", sep = ""))