Percentage of individuals in each group with associated subdomains (subdomain)
# ######################################################################################################################################################################################################## ######################################################################################################################################################################################################## # UP and DOWN subdomain Separate plotinglibrary(gplots)
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 4.5.2
library(reshape2) library(RColorBrewer) library(dplyr)
library(viridis)
library(ggrepel) library(corrplot)
library(plotly)
library(patchwork) library(stringr) # Open subdomains comparison file" #data_subdomain <- read.csv("comarative_subdomain_results.csv", sep =",", header = TRUE, stringsAsFactors = FALSE) data_subdomain <- read.csv("comarative_subdomain_results_050525.csv", sep =",", header = TRUE, stringsAsFactors = FALSE) # Truncate long Biodomain_Subdomain names to 40 characters data_subdomain <- data_subdomain %>% mutate(Biodomain_Subdomain = str_trunc(Biodomain_Subdomain, width = 40)) # Removing the 1st and 2nd columns data_subdomain_new_UP <- data_subdomain[, -c(1, 2, 3, 4, 7, 9, 11, 13)] head(data_subdomain_new_UP)
## Biodomain_Subdomain UP_ratio_AD_female UP_ratio_AD_male ## 1 APP Metabolism_amyloid-beta clearance 0.1015625 0.08510638 ## 2 APP Metabolism_amyloid-beta formation 0.1875000 0.00000000 ## 3 APP Metabolism_amyloid precursor prot... 0.0000000 0.10638298 ## 4 APP Metabolism_amyloid precursor prot... 0.0234375 0.04255319 ## 5 APP Metabolism_other 0.0234375 0.04255319 ## 6 Apoptosis_apoptotic mitochondrial cha... 0.0390625 0.00000000 ## UP_ratio_AsymAD_female UP_ratio_AsymAD_male ## 1 0.050314465 0.04918033 ## 2 0.157232704 0.03278689 ## 3 0.006289308 0.08196721 ## 4 0.012578616 0.01639344 ## 5 0.031446541 0.01639344 ## 6 0.050314465 0.00000000
data_subdomain_new_DOWN <- data_subdomain[, -c(1, 2, 3, 4, 6, 8, 10, 12)] head(data_subdomain_new_DOWN)
## Biodomain_Subdomain DOWN_ratio_AD_female ## 1 APP Metabolism_amyloid-beta clearance 0.0234375 ## 2 APP Metabolism_amyloid-beta formation 0.0234375 ## 3 APP Metabolism_amyloid precursor prot... 0.0781250 ## 4 APP Metabolism_amyloid precursor prot... 0.0078125 ## 5 APP Metabolism_other 0.0156250 ## 6 Apoptosis_apoptotic mitochondrial cha... 0.0546875 ## DOWN_ratio_AD_male DOWN_ratio_AsymAD_female DOWN_ratio_AsymAD_male ## 1 0.00000000 0.006289308 0.01639344 ## 2 0.06382979 0.006289308 0.06557377 ## 3 0.06382979 0.056603774 0.09836066 ## 4 0.04255319 0.006289308 0.00000000 ## 5 0.06382979 0.025157233 0.03278689 ## 6 0.12765957 0.012578616 0.03278689
data_matrix_subdomain_UP <- data_subdomain_new_UP[, -1] # Reorder the columns data_matrix_subdomain_UP <- data_matrix_subdomain_UP[, c( "UP_ratio_AD_female", "UP_ratio_AsymAD_female", "UP_ratio_AD_male", "UP_ratio_AsymAD_male" )] data_matrix_subdomain_DOWN <- data_subdomain_new_DOWN[, -1] # Reorder the columns data_matrix_subdomain_DOWN <- data_matrix_subdomain_DOWN[, c( "DOWN_ratio_AD_female", "DOWN_ratio_AsymAD_female", "DOWN_ratio_AD_male", "DOWN_ratio_AsymAD_male" )] # Replace NA values with 0 (or any other specific value) data_matrix_subdomain_UP[is.na(data_matrix_subdomain_UP)] <- 0 data_matrix_subdomain_DOWN[is.na(data_matrix_subdomain_DOWN)] <- 0 # Set up the color scale using RColorBrewer color_scale <- colorRampPalette(brewer.pal(9, "RdYlGn")) # Assign custom column names colnames(data_matrix_subdomain_UP) <- c("AD_Female", "AsymAD_Female", "AD_Male", "AsymAD_Male") # Assign custom column names colnames(data_matrix_subdomain_DOWN) <- c("AD_Female", "AsymAD_Female", "AD_Male", "AsymAD_Male") # Set up the color scale using RColorBrewer color_scale <- colorRampPalette(brewer.pal(9, "RdYlGn")) color_scale1 <- colorRampPalette(c("white", "lightblue", "blue", "darkblue")) color_scale2 <- colorRampPalette(brewer.pal(9, "RdGy")) color_scale3 <- colorRampPalette(brewer.pal(9, "Greens")) color_scale4 <- colorRampPalette(brewer.pal(9, "Blues")) color_scale5 <- colorRampPalette(brewer.pal(9, "Purples")) color_scale6 <- colorRampPalette(brewer.pal(9, "Oranges")) heatmap.2( as.matrix(data_matrix_subdomain_UP), Rowv = TRUE, Colv = FALSE, col = color_scale3, scale = "none", # Scale rows (genes) to Z-scores trace = "none", # Turn off row and column annotations margins = c(20, 40), # Set margins for row and column labels key = TRUE, # Include a color key keysize = .5, # Size of the color key cexCol = 1.5, # Set column label size cexRow = 1, # Set row label size cellnote=round(data_matrix_subdomain_UP, 2), notecol="black", labRow = data_subdomain$Biodomain_Subdomain, # Row labels (subdomain name) dendrogram = "row", # Show both row and column dendrograms main="Individuals (ratio) in each group exhibit a significant subdomain (subdomain)" )

heatmap.2( as.matrix(data_matrix_subdomain_DOWN), Rowv = TRUE, Colv = FALSE, col = color_scale6, scale = "none", # Scale rows (genes) to Z-scores trace = "none", # Turn off row and column annotations margins = c(20, 40), # Set margins for row and column labels key = TRUE, # Include a color key keysize = .5, # Size of the color key cexCol = 1.5, # Set column label size cexRow = 1, # Set row label size cellnote=round(data_matrix_subdomain_DOWN, 2), notecol="black", labRow = data_subdomain$Biodomain_Subdomain, # Row labels (subdomain name) dendrogram = "row", # Show both row and column dendrograms main="Individuals (ratio) in each group exhibit a significant subdomain (subdomain)" )

######################################################################################################################################################################################################## subset_data_subdomain_UP <- data_subdomain_new_UP %>% filter(UP_ratio_AD_female >= 0.10 | UP_ratio_AD_male >= 0.10 | UP_ratio_AsymAD_female >= 0.10 | UP_ratio_AsymAD_male >= 0.10) subset_data_subdomain_DOWN <- data_subdomain_new_DOWN %>% filter(DOWN_ratio_AD_female >= 0.10 | DOWN_ratio_AD_male >= 0.10 | DOWN_ratio_AsymAD_female >= 0.10 | DOWN_ratio_AsymAD_male >= 0.10) rownames(subset_data_subdomain_UP) <- subset_data_subdomain_UP$Biodomain_Subdomain rownames(subset_data_subdomain_DOWN) <- subset_data_subdomain_DOWN$Biodomain_Subdomain subset_data_subdomain_UP <- subset_data_subdomain_UP[, -1] subset_data_subdomain_DOWN <- subset_data_subdomain_DOWN[, -1] # Replace NA values with 0 (or any other specific value) subset_data_subdomain_UP[is.na(subset_data_subdomain_UP)] <- 0 subset_data_subdomain_DOWN[is.na(subset_data_subdomain_DOWN)] <- 0 head(subset_data_subdomain_UP, 3)
## UP_ratio_AD_female UP_ratio_AD_male ## APP Metabolism_amyloid-beta clearance 0.1015625 0.08510638 ## APP Metabolism_amyloid-beta formation 0.1875000 0.00000000 ## APP Metabolism_amyloid precursor prot... 0.0000000 0.10638298 ## UP_ratio_AsymAD_female ## APP Metabolism_amyloid-beta clearance 0.050314465 ## APP Metabolism_amyloid-beta formation 0.157232704 ## APP Metabolism_amyloid precursor prot... 0.006289308 ## UP_ratio_AsymAD_male ## APP Metabolism_amyloid-beta clearance 0.04918033 ## APP Metabolism_amyloid-beta formation 0.03278689 ## APP Metabolism_amyloid precursor prot... 0.08196721
head(subset_data_subdomain_DOWN, 3)
## DOWN_ratio_AD_female ## Apoptosis_apoptotic mitochondrial cha... 0.0546875 ## Autophagy_mitophagy 0.0703125 ## Cell Cycle_cell cycle phase transition 0.0234375 ## DOWN_ratio_AD_male ## Apoptosis_apoptotic mitochondrial cha... 0.12765957 ## Autophagy_mitophagy 0.06382979 ## Cell Cycle_cell cycle phase transition 0.08510638 ## DOWN_ratio_AsymAD_female ## Apoptosis_apoptotic mitochondrial cha... 0.01257862 ## Autophagy_mitophagy 0.03773585 ## Cell Cycle_cell cycle phase transition 0.03773585 ## DOWN_ratio_AsymAD_male ## Apoptosis_apoptotic mitochondrial cha... 0.03278689 ## Autophagy_mitophagy 0.11475410 ## Cell Cycle_cell cycle phase transition 0.14754098
# Reorder the columns subset_data_subdomain_UP <- subset_data_subdomain_UP[, c( "UP_ratio_AD_female", "UP_ratio_AsymAD_female", "UP_ratio_AD_male", "UP_ratio_AsymAD_male" )] subset_data_subdomain_DOWN <- subset_data_subdomain_DOWN[, c( "DOWN_ratio_AD_female", "DOWN_ratio_AsymAD_female", "DOWN_ratio_AD_male", "DOWN_ratio_AsymAD_male" )] # Assign custom column names colnames(subset_data_subdomain_UP) <- c("AD_Female", "AsymAD_Female", "AD_Male", "AsymAD_Male") colnames(subset_data_subdomain_DOWN) <- c("AD_Female", "AsymAD_Female", "AD_Male", "AsymAD_Male") # Create the heatmap with filtered row names UP <- heatmap.2( as.matrix(subset_data_subdomain_UP), Rowv = TRUE, Colv = FALSE, col = color_scale3, scale = "none", trace = "none", margins = c(20, 50), key = TRUE, keysize = .50, cexCol = 2.5, srtCol = 45, cexRow = 2.5, cellnote=round(subset_data_subdomain_UP, 2), notecex=2, notecol="black", dendrogram = "row", main = "subdomains Present in atleast 5% Individuals" )

# Create the heatmap with filtered row names DOWN <- heatmap.2( as.matrix(subset_data_subdomain_DOWN), Rowv = TRUE, Colv = FALSE, col = color_scale6, scale = "none", trace = "none", margins = c(20, 50), key = TRUE, keysize = .50, cexCol = 2.5, srtCol = 45, cexRow = 2.5, cellnote=round(subset_data_subdomain_DOWN, 2), notecex=2, notecol="black", dendrogram = "row", main = "subdomains Present in atleast 5% Individuals" )

Comparative analysis between/within group for associated subdomains (subdomain)
#Comparative analysis between/within group for associated subdomains (subdomain)
######################################################################################################################################################################################################## # Male vs Female # Define a custom color palette with distinct colors for each biodomain (different type of Biodomain - color define by Greg Carry) custom_colors <- c("Apoptosis" = "#673399", "APP Metabolism" = "#fe6500", "Autophagy" = "#9931fd", "Cell Cycle" = "#18857f", "DNA Repair" = "#f451ad", "Endolysosome" = "#3466cc", "Epigenetic" = "#cb3233", "Immune Response" = "#9ccdcc", "Lipid Metabolism" = "#989898", "Metal Binding and Homeostasis" = "#4b0d20", "Mitochondrial Metabolism" = "#97cb98", "Myelination" = "#996735", "Oxidative Stress" = "#ffcd66", "Proteostasis" = "#c8b269", "RNA Spliceosome" = "#0c9aff", "Structural Stabilization" = "#ff9a9a", "Synapse" = "#329a33", "Tau Homeostasis" = "#cb97cb", "Vasculature" = "#cecd02", "none" = "#7f7f7f") # Male vs Female AD # Open the subdomain file where UP (+) and DOWN (-) subdomains are merged together in a single column and the data is separated as a comparison for Male vs Female subdomains #data_subdomain_MvF_AD <- read.csv("Male_Female_comarative_subdomain_results.csv", sep =",", header = TRUE, stringsAsFactors = FALSE) data_subdomain_MvF_AD <- read.csv("Male_Female_comarative_subdomain_results_050525.csv", sep =",", header = TRUE, stringsAsFactors = FALSE) head(data_subdomain_MvF_AD, 5)
## Sno unique_id Biodomain Subdomain_initial ## 1 1 AM_ac_4 APP Metabolism amyloid-beta clearance ## 2 2 AM_af_5 APP Metabolism amyloid-beta formation ## 3 3 AM_appbp_2 APP Metabolism amyloid precursor protein biosynthetic process ## 4 4 AM_appmp_3 APP Metabolism amyloid precursor protein metabolic process ## 5 5 AM_others_1 APP Metabolism other ## Subdomain ## 1 amyloid-beta clearance + ## 2 amyloid-beta formation + ## 3 amyloid precursor protein biosynthetic process + ## 4 amyloid precursor protein metabolic process + ## 5 APP_Metabolism_others + ## Biodomain_Subdomain_initial ## 1 APP Metabolism_amyloid-beta clearance ## 2 APP Metabolism_amyloid-beta formation ## 3 APP Metabolism_amyloid precursor protein biosynthetic process ## 4 APP Metabolism_amyloid precursor protein metabolic process ## 5 APP Metabolism _other ## Biodomain_Subdomain Direction ## 1 APP_Metabolism_amyloid-beta_clearance + UP ## 2 APP_Metabolism_amyloid-beta_formation + UP ## 3 APP_Metabolism_amyloid_precursor_protein_biosynthetic_process + UP ## 4 APP_Metabolism_amyloid_precursor_protein_metabolic_process + UP ## 5 APP_Metabolism_others + UP ## Female Male Difference ## 1 0.1015625 0.08510638 0.01645612 ## 2 0.1875000 0.00000000 0.18750000 ## 3 0.0000000 0.10638298 0.10638298 ## 4 0.0234375 0.04255319 0.01911569 ## 5 0.0234375 0.04255319 0.01911569
# Plot the data as a comparison for Male vs Female subdomains ggplot(data_subdomain_MvF_AD, aes(x = Female, y = Male, color = Biodomain, size = Difference)) + geom_point(aes(color=Biodomain, size = Difference), alpha=0.6, stroke = 0) + theme_bw() + # Conditionally label points where AD or AsymAD values are greater than 0.05 geom_text_repel( aes(label = ifelse((Male > 0.10 & Female < 0.05) | (Female > 0.07 & Male < 0.05) | (Female > 0.07 & Male > 0.07), Biodomain_Subdomain, "")), size = 6, max.overlaps = Inf ) + scale_color_manual(values = custom_colors) + labs(color = "AD Biodomain") + # Add x = y line geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray", size = 0.5) + # Add horizontal and vertical dashed lines geom_hline(yintercept = 0.05, linetype = "dashed", color = "gray") + geom_hline(yintercept = 0.10, linetype = "dashed", color = "red") + geom_vline(xintercept = 0.05, linetype = "dashed", color = "gray") + geom_vline(xintercept = 0.10, linetype = "dashed", color = "red") + labs(title = "Subdomain Comparisons for Female_AD vs Male_AD", x = "Female (% of subjects)", y = "Male (% of subjects)") + theme( text = element_text(size = 30), legend.position = "right", axis.text = element_text(size = 30), axis.title = element_text(size = 30) )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0. ## ℹ Please use `linewidth` instead. ## This warning is displayed once every 8 hours. ## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was ## generated.

######################################################################################################################################################################################################## # Wrap long Subdomain names #data_subdomain_MvF_AD <- data_subdomain_MvF_AD %>% #mutate(Subdomain_wrapped = str_wrap(Subdomain, width = 25)) ggplot(data_subdomain_MvF_AD, aes(x = Male, y = Female, color = Biodomain, size = Difference)) + geom_point(alpha = 0.6, stroke = 0) + # Increased alpha for better visibility theme_bw() + # Conditionally label points where |Male - Female| >= 0.05 geom_text_repel( aes(label = ifelse((abs(Female - Male) >= 0.07) | (Female > 0.07 & Male > 0.07), Subdomain, "")), size = 8, max.overlaps = Inf # Ensure all significant labels are shown ) + scale_size(range = c(.1, 10), name="% Difference") + scale_color_manual(values = custom_colors) + labs(color = "AD Biodomain") + # Add x = y reference line geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray", linewidth = 0.5) + # Add horizontal and vertical threshold lines geom_hline(yintercept = 0.05, linetype = "dashed", color = "gray") + geom_hline(yintercept = 0.10, linetype = "dashed", color = "red") + geom_vline(xintercept = 0.05, linetype = "dashed", color = "gray") + geom_vline(xintercept = 0.10, linetype = "dashed", color = "red") + labs(title = "Subdomain Comparisons for Female_AD vs Male_AD", x = "Male (% of subjects)", y = "Female (% of subjects)") + theme( text = element_text(size = 30), legend.position = "right", axis.text = element_text(size = 30), axis.title = element_text(size = 30) ) + theme(legend.text=element_text(size=16))

######################################################################################################################################################################################################## # Calculate difference dynamically (Female - Male) data_subdomain_MvF_AD <- data_subdomain_MvF_AD %>% mutate(Difference = Female - Male) %>% # Compute difference arrange(Difference) # Sort data: Male-dominant (negative) first, Female-dominant (positive) last # Select top pathways for better visualization top_n <- 20 # Adjust this number as needed data_top <- data_subdomain_MvF_AD[c(1:(top_n/2), (nrow(data_subdomain_MvF_AD) - (top_n/2) + 1):nrow(data_subdomain_MvF_AD)), ] # Create a diverging bar plot with value labels ggplot(data_top, aes(x = reorder(Biodomain_Subdomain, Difference), y = Difference, fill = Difference > 0)) + geom_bar(stat = "identity", show.legend = FALSE) + coord_flip() + # Flip for horizontal bars scale_fill_manual(values = c("blue", "red")) + # Blue = Male-dominant, Red = Female-dominant theme_minimal() + labs(title = "Top Male vs Female Subdomains", #subtitle = "Red = More Female-Dominant | Blue = More Male-Dominant", x = "Biodomain_Subdomain", y = "% Difference") + theme(text = element_text(size = 14), axis.text = element_text(size = 14))

ggplot(data_top, aes(x = reorder(Biodomain_Subdomain, Difference), y = Difference, fill = Direction)) + geom_bar(stat = "identity", show.legend = TRUE) + geom_text(aes(label = paste0(round(abs(Difference) * 100, 1), "%")), # absolute value ×100 + % sign hjust = ifelse(data_top$Difference >= 0, -0.1, 1.1), # position text left or right size = 8) + coord_flip() + # horizontal bars scale_fill_manual(values = c("DOWN" = "lightcoral", "UP" = "palegreen")) + theme_minimal() + labs(title = "Top Male vs Female Subdomains", x = "Biodomain_Subdomain", y = "% Difference", fill = "Direction") + theme(text = element_text(size = 30), axis.text = element_text(size = 20)) + expand_limits(y = c(min(data_top$Difference) * 1.2, max(data_top$Difference) * 1.2))

####################################################################################################################################################################################################### library(ggplot2) library(dplyr) library(tidyr)
# Select top N subdomains by absolute difference top_n <- 20 data_top <- data_subdomain_MvF_AD %>% mutate(Difference = Female - Male, AbsDifference = abs(Difference)) %>% arrange(desc(AbsDifference)) %>% slice(1:top_n) # Reshape to long format (Female and Male side-by-side) data_long <- data_top %>% select(Biodomain_Subdomain_initial, Direction, Female, Male) %>% pivot_longer(cols = c(Female, Male), names_to = "Group", values_to = "Value") %>% mutate( Percent = Value * 100, # Invert values for DOWN direction to go on the left PlotValue = ifelse(Direction == "DOWN", -Percent, Percent), Group = factor(Group, levels = c("Female", "Male")) ) # Plot ggplot(data_long, aes(x = reorder(Biodomain_Subdomain_initial, PlotValue), y = PlotValue)) + geom_segment(aes(xend = Biodomain_Subdomain_initial, y = 0, yend = PlotValue, color = Group), size = 1.5) + geom_point(aes(color = Group), size = 5) + geom_text(aes(label = paste0(round(abs(PlotValue), 1), "%"), color = Group), hjust = ifelse(data_long$PlotValue > 0, -0.2, 1.2), nudge_x = 0.4, size = 5) + coord_flip() + scale_color_manual(values = c("Female" = "hotpink4", "Male" = "lightblue4")) + labs( title = "Top 10 Diverging Subdomains: Female vs Male", x = "Biodomain (Subdomain)", y = "Percentage (Direction-Based)", color = "Group" ) + theme_minimal(base_size = 15) + theme( text = element_text(size = 18), axis.text.y = element_text(size = 14), legend.position = "top" ) + expand_limits(y = c(-max(data_long$Percent) * 1.2, max(data_long$Percent) * 1.2))

######################################################################################################################################################################################################## ######################################################################################################################################################################################################## # AD vs AsymAD # Open the subdomains where the Male and Female percentages are combined together to get AD and AsymAD specific subdomains # Further, UP (+) and DOWN (-) subdomains are merged together in a single column and the data is separated as a comparison for AD vs AsymAD subdomains #data_subdomain_ADvAsymAD <- read.csv("AD_AsymAD_comarative_subdomain_results.csv", sep =",", header = TRUE, stringsAsFactors = FALSE) data_subdomain_ADvAsymAD <- read.csv("AD_AsymAD_comarative_subdomain_results_050525.csv", sep =",", header = TRUE, stringsAsFactors = FALSE) head(data_subdomain_ADvAsymAD, 5)
## Sno unique_id Biodomain Subdomain_initial ## 1 1 AM_ac_4 APP Metabolism amyloid-beta clearance ## 2 2 AM_af_5 APP Metabolism amyloid-beta formation ## 3 3 AM_appbp_2 APP Metabolism amyloid precursor protein biosynthetic process ## 4 4 AM_appmp_3 APP Metabolism amyloid precursor protein metabolic process ## 5 5 AM_others_1 APP Metabolism other ## Subdomain ## 1 amyloid-beta clearance + ## 2 amyloid-beta formation + ## 3 amyloid precursor protein biosynthetic process + ## 4 amyloid precursor protein metabolic process + ## 5 APP_Metabolism_others + ## Biodomain_Subdomain_initial ## 1 APP Metabolism_amyloid-beta clearance ## 2 APP Metabolism_amyloid-beta formation ## 3 APP Metabolism_amyloid precursor protein biosynthetic process ## 4 APP Metabolism_amyloid precursor protein metabolic process ## 5 APP Metabolism_other ## Biodomain_Subdomain Direction ## 1 APP_Metabolism_amyloid-beta_clearance + UP ## 2 APP_Metabolism_amyloid-beta_formation + UP ## 3 APP_Metabolism_amyloid_precursor_protein_biosynthetic_process + UP ## 4 APP_Metabolism_amyloid_precursor_protein_metabolic_process + UP ## 5 APP_Metabolism_others + UP ## AD AsymAD Difference ## 1 0.18666888 0.09949479 0.08717409 ## 2 0.18750000 0.19001959 0.00251959 ## 3 0.10638298 0.08825652 0.01812646 ## 4 0.06599069 0.02897206 0.03701863 ## 5 0.06599069 0.04783998 0.01815071
# Plot the data as a comparison for AD vs AsymAD pathways ggplot(data_subdomain_ADvAsymAD, aes(x = AD, y = AsymAD, color = Biodomain, size = Difference)) + geom_point(aes(color = Biodomain, size = Difference), alpha = 0.3, stroke = 0) + theme_bw() + # Conditionally label points based on the specified conditions geom_text_repel( aes(label = ifelse((AD > 0.10 & Difference > 0.05) | (AsymAD > 0.10 & Difference > 0.05) | (AD > 0.15 & AsymAD > 0.15), Subdomain, "")), size = 8 ) + scale_color_manual(values = custom_colors) + labs(color = "AD Biodomain") + # Add x = y line geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray", size = 0.5) + # Add horizontal and vertical dashed lines geom_hline(yintercept = 0.05, linetype = "dashed", color = "gray") + geom_hline(yintercept = 0.10, linetype = "dashed", color = "red") + geom_vline(xintercept = 0.05, linetype = "dashed", color = "gray") + geom_vline(xintercept = 0.10, linetype = "dashed", color = "red") + labs(title = "Subdomain Comparisons for AD vs AsymAD", x = "AD (% of subjects)", y = "AsymAD (% of subjects)") + theme( text = element_text(size = 30), legend.position = "right", axis.text = element_text(size = 30), axis.title = element_text(size = 30) )
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider ## increasing max.overlaps

######################################################################################################################################################################################################## # Wrap long Subdomain names #data_subdomain_ADvAsymAD <- data_subdomain_ADvAsymAD %>% #mutate(Subdomain_wrapped = str_wrap(Subdomain, width = 25)) ggplot(data_subdomain_ADvAsymAD, aes(x = AD, y = AsymAD, color = Biodomain, size = Difference)) + geom_point(alpha = 0.3, stroke = 0) + # Points with transparency theme_bw() + # Conditionally label points where |AD - AsymAD| >= 0.05 geom_text_repel( aes(label = ifelse((abs(AD - AsymAD) >= 0.06) | (AD > 0.15 & AsymAD > 0.15), Subdomain, "")), size = 8, max.overlaps = Inf ) + scale_size(range = c(.1, 10), name="% Difference") + scale_color_manual(values = custom_colors) + labs(color = "AD Biodomain") + # Add x = y reference line geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray", size = 0.5) + # Add horizontal and vertical threshold lines geom_hline(yintercept = 0.05, linetype = "dashed", color = "gray") + geom_hline(yintercept = 0.10, linetype = "dashed", color = "red") + geom_vline(xintercept = 0.05, linetype = "dashed", color = "gray") + geom_vline(xintercept = 0.10, linetype = "dashed", color = "red") + # Title and axis labels labs(title = "Subdomain Comparisons for AD vs AsymAD", x = "AD (% of subjects)", y = "AsymAD (% of subjects)") + # Theme customization theme( text = element_text(size = 30), legend.position = "right", axis.text = element_text(size = 30), axis.title = element_text(size = 30) ) + theme(legend.text=element_text(size=16))

####################################################################################################################################################################################################### # Calculate difference dynamically (AD - AsuymAD) data_subdomain_ADvAsymAD <- data_subdomain_ADvAsymAD %>% mutate(Difference = AD - AsymAD) %>% # Compute difference arrange(Difference) # Sort data: AD-dominant (negative) first, AsymAD-dominant (positive) last # Select top pathways for better visualization top_n <- 20 # Adjust this number as needed data_top <- data_subdomain_ADvAsymAD[c(1:(top_n/2), (nrow(data_subdomain_ADvAsymAD) - (top_n/2) + 1):nrow(data_subdomain_ADvAsymAD)), ] # Create a diverging bar plot with value labels ggplot(data_top, aes(x = reorder(Biodomain_Subdomain_initial, Difference), y = Difference, fill = Difference > 0)) + geom_bar(stat = "identity", show.legend = FALSE) + coord_flip() + # Flip for horizontal bars scale_fill_manual(values = c("blue", "red")) + # Blue = Male-dominant, Red = Female-dominant theme_minimal() + labs(title = "Top AD vs AsymAD Subdomains", x = "Biodomain_Subdomain", y = "% Difference") + theme(text = element_text(size = 14), axis.text = element_text(size = 14))

ggplot(data_top, aes(x = reorder(Biodomain_Subdomain, Difference), y = Difference, fill = Direction)) + geom_bar(stat = "identity", show.legend = TRUE) + geom_text(aes(label = paste0(round(abs(Difference) * 100, 1), "%")), # absolute value ×100 + % sign hjust = ifelse(data_top$Difference >= 0, -0.1, 1.1), # position text left or right size = 8) + coord_flip() + # horizontal bars scale_fill_manual(values = c("DOWN" = "lightcoral", "UP" = "palegreen")) + theme_minimal() + labs(title = "Top AD vs AsymAD Subdomains", x = "Biodomain (Subdomain)", y = "% Difference", fill = "Direction") + theme(text = element_text(size = 30), axis.text = element_text(size = 20)) + expand_limits(y = c(min(data_top$Difference) * 1.2, max(data_top$Difference) * 1.2))

####################################################################################################################################################################################################### library(ggplot2) library(dplyr) library(tidyr) # Select top N subdomains by absolute difference top_n <- 20 data_top <- data_subdomain_ADvAsymAD %>% mutate(Difference = AD - AsymAD, AbsDifference = abs(Difference)) %>% arrange(desc(AbsDifference)) %>% slice(1:top_n) # Reshape to long format (AD and AsymAD side-by-side) data_long <- data_top %>% select(Biodomain_Subdomain_initial, Direction, AD, AsymAD) %>% pivot_longer(cols = c(AD, AsymAD), names_to = "Group", values_to = "Value") %>% mutate( Percent = Value * 100, # Invert values for DOWN direction to go on the left PlotValue = ifelse(Direction == "DOWN", -Percent, Percent), Group = factor(Group, levels = c("AD", "AsymAD")) ) # Plot ggplot(data_long, aes(x = reorder(Biodomain_Subdomain_initial, PlotValue), y = PlotValue)) + geom_segment(aes(xend = Biodomain_Subdomain_initial, y = 0, yend = PlotValue, color = Group), size = 1.5) + geom_point(aes(color = Group), size = 5) + geom_text(aes(label = paste0(round(abs(PlotValue), 1), "%"), color = Group), hjust = ifelse(data_long$PlotValue > 0, -0.2, 1.2), nudge_x = 0.4, size = 5) + coord_flip() + scale_color_manual(values = c("AD" = "purple4", "AsymAD" = "mediumpurple1")) + labs( title = "Top 10 Diverging Subdomains: AD vs AsymAD", x = "Biodomain (Subdomain)", y = "Percentage (Direction-Based)", color = "Group" ) + theme_minimal(base_size = 15) + theme( text = element_text(size = 18), axis.text.y = element_text(size = 14), legend.position = "top" ) + expand_limits(y = c(-max(data_long$Percent) * 1.2, max(data_long$Percent) * 1.2))

Graph-based Clustering using subdomain Specific Z score Distributions Results (AD male)
# # create igraph S3 object net_pos_AD_male <- graph.data.frame(sig_subdomain_AD_male_pos_edge, directed = FALSE) # Get unique vertices from biodomain_subdomain_annotation and variable term_name_vertices <- unique(sig_subdomain_AD_male_pos_edge$biodomain_subdomain_annotation) variable_vertices <- unique(sig_subdomain_AD_male_pos_edge$variable) # Assign shape "square" to term_name vertices and "circle" to variable vertices vertex_shapes <- rep("circle", vcount(net_pos_AD_male)) # Initialize all vertices as circles vertex_shapes[V(net_pos_AD_male)$name %in% term_name_vertices] <- "square" # Assign square shape to Var1 vertices # store original margins orig_mar <- par()$mar # set new margins to limit whitespace in plot par(mar=rep(.1, 4)) # plot the igraph S3 object # We can use various layout algorithms in igraph to generate the layout, such as Fruchterman-Reingold layout (layout_with_fr), Kamada-Kawai layout (layout_with_kk), or circular layout (layout_in_circle) plot(net_pos_AD_male, layout = layout_with_fr(net_pos_AD_male), edge.width = E(net_pos_AD_male)$value, vertex.label.cex = .5, vertex.size = 3, vertex.shape = vertex_shapes) # Export your igraph object as an edge list write.graph(net_pos_AD_male, file = "net_pos_AD_male.graphml", format = "graphml") # community detection based on edge betweenness (Newman-Girvan) #ceb_pos_AD_male <- cluster_edge_betweenness(net_pos_AD_male, weights = E(net_pos_AD_male)$value) # community detection based on Infomap Algorithm #ceb_pos_AD_male <- cluster_infomap(net_pos_AD_male, e.weights = E(net_pos_AD_male)$value) # community detection based on Louvain Clustering (Modularity Optimization) ceb_pos_AD_male <- cluster_louvain(net_pos_AD_male, weights = E(net_pos_AD_male)$value) class(ceb_pos_AD_male) # plot the igraph S3 objec twith sub-cluster plot(ceb_pos_AD_male, net_pos_AD_male, layout = layout_with_fr(net_pos_AD_male), edge.width = E(net_pos_AD_male)$value, vertex.label.cex = .5, vertex.size = 3, vertex.shape = vertex_shapes) # community membership for each node par(mar=orig_mar) dendPlot(ceb_pos_AD_male, mode="hclust", cex=.5) # Get the community membership data community_membership_pos_AD_male <- membership(ceb_pos_AD_male) # Convert it to a data frame with two columns: Node and Community community_df_pos_AD_male <- data.frame(Node = names(community_membership_pos_AD_male), Community = as.integer(community_membership_pos_AD_male)) # Count the number of community detected unique(community_df_pos_AD_male$Community) # Write the data frame to a file write.table(community_df_pos_AD_male, file = "community_membership_subdomain_pos_AD_male.txt", row.names = FALSE, col.names = TRUE, sep = "\t") ################################################################################################################################################## # Make individual community as example # Subset the graph for Community 5 community_5_nodes <- V(net_pos_AD_male)[community_df_pos_AD_male$Community == 5] # Select nodes from Community 5 # Induce a subgraph with only Community 5 nodes subgraph_community_5 <- induced_subgraph(net_pos_AD_male, community_5_nodes) # Assign "circle" shape to all vertices V(subgraph_community_5)$shape <- "circle" # Set "square" shape for vertices that match the term_name_vertices V(subgraph_community_5)$shape[V(subgraph_community_5)$name %in% term_name_vertices] <- "square" # Assign "blue" color to all vertices (default for circles) V(subgraph_community_5)$color <- "lightblue" # Set "red" color for vertices that are squares V(subgraph_community_5)$color[V(subgraph_community_5)$shape == "square"] <- "khaki1" # set new margins to limit whitespace in plot #par(mar=rep(.1, 4)) # Plot the subgraph for Community 5 plot(subgraph_community_5, layout = layout_with_fr(subgraph_community_5), edge.width = E(net_pos_AD_male)$value, vertex.size = 10, vertex.label.cex = 1, vertex.label.color = "black", main = "Community 5 Subgraph") # Convert igraph object to tbl_graph (a tidygraph object) #graph_tbl <- as_tbl_graph(subgraph_community_5) # Create the ggraph plot #ggraph(graph_tbl, layout = 'fr') + # Fruchterman-Reingold layout #geom_edge_link(aes(width = E(subgraph_community_5)$value), alpha = 0.5) + # Edges #geom_node_point(aes(color = V(subgraph_community_5)$color, shape = V(subgraph_community_5)$shape), size = 10) + # Vertices #geom_node_text(aes(label = V(subgraph_community_5)$name), repel = TRUE, size = 10) + # Non-overlapping labels #theme_void() + # Minimalist theme #scale_shape_manual(values = c("circle" = 16, "square" = 15)) + # Map shapes #scale_color_manual(values = c("lightblue", "khaki1")) # Map colors ################################################################################################################################################## # Make heatmap for the comunitties # Select the biodomain_subdomain_annotation and variable entities from the sig_subdomain_AD_male_pos_edge file term_name <- sig_subdomain_AD_male_pos_edge$biodomain_subdomain_annotation variable <- sig_subdomain_AD_male_pos_edge$variable # Separate term_name and variable entities based on their community membership and sort the Community column in ascending order term_name_nodes <- community_df_pos_AD_male[community_df_pos_AD_male$Node %in% term_name, ] term_name_nodes <- term_name_nodes[order(term_name_nodes$Community), ] variable_nodes <- community_df_pos_AD_male[community_df_pos_AD_male$Node %in% variable, ] variable_nodes <- variable_nodes[order(variable_nodes$Community), ] head(term_name_nodes) head(variable_nodes) # Prepare the row and col entries for a heatmap based on term_name and variable entities row_names <- unique(c(term_name_nodes$Node)) col_names <- unique(c(variable_nodes$Node)) # Create a matrix filled with zeros matrix_data_AD_male_pos <- matrix(0, nrow = length(row_names), ncol = length(col_names), dimnames = list(row_names, col_names)) # Populate the matrix based on the information in term_name_nodes and variable_nodes for (i in 1:nrow(term_name_nodes)) { matrix_data_AD_male_pos[term_name_nodes[i, "Node"], variable_nodes[variable_nodes$Community == term_name_nodes[i, "Community"], "Node"]] <- variable_nodes[variable_nodes$Community == term_name_nodes[i, "Community"], "Community"] } write.table(matrix_data_AD_male_pos,"subdomain_matrix_data_pos_AD_male.txt",sep="\t",quote=F) # Create heatmap with matrix matrix_data_AD_male_pos <- as.matrix(matrix_data_AD_male_pos) # Adjust the margins to make room for the X-axis labels par(mar = c(5, 5, 4, 2) + 0.1) heatmap(matrix_data_AD_male_pos, Rowv = NA, Colv = NA, cex.axis = 0.5, main = "Communities identified by Graph Based Clustering for subdomain Comparative subdomain Enrichment Analysis (Male UP subdomains)") ######################################################################################################################################################### # tranfer the data matrix into two column interaction data sig_subdomain_AD_male_neg_edge <- melt(sig_subdomain_AD_male_neg) dim(sig_subdomain_AD_male_neg_edge) # Remove the edges where value (Z score) is "NA" sig_subdomain_AD_male_neg_edge <- filter(sig_subdomain_AD_male_neg_edge, !is.na(value)) dim(sig_subdomain_AD_male_neg_edge) # Convert the negative values in the 'value' column to absolute value for network construction sig_subdomain_AD_male_neg_edge$value <- abs(sig_subdomain_AD_male_neg_edge$value) # create igraph S3 object net_neg_AD_male <- graph.data.frame(sig_subdomain_AD_male_neg_edge, directed = FALSE) # Get unique vertices from biodomain_subdomain_annotation and variable term_name_vertices <- unique(sig_subdomain_AD_male_neg_edge$biodomain_subdomain_annotation) variable_vertices <- unique(sig_subdomain_AD_male_neg_edge$variable) # Assign shape "square" to term_name vertices and "circle" to variable vertices vertex_shapes <- rep("circle", vcount(net_neg_AD_male)) # Initialize all vertices as circles vertex_shapes[V(net_neg_AD_male)$name %in% term_name_vertices] <- "square" # Assign square shape to Var1 vertices # store original margins orig_mar <- par()$mar # set new margins to limit whitespace in plot par(mar=rep(.1, 4)) # plot the igraph S3 object # We can use various layout algorithms in igraph to generate the layout, such as Fruchterman-Reingold layout (layout_with_fr), Kamada-Kawai layout (layout_with_kk), or circular layout (layout_in_circle) plot(net_neg_AD_male, layout = layout_with_fr(net_neg_AD_male), edge.width = E(net_neg_AD_male)$value, vertex.label.cex = .5, vertex.size = 3, vertex.shape = vertex_shapes) # community detection based on Weighted Newman-Girvan (Edge Betweenness) #ceb_neg_AD_male <- cluster_edge_betweenness(net_neg_AD_male, weights = E(net_neg_AD_male)$value) # community detection based on Louvain Clustering (Modularity Optimization) ceb_neg_AD_male <- cluster_louvain(net_neg_AD_male, weights = E(net_neg_AD_male)$value) # community detection based on Infomap Algorithm #ceb_neg_AD_male <- cluster_infomap(net_neg_AD_male, e.weights = E(net_neg_AD_male)$value) # community detection based on Walktrap Algorithm #ceb_neg_AD_male <- cluster_walktrap(net_neg_AD_male, weights = E(net_neg_AD_male)$value) class(ceb_neg_AD_male) # plot the igraph S3 objec twith sub-cluster plot(ceb_neg_AD_male, net_neg_AD_male, layout = layout_with_fr(net_neg_AD_male), edge.width = E(net_neg_AD_male)$value, vertex.label.cex = .5, vertex.size = 3, vertex.shape = vertex_shapes) # community membership for each node par(mar=orig_mar) dendPlot(ceb_neg_AD_male, mode="hclust", cex=.5) # Get the community membership data community_membership_neg_AD_male <- membership(ceb_neg_AD_male) # Convert it to a data frame with two columns: Node and Community community_df_neg_AD_male <- data.frame(Node = names(community_membership_neg_AD_male), Community = as.integer(community_membership_neg_AD_male)) # Count the number of community detected unique(community_df_neg_AD_male$Community) # Write the data frame to a file write.table(community_df_neg_AD_male, file = "community_membership_subdomain_neg_AD_male.txt", row.names = FALSE, col.names = TRUE, sep = "\t") # Select the biodomain_subdomain_annotation and variable entities from the sig_subdomain_AD_male_neg_edge file term_name <- sig_subdomain_AD_male_neg_edge$biodomain_subdomain_annotation variable <- sig_subdomain_AD_male_neg_edge$variable # Separate term_name and variable entities based on their community membership and sort the Community column in ascending order term_name_nodes <- community_df_neg_AD_male[community_df_neg_AD_male$Node %in% term_name, ] term_name_nodes <- term_name_nodes[order(term_name_nodes$Community), ] variable_nodes <- community_df_neg_AD_male[community_df_neg_AD_male$Node %in% variable, ] variable_nodes <- variable_nodes[order(variable_nodes$Community), ] head(term_name_nodes) head(variable_nodes) # Prepare the row and col entries for a heatmap based on term_name and variable entities row_names <- unique(c(term_name_nodes$Node)) col_names <- unique(c(variable_nodes$Node)) # Create a matrix filled with zeros matrix_data_AD_male_neg <- matrix(0, nrow = length(row_names), ncol = length(col_names), dimnames = list(row_names, col_names)) # Populate the matrix based on the information in term_name_nodes and variable_nodes for (i in 1:nrow(term_name_nodes)) { matrix_data_AD_male_neg[term_name_nodes[i, "Node"], variable_nodes[variable_nodes$Community == term_name_nodes[i, "Community"], "Node"]] <- variable_nodes[variable_nodes$Community == term_name_nodes[i, "Community"], "Community"] } write.table(matrix_data_AD_male_neg,"subdomain_matrix_data_neg_AD_male.txt",sep="\t",quote=F) # Create heatmap with matrix matrix_data_AD_male_neg <- as.matrix(matrix_data_AD_male_neg) # Adjust the margins to make room for the X-axis labels par(mar = c(5, 5, 4, 2) + 0.1) heatmap(matrix_data_AD_male_neg, Rowv = NA, Colv = NA, cex.axis = 0.5, main = "Communities identified by Graph Based Clustering for subdomain Comparative subdomain Enrichment Analysis (Male DOWN subdomains)") end.rcode--> #Graph-based Clustering using subdomain Specific Z score Distributions Results (AD female)
#Save individual matrix file as .tiff format
#