Individual subdomain Specific Z score Calculation using TMT Proteomics from the ROSMAP Cohort (comparative subdomain Profiling - AD-male vs AD-female)

Percentage of individuals in each group with associated subdomains (subdomain)

# ######################################################################################################################################################################################################## ######################################################################################################################################################################################################## # UP and DOWN subdomain Separate ploting
library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 4.5.2
library(reshape2)
library(RColorBrewer)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(viridis)
## Loading required package: viridisLite
library(ggrepel)
library(corrplot)
## corrplot 0.95 loaded
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(patchwork)
library(stringr)


# Open subdomains comparison file"
#data_subdomain <- read.csv("comarative_subdomain_results.csv", sep =",", header = TRUE, stringsAsFactors = FALSE)

data_subdomain <- read.csv("comarative_subdomain_results_050525.csv", sep =",", header = TRUE, stringsAsFactors = FALSE)


# Truncate long Biodomain_Subdomain names to 40 characters
data_subdomain <- data_subdomain %>%
  mutate(Biodomain_Subdomain = str_trunc(Biodomain_Subdomain, width = 40))


# Removing the 1st and 2nd columns
data_subdomain_new_UP <- data_subdomain[, -c(1, 2, 3, 4, 7, 9, 11, 13)]

head(data_subdomain_new_UP)
##                        Biodomain_Subdomain UP_ratio_AD_female UP_ratio_AD_male
## 1    APP Metabolism_amyloid-beta clearance          0.1015625       0.08510638
## 2    APP Metabolism_amyloid-beta formation          0.1875000       0.00000000
## 3 APP Metabolism_amyloid precursor prot...          0.0000000       0.10638298
## 4 APP Metabolism_amyloid precursor prot...          0.0234375       0.04255319
## 5                     APP Metabolism_other          0.0234375       0.04255319
## 6 Apoptosis_apoptotic mitochondrial cha...          0.0390625       0.00000000
##   UP_ratio_AsymAD_female UP_ratio_AsymAD_male
## 1            0.050314465           0.04918033
## 2            0.157232704           0.03278689
## 3            0.006289308           0.08196721
## 4            0.012578616           0.01639344
## 5            0.031446541           0.01639344
## 6            0.050314465           0.00000000
data_subdomain_new_DOWN <- data_subdomain[, -c(1, 2, 3, 4, 6, 8, 10, 12)]

head(data_subdomain_new_DOWN)
##                        Biodomain_Subdomain DOWN_ratio_AD_female
## 1    APP Metabolism_amyloid-beta clearance            0.0234375
## 2    APP Metabolism_amyloid-beta formation            0.0234375
## 3 APP Metabolism_amyloid precursor prot...            0.0781250
## 4 APP Metabolism_amyloid precursor prot...            0.0078125
## 5                     APP Metabolism_other            0.0156250
## 6 Apoptosis_apoptotic mitochondrial cha...            0.0546875
##   DOWN_ratio_AD_male DOWN_ratio_AsymAD_female DOWN_ratio_AsymAD_male
## 1         0.00000000              0.006289308             0.01639344
## 2         0.06382979              0.006289308             0.06557377
## 3         0.06382979              0.056603774             0.09836066
## 4         0.04255319              0.006289308             0.00000000
## 5         0.06382979              0.025157233             0.03278689
## 6         0.12765957              0.012578616             0.03278689
data_matrix_subdomain_UP <- data_subdomain_new_UP[, -1]

# Reorder the columns
data_matrix_subdomain_UP <- data_matrix_subdomain_UP[, c(
  "UP_ratio_AD_female",
  "UP_ratio_AsymAD_female",
  "UP_ratio_AD_male",
  "UP_ratio_AsymAD_male"
)]


data_matrix_subdomain_DOWN <- data_subdomain_new_DOWN[, -1]

# Reorder the columns
data_matrix_subdomain_DOWN <- data_matrix_subdomain_DOWN[, c(
  "DOWN_ratio_AD_female",
  "DOWN_ratio_AsymAD_female",
  "DOWN_ratio_AD_male",
  "DOWN_ratio_AsymAD_male"
)]



# Replace NA values with 0 (or any other specific value)
data_matrix_subdomain_UP[is.na(data_matrix_subdomain_UP)] <- 0
data_matrix_subdomain_DOWN[is.na(data_matrix_subdomain_DOWN)] <- 0

# Set up the color scale using RColorBrewer
color_scale <- colorRampPalette(brewer.pal(9, "RdYlGn"))

# Assign custom column names
colnames(data_matrix_subdomain_UP) <- c("AD_Female", "AsymAD_Female", "AD_Male", "AsymAD_Male")

# Assign custom column names
colnames(data_matrix_subdomain_DOWN) <- c("AD_Female", "AsymAD_Female", "AD_Male", "AsymAD_Male")


# Set up the color scale using RColorBrewer
color_scale <- colorRampPalette(brewer.pal(9, "RdYlGn"))
color_scale1 <- colorRampPalette(c("white", "lightblue", "blue", "darkblue"))
color_scale2 <- colorRampPalette(brewer.pal(9, "RdGy"))
color_scale3 <- colorRampPalette(brewer.pal(9, "Greens"))
color_scale4 <- colorRampPalette(brewer.pal(9, "Blues"))
color_scale5 <- colorRampPalette(brewer.pal(9, "Purples"))
color_scale6 <- colorRampPalette(brewer.pal(9, "Oranges"))



heatmap.2(
as.matrix(data_matrix_subdomain_UP),
Rowv = TRUE,
Colv = FALSE,
col = color_scale3,
scale = "none",  # Scale rows (genes) to Z-scores
trace = "none",  # Turn off row and column annotations
margins = c(20, 40),  # Set margins for row and column labels
key = TRUE,  # Include a color key
keysize = .5,  # Size of the color key
cexCol = 1.5,  # Set column label size
cexRow = 1,  # Set row label size
cellnote=round(data_matrix_subdomain_UP, 2),
notecol="black",
labRow = data_subdomain$Biodomain_Subdomain,  # Row labels (subdomain name)
dendrogram = "row",  # Show both row and column dendrograms
main="Individuals (ratio) in each group exhibit a significant subdomain (subdomain)"
)
plot of chunk unnamed-chunk-1
heatmap.2(
as.matrix(data_matrix_subdomain_DOWN),
Rowv = TRUE,
Colv = FALSE,
col = color_scale6,
scale = "none",  # Scale rows (genes) to Z-scores
trace = "none",  # Turn off row and column annotations
margins = c(20, 40),  # Set margins for row and column labels
key = TRUE,  # Include a color key
keysize = .5,  # Size of the color key
cexCol = 1.5,  # Set column label size
cexRow = 1,  # Set row label size
cellnote=round(data_matrix_subdomain_DOWN, 2),
notecol="black",
labRow = data_subdomain$Biodomain_Subdomain,  # Row labels (subdomain name)
dendrogram = "row",  # Show both row and column dendrograms
main="Individuals (ratio) in each group exhibit a significant subdomain (subdomain)"
)
plot of chunk unnamed-chunk-1
########################################################################################################################################################################################################

subset_data_subdomain_UP <- data_subdomain_new_UP %>% filter(UP_ratio_AD_female >= 0.10 | UP_ratio_AD_male >= 0.10 | UP_ratio_AsymAD_female >= 0.10 | UP_ratio_AsymAD_male >= 0.10)

subset_data_subdomain_DOWN <- data_subdomain_new_DOWN %>% filter(DOWN_ratio_AD_female >= 0.10 | DOWN_ratio_AD_male >= 0.10 | DOWN_ratio_AsymAD_female >= 0.10 | DOWN_ratio_AsymAD_male >= 0.10)


rownames(subset_data_subdomain_UP) <- subset_data_subdomain_UP$Biodomain_Subdomain
rownames(subset_data_subdomain_DOWN) <- subset_data_subdomain_DOWN$Biodomain_Subdomain


subset_data_subdomain_UP <- subset_data_subdomain_UP[, -1]
subset_data_subdomain_DOWN <- subset_data_subdomain_DOWN[, -1]


# Replace NA values with 0 (or any other specific value)
subset_data_subdomain_UP[is.na(subset_data_subdomain_UP)] <- 0
subset_data_subdomain_DOWN[is.na(subset_data_subdomain_DOWN)] <- 0


head(subset_data_subdomain_UP, 3)
##                                          UP_ratio_AD_female UP_ratio_AD_male
## APP Metabolism_amyloid-beta clearance             0.1015625       0.08510638
## APP Metabolism_amyloid-beta formation             0.1875000       0.00000000
## APP Metabolism_amyloid precursor prot...          0.0000000       0.10638298
##                                          UP_ratio_AsymAD_female
## APP Metabolism_amyloid-beta clearance               0.050314465
## APP Metabolism_amyloid-beta formation               0.157232704
## APP Metabolism_amyloid precursor prot...            0.006289308
##                                          UP_ratio_AsymAD_male
## APP Metabolism_amyloid-beta clearance              0.04918033
## APP Metabolism_amyloid-beta formation              0.03278689
## APP Metabolism_amyloid precursor prot...           0.08196721
head(subset_data_subdomain_DOWN, 3)
##                                          DOWN_ratio_AD_female
## Apoptosis_apoptotic mitochondrial cha...            0.0546875
## Autophagy_mitophagy                                 0.0703125
## Cell Cycle_cell cycle phase transition              0.0234375
##                                          DOWN_ratio_AD_male
## Apoptosis_apoptotic mitochondrial cha...         0.12765957
## Autophagy_mitophagy                              0.06382979
## Cell Cycle_cell cycle phase transition           0.08510638
##                                          DOWN_ratio_AsymAD_female
## Apoptosis_apoptotic mitochondrial cha...               0.01257862
## Autophagy_mitophagy                                    0.03773585
## Cell Cycle_cell cycle phase transition                 0.03773585
##                                          DOWN_ratio_AsymAD_male
## Apoptosis_apoptotic mitochondrial cha...             0.03278689
## Autophagy_mitophagy                                  0.11475410
## Cell Cycle_cell cycle phase transition               0.14754098
# Reorder the columns
subset_data_subdomain_UP <- subset_data_subdomain_UP[, c(
  "UP_ratio_AD_female",
  "UP_ratio_AsymAD_female",
  "UP_ratio_AD_male",
  "UP_ratio_AsymAD_male"
)]

subset_data_subdomain_DOWN <- subset_data_subdomain_DOWN[, c(
  "DOWN_ratio_AD_female",
  "DOWN_ratio_AsymAD_female",
  "DOWN_ratio_AD_male",
  "DOWN_ratio_AsymAD_male"
)]


# Assign custom column names
colnames(subset_data_subdomain_UP) <- c("AD_Female", "AsymAD_Female", "AD_Male", "AsymAD_Male")

colnames(subset_data_subdomain_DOWN) <- c("AD_Female", "AsymAD_Female", "AD_Male", "AsymAD_Male")




# Create the heatmap with filtered row names
UP <- heatmap.2(
     as.matrix(subset_data_subdomain_UP),
     Rowv = TRUE,
     Colv = FALSE,
     col = color_scale3,
     scale = "none",
     trace = "none",
     margins = c(20, 50),
     key = TRUE,
     keysize = .50,
     cexCol = 2.5,
     srtCol = 45,
     cexRow = 2.5,
    cellnote=round(subset_data_subdomain_UP, 2),
    notecex=2,
    notecol="black",
    dendrogram = "row",
    main = "subdomains Present in atleast 5% Individuals"
)
plot of chunk unnamed-chunk-1
# Create the heatmap with filtered row names
DOWN <- heatmap.2(
     as.matrix(subset_data_subdomain_DOWN),
     Rowv = TRUE,
     Colv = FALSE,
     col = color_scale6,
     scale = "none",
     trace = "none",
     margins = c(20, 50),
     key = TRUE,
     keysize = .50,
     cexCol = 2.5,
     srtCol = 45,
     cexRow = 2.5,
    cellnote=round(subset_data_subdomain_DOWN, 2),
    notecex=2,
    notecol="black",
    dendrogram = "row",
    main = "subdomains Present in atleast 5% Individuals"
)
plot of chunk unnamed-chunk-1

Comparative analysis between/within group for associated subdomains (subdomain)

#

Comparative analysis between/within group for associated subdomains (subdomain)

########################################################################################################################################################################################################
# Male vs Female

# Define a custom color palette with distinct colors for each biodomain (different type of Biodomain - color define by Greg Carry)
custom_colors <- c("Apoptosis" = "#673399", "APP Metabolism" = "#fe6500", "Autophagy" = "#9931fd", "Cell Cycle" = "#18857f", "DNA Repair" = "#f451ad", "Endolysosome" = "#3466cc", "Epigenetic" = "#cb3233", "Immune Response" = "#9ccdcc", "Lipid Metabolism" = "#989898", "Metal Binding and Homeostasis" = "#4b0d20", "Mitochondrial Metabolism" = "#97cb98", "Myelination" = "#996735", "Oxidative Stress" = "#ffcd66", "Proteostasis" = "#c8b269", "RNA Spliceosome" = "#0c9aff", "Structural Stabilization" = "#ff9a9a", "Synapse" = "#329a33", "Tau Homeostasis" = "#cb97cb", "Vasculature" = "#cecd02", "none" = "#7f7f7f")

#  Male vs Female AD
# Open the subdomain file where UP (+) and DOWN (-) subdomains are merged together in a single column and the data is separated as a comparison for Male vs Female subdomains
#data_subdomain_MvF_AD <- read.csv("Male_Female_comarative_subdomain_results.csv", sep =",", header = TRUE, stringsAsFactors = FALSE)

data_subdomain_MvF_AD <- read.csv("Male_Female_comarative_subdomain_results_050525.csv", sep =",", header = TRUE, stringsAsFactors = FALSE)


head(data_subdomain_MvF_AD, 5)
##   Sno   unique_id      Biodomain                              Subdomain_initial
## 1   1     AM_ac_4 APP Metabolism                         amyloid-beta clearance
## 2   2     AM_af_5 APP Metabolism                         amyloid-beta formation
## 3   3  AM_appbp_2 APP Metabolism amyloid precursor protein biosynthetic process
## 4   4  AM_appmp_3 APP Metabolism    amyloid precursor protein metabolic process
## 5   5 AM_others_1 APP Metabolism                                          other
##                                          Subdomain
## 1                         amyloid-beta clearance +
## 2                         amyloid-beta formation +
## 3 amyloid precursor protein biosynthetic process +
## 4    amyloid precursor protein metabolic process +
## 5                          APP_Metabolism_others +
##                                     Biodomain_Subdomain_initial
## 1                         APP Metabolism_amyloid-beta clearance
## 2                         APP Metabolism_amyloid-beta formation
## 3 APP Metabolism_amyloid precursor protein biosynthetic process
## 4    APP Metabolism_amyloid precursor protein metabolic process
## 5                                         APP Metabolism _other
##                                               Biodomain_Subdomain Direction
## 1                         APP_Metabolism_amyloid-beta_clearance +        UP
## 2                         APP_Metabolism_amyloid-beta_formation +        UP
## 3 APP_Metabolism_amyloid_precursor_protein_biosynthetic_process +        UP
## 4    APP_Metabolism_amyloid_precursor_protein_metabolic_process +        UP
## 5                                         APP_Metabolism_others +        UP
##      Female       Male Difference
## 1 0.1015625 0.08510638 0.01645612
## 2 0.1875000 0.00000000 0.18750000
## 3 0.0000000 0.10638298 0.10638298
## 4 0.0234375 0.04255319 0.01911569
## 5 0.0234375 0.04255319 0.01911569
# Plot the data as a comparison for Male vs Female subdomains
ggplot(data_subdomain_MvF_AD, aes(x = Female, y = Male, color = Biodomain, size = Difference)) +
geom_point(aes(color=Biodomain, size = Difference), alpha=0.6, stroke = 0) +
 theme_bw() +
    # Conditionally label points where AD or AsymAD values are greater than 0.05
    geom_text_repel(
        aes(label = ifelse((Male > 0.10 & Female < 0.05) | (Female > 0.07 & Male < 0.05) | (Female > 0.07 & Male > 0.07), Biodomain_Subdomain, "")),
       size = 6, max.overlaps = Inf
    ) +

    scale_color_manual(values = custom_colors) +
    labs(color = "AD Biodomain") +
    # Add x = y line
    geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray", size = 0.5) +
    # Add horizontal and vertical dashed lines
    geom_hline(yintercept = 0.05, linetype = "dashed", color = "gray") +
    geom_hline(yintercept = 0.10, linetype = "dashed", color = "red") +
    geom_vline(xintercept = 0.05, linetype = "dashed", color = "gray") +
    geom_vline(xintercept = 0.10, linetype = "dashed", color = "red") +
    labs(title = "Subdomain Comparisons for Female_AD vs Male_AD", x = "Female (% of subjects)", y = "Male (% of subjects)") +
    theme(
    text = element_text(size = 30),
    legend.position = "right",
    axis.text = element_text(size = 30),
    axis.title = element_text(size = 30)
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
plot of chunk unnamed-chunk-2
########################################################################################################################################################################################################

# Wrap long Subdomain names
#data_subdomain_MvF_AD <- data_subdomain_MvF_AD %>%
    #mutate(Subdomain_wrapped = str_wrap(Subdomain, width = 25))

ggplot(data_subdomain_MvF_AD, aes(x = Male, y = Female, color = Biodomain, size = Difference)) +
    geom_point(alpha = 0.6, stroke = 0) +  # Increased alpha for better visibility
    theme_bw() +

    # Conditionally label points where |Male - Female| >= 0.05
    geom_text_repel(
        aes(label = ifelse((abs(Female - Male) >= 0.07) | (Female > 0.07 & Male > 0.07), Subdomain, "")),
        size = 8,
        max.overlaps = Inf  # Ensure all significant labels are shown
    ) +

   scale_size(range = c(.1, 10), name="% Difference") +

    scale_color_manual(values = custom_colors) +

    labs(color = "AD Biodomain") +

    # Add x = y reference line
    geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray", linewidth = 0.5) +

    # Add horizontal and vertical threshold lines
    geom_hline(yintercept = 0.05, linetype = "dashed", color = "gray") +
    geom_hline(yintercept = 0.10, linetype = "dashed", color = "red") +
    geom_vline(xintercept = 0.05, linetype = "dashed", color = "gray") +
    geom_vline(xintercept = 0.10, linetype = "dashed", color = "red") +

    labs(title = "Subdomain Comparisons for Female_AD vs Male_AD",
         x = "Male (% of subjects)",
         y = "Female (% of subjects)") +

    theme(
        text = element_text(size = 30),
        legend.position = "right",
        axis.text = element_text(size = 30),
        axis.title = element_text(size = 30)
    ) +
  theme(legend.text=element_text(size=16))
plot of chunk unnamed-chunk-2
########################################################################################################################################################################################################

# Calculate difference dynamically (Female - Male)
data_subdomain_MvF_AD <- data_subdomain_MvF_AD %>%
    mutate(Difference = Female - Male) %>%  # Compute difference
    arrange(Difference)  # Sort data: Male-dominant (negative) first, Female-dominant (positive) last

# Select top pathways for better visualization
top_n <- 20  # Adjust this number as needed
data_top <- data_subdomain_MvF_AD[c(1:(top_n/2), (nrow(data_subdomain_MvF_AD) - (top_n/2) + 1):nrow(data_subdomain_MvF_AD)), ]

# Create a diverging bar plot with value labels
ggplot(data_top, aes(x = reorder(Biodomain_Subdomain, Difference), y = Difference, fill = Difference > 0)) +
    geom_bar(stat = "identity", show.legend = FALSE) +

    coord_flip() +  # Flip for horizontal bars
    scale_fill_manual(values = c("blue", "red")) +  # Blue = Male-dominant, Red = Female-dominant
    theme_minimal() +
    labs(title = "Top Male vs Female Subdomains",
         #subtitle = "Red = More Female-Dominant | Blue = More Male-Dominant",
         x = "Biodomain_Subdomain",
         y = "% Difference") +
    theme(text = element_text(size = 14),
          axis.text = element_text(size = 14))
plot of chunk unnamed-chunk-2
ggplot(data_top, aes(x = reorder(Biodomain_Subdomain, Difference), y = Difference, fill = Direction)) +
    geom_bar(stat = "identity", show.legend = TRUE) +
    geom_text(aes(label = paste0(round(abs(Difference) * 100, 1), "%")),  # absolute value ×100 + % sign
              hjust = ifelse(data_top$Difference >= 0, -0.1, 1.1),        # position text left or right
              size = 8) +
    coord_flip() +  # horizontal bars
    scale_fill_manual(values = c("DOWN" = "lightcoral", "UP" = "palegreen")) +
    theme_minimal() +
    labs(title = "Top Male vs Female Subdomains",
         x = "Biodomain_Subdomain",
         y = "% Difference",
         fill = "Direction") +
    theme(text = element_text(size = 30),
          axis.text = element_text(size = 20)) +
    expand_limits(y = c(min(data_top$Difference) * 1.2, max(data_top$Difference) * 1.2))
plot of chunk unnamed-chunk-2
#######################################################################################################################################################################################################
library(ggplot2)
library(dplyr)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
## 
##     smiths
# Select top N subdomains by absolute difference
top_n <- 20
data_top <- data_subdomain_MvF_AD %>%
    mutate(Difference = Female - Male,
           AbsDifference = abs(Difference)) %>%
    arrange(desc(AbsDifference)) %>%
    slice(1:top_n)

# Reshape to long format (Female and Male side-by-side)
data_long <- data_top %>%
    select(Biodomain_Subdomain_initial, Direction, Female, Male) %>%
    pivot_longer(cols = c(Female, Male), names_to = "Group", values_to = "Value") %>%
    mutate(
        Percent = Value * 100,
        # Invert values for DOWN direction to go on the left
        PlotValue = ifelse(Direction == "DOWN", -Percent, Percent),
        Group = factor(Group, levels = c("Female", "Male"))
    )

# Plot
ggplot(data_long, aes(x = reorder(Biodomain_Subdomain_initial, PlotValue), y = PlotValue)) +
    geom_segment(aes(xend = Biodomain_Subdomain_initial, y = 0, yend = PlotValue, color = Group),
                 size = 1.5) +
    geom_point(aes(color = Group), size = 5) +
    geom_text(aes(label = paste0(round(abs(PlotValue), 1), "%"), color = Group),
              hjust = ifelse(data_long$PlotValue > 0, -0.2, 1.2),
              nudge_x = 0.4,
              size = 5) +
    coord_flip() +
    scale_color_manual(values = c("Female" = "hotpink4", "Male" = "lightblue4")) +
    labs(
        title = "Top 10 Diverging Subdomains: Female vs Male",
        x = "Biodomain (Subdomain)",
        y = "Percentage (Direction-Based)",
        color = "Group"
    ) +
    theme_minimal(base_size = 15) +
    theme(
        text = element_text(size = 18),
        axis.text.y = element_text(size = 14),
        legend.position = "top"
    ) +
    expand_limits(y = c(-max(data_long$Percent) * 1.2, max(data_long$Percent) * 1.2))
plot of chunk unnamed-chunk-2
########################################################################################################################################################################################################
########################################################################################################################################################################################################
#  AD vs AsymAD 
# Open the subdomains where the Male and Female percentages are combined together to get AD and AsymAD specific subdomains
# Further, UP (+) and DOWN (-) subdomains are merged together in a single column and the data is separated as a comparison for AD vs AsymAD subdomains

#data_subdomain_ADvAsymAD <- read.csv("AD_AsymAD_comarative_subdomain_results.csv", sep =",", header = TRUE, stringsAsFactors = FALSE)

data_subdomain_ADvAsymAD <- read.csv("AD_AsymAD_comarative_subdomain_results_050525.csv", sep =",", header = TRUE, stringsAsFactors = FALSE)


head(data_subdomain_ADvAsymAD, 5)
##   Sno   unique_id      Biodomain                              Subdomain_initial
## 1   1     AM_ac_4 APP Metabolism                         amyloid-beta clearance
## 2   2     AM_af_5 APP Metabolism                         amyloid-beta formation
## 3   3  AM_appbp_2 APP Metabolism amyloid precursor protein biosynthetic process
## 4   4  AM_appmp_3 APP Metabolism    amyloid precursor protein metabolic process
## 5   5 AM_others_1 APP Metabolism                                          other
##                                          Subdomain
## 1                         amyloid-beta clearance +
## 2                         amyloid-beta formation +
## 3 amyloid precursor protein biosynthetic process +
## 4    amyloid precursor protein metabolic process +
## 5                          APP_Metabolism_others +
##                                     Biodomain_Subdomain_initial
## 1                         APP Metabolism_amyloid-beta clearance
## 2                         APP Metabolism_amyloid-beta formation
## 3 APP Metabolism_amyloid precursor protein biosynthetic process
## 4    APP Metabolism_amyloid precursor protein metabolic process
## 5                                          APP Metabolism_other
##                                               Biodomain_Subdomain Direction
## 1                         APP_Metabolism_amyloid-beta_clearance +        UP
## 2                         APP_Metabolism_amyloid-beta_formation +        UP
## 3 APP_Metabolism_amyloid_precursor_protein_biosynthetic_process +        UP
## 4    APP_Metabolism_amyloid_precursor_protein_metabolic_process +        UP
## 5                                         APP_Metabolism_others +        UP
##           AD     AsymAD Difference
## 1 0.18666888 0.09949479 0.08717409
## 2 0.18750000 0.19001959 0.00251959
## 3 0.10638298 0.08825652 0.01812646
## 4 0.06599069 0.02897206 0.03701863
## 5 0.06599069 0.04783998 0.01815071
# Plot the data as a comparison for AD vs AsymAD pathways

ggplot(data_subdomain_ADvAsymAD, aes(x = AD, y = AsymAD, color = Biodomain, size = Difference)) +
    geom_point(aes(color = Biodomain, size = Difference), alpha = 0.3, stroke = 0) +
    theme_bw() +
    # Conditionally label points based on the specified conditions
    geom_text_repel(
        aes(label = ifelse((AD > 0.10 & Difference > 0.05) | (AsymAD > 0.10 & Difference > 0.05) | (AD > 0.15 & AsymAD > 0.15), Subdomain, "")),
        size = 8
    ) +
    scale_color_manual(values = custom_colors) +
    labs(color = "AD Biodomain") +
    # Add x = y line
    geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray", size = 0.5) +
    # Add horizontal and vertical dashed lines
    geom_hline(yintercept = 0.05, linetype = "dashed", color = "gray") +
    geom_hline(yintercept = 0.10, linetype = "dashed", color = "red") +
    geom_vline(xintercept = 0.05, linetype = "dashed", color = "gray") +
    geom_vline(xintercept = 0.10, linetype = "dashed", color = "red") +
    labs(title = "Subdomain Comparisons for AD vs AsymAD", x = "AD (% of subjects)", y = "AsymAD (% of subjects)") +
    theme(
    text = element_text(size = 30),
    legend.position = "right",
    axis.text = element_text(size = 30),
    axis.title = element_text(size = 30)
  )
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
plot of chunk unnamed-chunk-2
########################################################################################################################################################################################################


# Wrap long Subdomain names
#data_subdomain_ADvAsymAD <- data_subdomain_ADvAsymAD %>%
    #mutate(Subdomain_wrapped = str_wrap(Subdomain, width = 25))


ggplot(data_subdomain_ADvAsymAD, aes(x = AD, y = AsymAD, color = Biodomain, size = Difference)) +
    geom_point(alpha = 0.3, stroke = 0) +  # Points with transparency
    theme_bw() +

    # Conditionally label points where |AD - AsymAD| >= 0.05
    geom_text_repel(
        aes(label = ifelse((abs(AD - AsymAD) >= 0.06) | (AD > 0.15 & AsymAD > 0.15), Subdomain, "")),
        size = 8, max.overlaps = Inf
    ) +

    scale_size(range = c(.1, 10), name="% Difference") +

    scale_color_manual(values = custom_colors) +
    labs(color = "AD Biodomain") +

    # Add x = y reference line
    geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray", size = 0.5) +

    # Add horizontal and vertical threshold lines
    geom_hline(yintercept = 0.05, linetype = "dashed", color = "gray") +
    geom_hline(yintercept = 0.10, linetype = "dashed", color = "red") +
    geom_vline(xintercept = 0.05, linetype = "dashed", color = "gray") +
    geom_vline(xintercept = 0.10, linetype = "dashed", color = "red") +

    # Title and axis labels
    labs(title = "Subdomain Comparisons for AD vs AsymAD",
         x = "AD (% of subjects)",
         y = "AsymAD (% of subjects)") +

    # Theme customization
    theme(
        text = element_text(size = 30),
        legend.position = "right",
        axis.text = element_text(size = 30),
        axis.title = element_text(size = 30)
    ) +
  theme(legend.text=element_text(size=16))
plot of chunk unnamed-chunk-2
#######################################################################################################################################################################################################

# Calculate difference dynamically (AD - AsuymAD)
data_subdomain_ADvAsymAD <- data_subdomain_ADvAsymAD %>%
    mutate(Difference = AD - AsymAD) %>%  # Compute difference
    arrange(Difference)  # Sort data: AD-dominant (negative) first, AsymAD-dominant (positive) last

# Select top pathways for better visualization
top_n <- 20  # Adjust this number as needed
data_top <- data_subdomain_ADvAsymAD[c(1:(top_n/2), (nrow(data_subdomain_ADvAsymAD) - (top_n/2) + 1):nrow(data_subdomain_ADvAsymAD)), ]

# Create a diverging bar plot with value labels

ggplot(data_top, aes(x = reorder(Biodomain_Subdomain_initial, Difference), y = Difference, fill = Difference > 0)) +
    geom_bar(stat = "identity", show.legend = FALSE) +

    coord_flip() +  # Flip for horizontal bars
    scale_fill_manual(values = c("blue", "red")) +  # Blue = Male-dominant, Red = Female-dominant
    theme_minimal() +
    labs(title = "Top AD vs AsymAD Subdomains",
         x = "Biodomain_Subdomain",
         y = "% Difference") +
    theme(text = element_text(size = 14),
          axis.text = element_text(size = 14))
plot of chunk unnamed-chunk-2
ggplot(data_top, aes(x = reorder(Biodomain_Subdomain, Difference), y = Difference, fill = Direction)) +
    geom_bar(stat = "identity", show.legend = TRUE) +
    geom_text(aes(label = paste0(round(abs(Difference) * 100, 1), "%")),  # absolute value ×100 + % sign
              hjust = ifelse(data_top$Difference >= 0, -0.1, 1.1),        # position text left or right
              size = 8) +
    coord_flip() +  # horizontal bars
    scale_fill_manual(values = c("DOWN" = "lightcoral", "UP" = "palegreen")) +
    theme_minimal() +
    labs(title = "Top AD vs AsymAD Subdomains",
         x = "Biodomain (Subdomain)",
         y = "% Difference",
         fill = "Direction") +
    theme(text = element_text(size = 30),
          axis.text = element_text(size = 20)) +
    expand_limits(y = c(min(data_top$Difference) * 1.2, max(data_top$Difference) * 1.2))
plot of chunk unnamed-chunk-2
#######################################################################################################################################################################################################

library(ggplot2)
library(dplyr)
library(tidyr)

# Select top N subdomains by absolute difference
top_n <- 20
data_top <- data_subdomain_ADvAsymAD %>%
    mutate(Difference = AD - AsymAD,
           AbsDifference = abs(Difference)) %>%
    arrange(desc(AbsDifference)) %>%
    slice(1:top_n)

# Reshape to long format (AD and AsymAD side-by-side)
data_long <- data_top %>%
    select(Biodomain_Subdomain_initial, Direction, AD, AsymAD) %>%
    pivot_longer(cols = c(AD, AsymAD), names_to = "Group", values_to = "Value") %>%
    mutate(
        Percent = Value * 100,
        # Invert values for DOWN direction to go on the left
        PlotValue = ifelse(Direction == "DOWN", -Percent, Percent),
        Group = factor(Group, levels = c("AD", "AsymAD"))
    )

# Plot
ggplot(data_long, aes(x = reorder(Biodomain_Subdomain_initial, PlotValue), y = PlotValue)) +
    geom_segment(aes(xend = Biodomain_Subdomain_initial, y = 0, yend = PlotValue, color = Group),
                 size = 1.5) +
    geom_point(aes(color = Group), size = 5) +
    geom_text(aes(label = paste0(round(abs(PlotValue), 1), "%"), color = Group),
              hjust = ifelse(data_long$PlotValue > 0, -0.2, 1.2),
              nudge_x = 0.4,
              size = 5) +
    coord_flip() +
    scale_color_manual(values = c("AD" = "purple4", "AsymAD" = "mediumpurple1")) +
    labs(
        title = "Top 10 Diverging Subdomains: AD vs AsymAD",
        x = "Biodomain (Subdomain)",
        y = "Percentage (Direction-Based)",
        color = "Group"
    ) +
    theme_minimal(base_size = 15) +
    theme(
        text = element_text(size = 18),
        axis.text.y = element_text(size = 14),
        legend.position = "top"
    ) +
    expand_limits(y = c(-max(data_long$Percent) * 1.2, max(data_long$Percent) * 1.2))
plot of chunk unnamed-chunk-2
######################################################################################################################################################################################################## ######################################################################################################################################################################################################## # Transcriptomics vs Proteomics # Open the subdomains where the Male and Female percentages are combined together to get AD specific subdomains in transcriptomics and proteomics analysis # Further, UP (+) and DOWN (-) subdomains are merged together in a single column and the data is separated as a comparison for AD vs AsymAD subdomains data_subdomain_AD_TvsP <- read.csv("AD_trans_prot_comp.csv", sep =",", header = TRUE, stringsAsFactors = FALSE) head(data_subdomain_AD_TvsP, 5) ggplot(data_subdomain_AD_TvsP, aes(x = AD_Transcriptome, y = AD_proteome, color = Biodomain, size = Difference)) + geom_point(alpha = 0.3, stroke = 0) + # Points with transparency theme_bw() + # Conditionally label points where |AD_Transcriptome - AD_proteome| >= 0.05 geom_text_repel( aes(label = ifelse((abs(AD_Transcriptome - AD_proteome) >= 0.05) | (AD_Transcriptome > 0.10 & AD_proteome > 0.10), Biodomain_Subdomain, "")), size = 6 ) + scale_color_manual(values = custom_colors) + labs(color = "Biodomain") + # Add x = y reference line geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray", size = 0.5) + # Add horizontal and vertical threshold lines geom_hline(yintercept = 0.05, linetype = "dashed", color = "gray") + geom_hline(yintercept = 0.10, linetype = "dashed", color = "red") + geom_vline(xintercept = 0.05, linetype = "dashed", color = "gray") + geom_vline(xintercept = 0.10, linetype = "dashed", color = "red") + # Title and axis labels labs(title = "Subdomain Comparisons for Transcriptomics vs Proteomics", x = "AD_Transcriptome (% of subjects)", y = "AD_proteome (% of subjects)") + # Theme customization theme( text = element_text(size = 30), legend.position = "right", axis.text = element_text(size = 30), axis.title = element_text(size = 30) ) ######################################################################################################################################################################################################## # Transcriptomics vs Proteomics # All eight groups comparison together library(ggplot2) library(dplyr) library(tidyr) library(ggrepel) Cases_TvsP <- read.csv("Cases_TvsP.csv", sep =",", header = TRUE, stringsAsFactors = FALSE) head(Cases_TvsP, 5) # Identify column pairs that have "_T" and "_P" column_pairs <- unique(gsub("(_T|_P)$", "", names(Cases_TvsP))) # Create a list to store transformed data plot_data <- list() # Debugging: Print identified column pairs print(column_pairs) # Loop through column pairs and extract matching "_T" and "_P" columns for (col in column_pairs) { T_col <- paste0(col, "_T") P_col <- paste0(col, "_P") # Debugging: Check if columns exist if (T_col %in% names(Cases_TvsP) & P_col %in% names(Cases_TvsP)) { print(paste("Processing:", T_col, "vs", P_col)) # Debugging info # Identify AD vs AsymAD group group_type <- ifelse(grepl("AD_", col), "AD", "AsymAD") temp_df <- Cases_TvsP %>% select(unique_id, Biodomain, all_of(T_col), all_of(P_col)) %>% rename(X = all_of(T_col), Y = all_of(P_col)) %>% mutate(Difference = abs(X - Y), Comparison = col, # Store column name Group = group_type) # Mark whether it's AD or AsymAD plot_data[[col]] <- temp_df # Store in list } else { print(paste("Skipping:", T_col, "or", P_col, "not found")) # Debugging info } } # Combine all transformed data frames final_plot_data <- bind_rows(plot_data) # Check if final_plot_data is empty if (nrow(final_plot_data) == 0) { stop("Error: No valid data available for plotting. Check column names in Cases_TvsP.") } # Ensure `Comparison` is a factor with the correct order: AD groups first, then AsymAD groups final_plot_data$Comparison <- factor(final_plot_data$Comparison, levels = unique(final_plot_data$Comparison)) # Ensure AD groups appear first in facets final_plot_data$Group <- factor(final_plot_data$Group, levels = c("AD", "AsymAD")) # Generate the scatter plot with facets ggplot(final_plot_data, aes(x = X, y = Y, size = Difference, color = Biodomain)) + geom_point(alpha = 0.5) + scale_color_manual(values = custom_colors) + # Apply custom colors geom_text_repel(aes(label = unique_id), size = 5) + # Add labels theme_bw() + facet_wrap(~ Comparison, scales = "free", ncol = 4) + # Order based on `Comparison` labs( title = "Comparison of Transcriptomic vs Proteomic Ratios (AD and AsymAD)", x = "Transcriptome (% of subjects)", y = "Proteome (% of subjects)", size = "% difference between omics", color = "Biodomain" ) + theme( text = element_text(size = 20), legend.position = "right", axis.text = element_text(size = 20), axis.title = element_text(size = 30), #strip.text = element_text(face = "bold") ) + geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray", size = 0.5) # Add x = y line ######################################################################################################################################################################################################## end.rcode-->

Graph-based Clustering using subdomain Specific Z score Distributions Results (AD male)

# # create igraph S3 object net_pos_AD_male <- graph.data.frame(sig_subdomain_AD_male_pos_edge, directed = FALSE) # Get unique vertices from biodomain_subdomain_annotation and variable term_name_vertices <- unique(sig_subdomain_AD_male_pos_edge$biodomain_subdomain_annotation) variable_vertices <- unique(sig_subdomain_AD_male_pos_edge$variable) # Assign shape "square" to term_name vertices and "circle" to variable vertices vertex_shapes <- rep("circle", vcount(net_pos_AD_male)) # Initialize all vertices as circles vertex_shapes[V(net_pos_AD_male)$name %in% term_name_vertices] <- "square" # Assign square shape to Var1 vertices # store original margins orig_mar <- par()$mar # set new margins to limit whitespace in plot par(mar=rep(.1, 4)) # plot the igraph S3 object # We can use various layout algorithms in igraph to generate the layout, such as Fruchterman-Reingold layout (layout_with_fr), Kamada-Kawai layout (layout_with_kk), or circular layout (layout_in_circle) plot(net_pos_AD_male, layout = layout_with_fr(net_pos_AD_male), edge.width = E(net_pos_AD_male)$value, vertex.label.cex = .5, vertex.size = 3, vertex.shape = vertex_shapes) # Export your igraph object as an edge list write.graph(net_pos_AD_male, file = "net_pos_AD_male.graphml", format = "graphml") # community detection based on edge betweenness (Newman-Girvan) #ceb_pos_AD_male <- cluster_edge_betweenness(net_pos_AD_male, weights = E(net_pos_AD_male)$value) # community detection based on Infomap Algorithm #ceb_pos_AD_male <- cluster_infomap(net_pos_AD_male, e.weights = E(net_pos_AD_male)$value) # community detection based on Louvain Clustering (Modularity Optimization) ceb_pos_AD_male <- cluster_louvain(net_pos_AD_male, weights = E(net_pos_AD_male)$value) class(ceb_pos_AD_male) # plot the igraph S3 objec twith sub-cluster plot(ceb_pos_AD_male, net_pos_AD_male, layout = layout_with_fr(net_pos_AD_male), edge.width = E(net_pos_AD_male)$value, vertex.label.cex = .5, vertex.size = 3, vertex.shape = vertex_shapes) # community membership for each node par(mar=orig_mar) dendPlot(ceb_pos_AD_male, mode="hclust", cex=.5) # Get the community membership data community_membership_pos_AD_male <- membership(ceb_pos_AD_male) # Convert it to a data frame with two columns: Node and Community community_df_pos_AD_male <- data.frame(Node = names(community_membership_pos_AD_male), Community = as.integer(community_membership_pos_AD_male)) # Count the number of community detected unique(community_df_pos_AD_male$Community) # Write the data frame to a file write.table(community_df_pos_AD_male, file = "community_membership_subdomain_pos_AD_male.txt", row.names = FALSE, col.names = TRUE, sep = "\t") ################################################################################################################################################## # Make individual community as example # Subset the graph for Community 5 community_5_nodes <- V(net_pos_AD_male)[community_df_pos_AD_male$Community == 5] # Select nodes from Community 5 # Induce a subgraph with only Community 5 nodes subgraph_community_5 <- induced_subgraph(net_pos_AD_male, community_5_nodes) # Assign "circle" shape to all vertices V(subgraph_community_5)$shape <- "circle" # Set "square" shape for vertices that match the term_name_vertices V(subgraph_community_5)$shape[V(subgraph_community_5)$name %in% term_name_vertices] <- "square" # Assign "blue" color to all vertices (default for circles) V(subgraph_community_5)$color <- "lightblue" # Set "red" color for vertices that are squares V(subgraph_community_5)$color[V(subgraph_community_5)$shape == "square"] <- "khaki1" # set new margins to limit whitespace in plot #par(mar=rep(.1, 4)) # Plot the subgraph for Community 5 plot(subgraph_community_5, layout = layout_with_fr(subgraph_community_5), edge.width = E(net_pos_AD_male)$value, vertex.size = 10, vertex.label.cex = 1, vertex.label.color = "black", main = "Community 5 Subgraph") # Convert igraph object to tbl_graph (a tidygraph object) #graph_tbl <- as_tbl_graph(subgraph_community_5) # Create the ggraph plot #ggraph(graph_tbl, layout = 'fr') + # Fruchterman-Reingold layout #geom_edge_link(aes(width = E(subgraph_community_5)$value), alpha = 0.5) + # Edges #geom_node_point(aes(color = V(subgraph_community_5)$color, shape = V(subgraph_community_5)$shape), size = 10) + # Vertices #geom_node_text(aes(label = V(subgraph_community_5)$name), repel = TRUE, size = 10) + # Non-overlapping labels #theme_void() + # Minimalist theme #scale_shape_manual(values = c("circle" = 16, "square" = 15)) + # Map shapes #scale_color_manual(values = c("lightblue", "khaki1")) # Map colors ################################################################################################################################################## # Make heatmap for the comunitties # Select the biodomain_subdomain_annotation and variable entities from the sig_subdomain_AD_male_pos_edge file term_name <- sig_subdomain_AD_male_pos_edge$biodomain_subdomain_annotation variable <- sig_subdomain_AD_male_pos_edge$variable # Separate term_name and variable entities based on their community membership and sort the Community column in ascending order term_name_nodes <- community_df_pos_AD_male[community_df_pos_AD_male$Node %in% term_name, ] term_name_nodes <- term_name_nodes[order(term_name_nodes$Community), ] variable_nodes <- community_df_pos_AD_male[community_df_pos_AD_male$Node %in% variable, ] variable_nodes <- variable_nodes[order(variable_nodes$Community), ] head(term_name_nodes) head(variable_nodes) # Prepare the row and col entries for a heatmap based on term_name and variable entities row_names <- unique(c(term_name_nodes$Node)) col_names <- unique(c(variable_nodes$Node)) # Create a matrix filled with zeros matrix_data_AD_male_pos <- matrix(0, nrow = length(row_names), ncol = length(col_names), dimnames = list(row_names, col_names)) # Populate the matrix based on the information in term_name_nodes and variable_nodes for (i in 1:nrow(term_name_nodes)) { matrix_data_AD_male_pos[term_name_nodes[i, "Node"], variable_nodes[variable_nodes$Community == term_name_nodes[i, "Community"], "Node"]] <- variable_nodes[variable_nodes$Community == term_name_nodes[i, "Community"], "Community"] } write.table(matrix_data_AD_male_pos,"subdomain_matrix_data_pos_AD_male.txt",sep="\t",quote=F) # Create heatmap with matrix matrix_data_AD_male_pos <- as.matrix(matrix_data_AD_male_pos) # Adjust the margins to make room for the X-axis labels par(mar = c(5, 5, 4, 2) + 0.1) heatmap(matrix_data_AD_male_pos, Rowv = NA, Colv = NA, cex.axis = 0.5, main = "Communities identified by Graph Based Clustering for subdomain Comparative subdomain Enrichment Analysis (Male UP subdomains)") ######################################################################################################################################################### # tranfer the data matrix into two column interaction data sig_subdomain_AD_male_neg_edge <- melt(sig_subdomain_AD_male_neg) dim(sig_subdomain_AD_male_neg_edge) # Remove the edges where value (Z score) is "NA" sig_subdomain_AD_male_neg_edge <- filter(sig_subdomain_AD_male_neg_edge, !is.na(value)) dim(sig_subdomain_AD_male_neg_edge) # Convert the negative values in the 'value' column to absolute value for network construction sig_subdomain_AD_male_neg_edge$value <- abs(sig_subdomain_AD_male_neg_edge$value) # create igraph S3 object net_neg_AD_male <- graph.data.frame(sig_subdomain_AD_male_neg_edge, directed = FALSE) # Get unique vertices from biodomain_subdomain_annotation and variable term_name_vertices <- unique(sig_subdomain_AD_male_neg_edge$biodomain_subdomain_annotation) variable_vertices <- unique(sig_subdomain_AD_male_neg_edge$variable) # Assign shape "square" to term_name vertices and "circle" to variable vertices vertex_shapes <- rep("circle", vcount(net_neg_AD_male)) # Initialize all vertices as circles vertex_shapes[V(net_neg_AD_male)$name %in% term_name_vertices] <- "square" # Assign square shape to Var1 vertices # store original margins orig_mar <- par()$mar # set new margins to limit whitespace in plot par(mar=rep(.1, 4)) # plot the igraph S3 object # We can use various layout algorithms in igraph to generate the layout, such as Fruchterman-Reingold layout (layout_with_fr), Kamada-Kawai layout (layout_with_kk), or circular layout (layout_in_circle) plot(net_neg_AD_male, layout = layout_with_fr(net_neg_AD_male), edge.width = E(net_neg_AD_male)$value, vertex.label.cex = .5, vertex.size = 3, vertex.shape = vertex_shapes) # community detection based on Weighted Newman-Girvan (Edge Betweenness) #ceb_neg_AD_male <- cluster_edge_betweenness(net_neg_AD_male, weights = E(net_neg_AD_male)$value) # community detection based on Louvain Clustering (Modularity Optimization) ceb_neg_AD_male <- cluster_louvain(net_neg_AD_male, weights = E(net_neg_AD_male)$value) # community detection based on Infomap Algorithm #ceb_neg_AD_male <- cluster_infomap(net_neg_AD_male, e.weights = E(net_neg_AD_male)$value) # community detection based on Walktrap Algorithm #ceb_neg_AD_male <- cluster_walktrap(net_neg_AD_male, weights = E(net_neg_AD_male)$value) class(ceb_neg_AD_male) # plot the igraph S3 objec twith sub-cluster plot(ceb_neg_AD_male, net_neg_AD_male, layout = layout_with_fr(net_neg_AD_male), edge.width = E(net_neg_AD_male)$value, vertex.label.cex = .5, vertex.size = 3, vertex.shape = vertex_shapes) # community membership for each node par(mar=orig_mar) dendPlot(ceb_neg_AD_male, mode="hclust", cex=.5) # Get the community membership data community_membership_neg_AD_male <- membership(ceb_neg_AD_male) # Convert it to a data frame with two columns: Node and Community community_df_neg_AD_male <- data.frame(Node = names(community_membership_neg_AD_male), Community = as.integer(community_membership_neg_AD_male)) # Count the number of community detected unique(community_df_neg_AD_male$Community) # Write the data frame to a file write.table(community_df_neg_AD_male, file = "community_membership_subdomain_neg_AD_male.txt", row.names = FALSE, col.names = TRUE, sep = "\t") # Select the biodomain_subdomain_annotation and variable entities from the sig_subdomain_AD_male_neg_edge file term_name <- sig_subdomain_AD_male_neg_edge$biodomain_subdomain_annotation variable <- sig_subdomain_AD_male_neg_edge$variable # Separate term_name and variable entities based on their community membership and sort the Community column in ascending order term_name_nodes <- community_df_neg_AD_male[community_df_neg_AD_male$Node %in% term_name, ] term_name_nodes <- term_name_nodes[order(term_name_nodes$Community), ] variable_nodes <- community_df_neg_AD_male[community_df_neg_AD_male$Node %in% variable, ] variable_nodes <- variable_nodes[order(variable_nodes$Community), ] head(term_name_nodes) head(variable_nodes) # Prepare the row and col entries for a heatmap based on term_name and variable entities row_names <- unique(c(term_name_nodes$Node)) col_names <- unique(c(variable_nodes$Node)) # Create a matrix filled with zeros matrix_data_AD_male_neg <- matrix(0, nrow = length(row_names), ncol = length(col_names), dimnames = list(row_names, col_names)) # Populate the matrix based on the information in term_name_nodes and variable_nodes for (i in 1:nrow(term_name_nodes)) { matrix_data_AD_male_neg[term_name_nodes[i, "Node"], variable_nodes[variable_nodes$Community == term_name_nodes[i, "Community"], "Node"]] <- variable_nodes[variable_nodes$Community == term_name_nodes[i, "Community"], "Community"] } write.table(matrix_data_AD_male_neg,"subdomain_matrix_data_neg_AD_male.txt",sep="\t",quote=F) # Create heatmap with matrix matrix_data_AD_male_neg <- as.matrix(matrix_data_AD_male_neg) # Adjust the margins to make room for the X-axis labels par(mar = c(5, 5, 4, 2) + 0.1) heatmap(matrix_data_AD_male_neg, Rowv = NA, Colv = NA, cex.axis = 0.5, main = "Communities identified by Graph Based Clustering for subdomain Comparative subdomain Enrichment Analysis (Male DOWN subdomains)") end.rcode--> #

Graph-based Clustering using subdomain Specific Z score Distributions Results (AD female)

#

Save individual matrix file as .tiff format

#