En el práctico anterior generamos una tabla de conteos a partir de datos de secuenciación y, hoy, aplicaremos pruebas estadísticas para detectar cambios significativos en el número de lecturas (niveles de expresión) entre dos condiciones experimentales. Utilizaremos R y el paquete estadístico DESeq2 para llevar a cabo el análisis de expresión génica diferencial.
Entrar a Torito, y dirigirse a su carpeta donde está la tabla de conteos generada en el práctico anterior
Iniciar R:
R # Iniciar R
library(DESeq2)
library(ggplot2)
library(tidyr)
mrna <- read.delim("./mRNA_counts_filtered.tsv", row.names="Geneid") # Cargar la tabla de conteos, especificando que la columna 'Geneid' contiene los nombres de los genes.
colnames(mrna) = c("Rich_1","Rich_2","Rich_3","Starved_1","Starved_2","Starved_3")
mrna_long <- mrna %>%
pivot_longer(cols = everything(), names_to = "variable", values_to = "value")
tiff(filename="./Boxplot_unnorm.tiff", width=3600, height=3600, units="px", pointsize=6, compression="lzw", res=500,)
ggplot(mrna_long, aes(x = variable, y = value, fill = variable)) +
geom_boxplot() +
scale_fill_manual(values = c(rep("skyblue", 3), rep("orange", 3))) +
theme_minimal() +
scale_y_log10() +
labs(x = "Samples", y = "Raw Counts")
dev.off()
design = data.frame(row.names = colnames(mrna), condition = c(rep("Rich", 3), rep("Starved", 3)))
design
dds = DESeqDataSetFromMatrix(countData = mrna, colData = design, design = ~ condition) # Crear un objeto 'dds' que contiene los datos de conteos y la asignación de cada muestra a un grupo experimental.
dds$condition = relevel(dds$condition, ref = "Rich")
dds <- estimateSizeFactors(dds)
sizeFactors(dds)
# Calcular los factores de normalización y mostrarlos en pantalla.
# Discutir cómo estos factores ajustan las diferencias en el tamaño de las bibliotecas de las muestras.
normalized_mrna <- as.data.frame(counts(dds, normalized=TRUE)) #Extraer matriz de conteos normalizada
normalized_mrna_long <- normalized_mrna %>%
pivot_longer(cols = everything(), names_to = "variable", values_to = "value")
tiff(filename="./Boxplot_norm.tiff", width=3600, height=3600, units="px", pointsize=6, compression="lzw", res=500,)
ggplot(normalized_mrna_long, aes(x = variable, y = value, fill = variable)) +
geom_boxplot() +
scale_fill_manual(values = c(rep("skyblue", 3), rep("orange", 3))) +
theme_minimal() + scale_y_log10() +
labs(x = "Samples", y = "Normalized Counts")
dev.off()
rld <- rlog(dds, blind=TRUE)
tiff(filename="./plotPCA.tiff", width=1800, height=1800, units="px", pointsize=6, compression="lzw", res=500,)
plotPCA(rld, intgroup="condition") +
ylim(-10, 10) # Ajusta los valores de acuerdo a los datos
dev.off()
### Extract the rlog matrix from the object
rld_mat <- assay(rld)
rld_cor <- cor(rld_mat) ## cor() is a base R function
library(pheatmap)
### Plot heatmap using the correlation matrix and the metadata object
tiff(filename="./plotCorr.tiff", width=3600, height=3600, units="px", pointsize=6, compression="lzw", res=500,)
pheatmap(rld_cor)
dev.off()
dds <- DESeq(dds)
res = as.data.frame(results(dds))
head(res)
res$significance <- ifelse(res$padj < 0.05, "Significant", "Not Significant")
#MA Plot
tiff(filename="./plotMA.tiff", width=3600, height=1800, units="px", pointsize=6, compression="lzw", res=500,)
ggplot(res, aes(x = baseMean, y = log2FoldChange, color = significance)) +
geom_point(alpha = 0.6, size = 1.5) +
scale_x_log10() + # Escala logarítmica en el eje X
scale_color_manual(values = c("Not Significant" = "grey", "Significant" = "red")) +
theme_minimal() +
labs(title = "MA Plot", x = "Mean of Normalized Counts", y = "Log2 Fold Change")
dev.off()
res$significance <- ifelse(res$padj < 0.05 & abs(res$log2FoldChange)>1, "Significant", "Not Significant")
tiff(filename="./plotMA_2.tiff", width=3600, height=1800, units="px", pointsize=6, compression="lzw", res=500,)
ggplot(res, aes(x = baseMean, y = log2FoldChange, color = significance)) +
geom_point(alpha = 0.6, size = 1.5) +
scale_x_log10() + # Escala logarítmica en el eje X
scale_color_manual(values = c("Not Significant" = "grey", "Significant" = "red")) +
theme_minimal() +
labs(title = "MA Plot", x = "Mean of Normalized Counts", y = "Log2 Fold Change")
dev.off()
# Volcano plot
tiff(filename="./VolcanoPlot.tiff", width=3600, height=3600, units="px", pointsize=6, compression="lzw", res=500,)
ggplot(res, aes(x = log2FoldChange, y = -log10(padj), color = significance)) +
geom_point(alpha = 0.6, size = 1.5) +
scale_color_manual(values = c("Not Significant" = "grey", "Significant" = "red")) +
theme_minimal() +
labs(title = "Volcano Plot", x = "Log2 Fold Change", y = "-Log10 Adjusted P-value")
dev.off()
res_fil = res[res$significance == "Significant", ]
up = res_fil[res_fil$log2FoldChange>1, ]
down = res_fil[res_fil$log2FoldChange<(-1), ]
top_genes_up <- head(up[order(up$log2FoldChange, decreasing = TRUE),],30) # Cambia '30' al número de genes que prefieras
top_genes_down <- head(down[order(down$log2FoldChange), ], 30) # Cambia '30' al número de genes que prefieras
top_DEGs = rbind(top_genes_up,top_genes_down)
mat <- normalized_mrna[rownames(top_DEGs), ]
scaled_mat = scale(t(mat))
# Heatmap
tiff(filename="./TopDEGs_Heatmap.tiff", width=3600, height=4200, units="px", pointsize=6, compression="lzw", res=500,)
pheatmap(t(scaled_mat), cluster_rows = TRUE, cluster_cols = TRUE, show_rownames = TRUE, show_colnames = TRUE,
main = "Heatmap of Top Differentially Expressed Genes")
dev.off()
library(clusterProfiler)
library(org.Sc.sgd.db) # Base de datos de anotaciones para Saccharomyces cerevisiae
library(enrichplot)
library(ggplot2)
# Realizar análisis de enriquecimiento GO
go_up <- enrichGO(gene = rownames(up),
OrgDb = org.Sc.sgd.db,
keyType = "GENENAME", # Usa el tipo de clave adecuado ('GENENAME', 'ENSEMBL', 'ENTREZID', etc.)
ont = "BP", # 'BP' para Biological Process, 'MF' para Molecular Function, 'CC' para Cellular Component, o 'ALL'
pAdjustMethod = "BH", # Método de ajuste de p-valor
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
# Visualizar resultados
tiff(filename="./GO_Up_DotPlot.tiff", width=3600, height=3600, units="px", pointsize=6, compression="lzw", res=500,)
dotplot(go_up, showCategory = 20, title = "GO Enrichment for Upregulated Genes") +
theme_minimal()
dev.off()
tiff(filename="./GO_Up_Barplot.tiff", width=3600, height=3600, units="px", pointsize=6, compression="lzw", res=500,)
barplot(go_up, showCategory=20)
dev.off()
tiff(filename="./GO_Up_UpsetPlot.tiff", width=3600, height=3600, units="px", pointsize=6, compression="lzw", res=500,)
upsetplot(go_up)
dev.off()
go_down <- enrichGO(gene = rownames(down),
OrgDb = org.Sc.sgd.db,
keyType = "GENENAME", # Usa el tipo de clave adecuado ('GENENAME', 'ENSEMBL', 'ENTREZID', etc.)
ont = "BP", # 'BP' para Biological Process, 'MF' para Molecular Function, 'CC' para Cellular Component, o 'ALL'
pAdjustMethod = "BH", # Método de ajuste de p-valor
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
# Visualizar resultados
tiff(filename="./GO_Down_DotPlot.tiff", width=3600, height=4200, units="px", pointsize=6, compression="lzw", res=500,)
dotplot(go_down, showCategory = 20, title = "GO Enrichment for Downregulated Genes") +
theme_minimal() +
theme(axis.text.y = element_text(size = 6))
dev.off()
tiff(filename="./GO_Down_BarPlot.tiff", width=3600, height=4600, units="px", pointsize=6, compression="lzw", res=500,)
barplot(go_down, showCategory=20) +
theme(axis.text.y = element_text(size = 6))
dev.off()
tiff(filename="./GO_Down_UpsetPlot.tiff", width=12000, height=3600, units="px", pointsize=6, compression="lzw", res=500,)
upsetplot(go_down)
dev.off()
quit()