问题描述
我正在研究不同变量的分布及其相关性。有没有办法突出显示高相关性?例如,将大于 0.8 的相关性标记为红色,将小于 -0.8 的相关性标记为蓝色。
解决方法
正如@thefringthing 在他们的评论中所说,这不是一项简单的任务,但绝对可行。
此解决方案基于这个问题和这个回答:
# Load libraries
library(tidyverse)
library(GGally)
# Example data: keep only the first six columns of the built-in mtcars
# dataset so the pairs plot stays small. This creates a local copy named
# `mtcars` that masks the one from the datasets package.
mtcars <- mtcars[, seq_len(6)]
# Panel function for ggpairs(): prints the correlation of the panel's two
# variables and colours the panel background when the correlation is strong.
#
# Arguments:
#   data    - data frame handed over by ggpairs().
#   mapping - aesthetic mapping for the panel; its x and y entries select the
#             two variables to correlate.
#   method  - correlation method forwarded to cor() (e.g. "spearman").
#   symbol  - text placed in front of the rounded correlation value.
#   ...     - further arguments forwarded to ggally_text().
#
# Returns the plot object produced by ggally_text() with a void theme and a
# panel background filled according to the correlation:
#   corr below about -0.8 -> shades of blue  (dodgerblue end of the ramp)
#   corr of about 0.8 up  -> shades of red   (firebrick end of the ramp)
#   anything in between   -> white
cor_func <- function(data, mapping, method, symbol, ...) {
  # Pull the two variables this panel compares out of the data.
  x <- eval_data_col(data, mapping$x)
  y <- eval_data_col(data, mapping$y)

  corr <- cor(x, y, method = method, use = "complete.obs")

  # 100-colour ramp: index 1 corresponds to corr = -1, index 100 to corr = +1.
  col_fn <- colorRampPalette(
    c("dodgerblue", "white", "firebrick"),
    interpolate = "spline"
  )
  ramp_cols <- col_fn(100)

  # One fill per bin, exactly 100 entries: the ten outermost ramp colours on
  # each side, white for the 80 bins in the middle.
  fill_lookup <- c(ramp_cols[1:10], rep("#FFFFFF", 80), ramp_cols[91:100])

  # 101 break points give 100 bins of width 0.02, so the coloured tails start
  # symmetrically at -0.8 and +0.8. all.inside = TRUE keeps the index within
  # 1..100, so corr == 1 lands in the last bin. An NA correlation yields an
  # NA fill, i.e. no background colour.
  bin <- findInterval(corr, seq(-1, 1, length.out = 101), all.inside = TRUE)
  panel_fill <- fill_lookup[bin]

  ggally_text(
    label = paste(symbol, as.character(round(corr, 2))),
    mapping = aes(),
    xP = 0.5,
    yP = 0.5,
    color = "black",
    ...
  ) +
    theme_void() +
    theme(panel.background = element_rect(fill = panel_fill))
}
# Build the pairs plot:
#   upper triangle - Spearman correlation text with coloured background
#                    (cor_func),
#   lower triangle - scatter plot with a linear-model smooth,
#   diagonal       - density plot of each variable.
# The panel functions must name `mapping` as a parameter: if it is left to be
# absorbed by `...`, the `mapping` used in the body is undefined.
plot1 <- ggpairs(
  mtcars,
  upper = list(
    continuous = wrap(cor_func, method = "spearman", symbol = "Corr:\n")
  ),
  lower = list(
    continuous = function(data, mapping, ...) {
      ggally_smooth_lm(data = data, mapping = mapping)
    }
  ),
  diag = list(
    continuous = function(data, mapping, ...) {
      ggally_densityDiag(data = data, mapping = mapping)
    }
  )
)

# Print the plot.
plot1