问题描述
我是 Bokeh 的新手，正在尝试给箱线图（Box-Whisker 图）添加“悬停”功能：当鼠标悬停在箱体（Box）字形上时，希望显示 Q1、Q2、Q3 和 IQR 的值。但我的尝试没有成功，而且对构成箱线图的各个组成部分是如何创建的也感到困惑。
我正在使用bokeh文档中提供的示例。
import numpy as np
import pandas as pd
# NOTE: the axis model in bokeh.models is spelled `LinearAxis` (capital "A").
from bokeh.models import ColumnDataSource, Grid, LinearAxis, Plot, Quad, Range1d, HoverTool, Panel, Tabs, Legend, LegendItem
from bokeh.plotting import figure, output_file, show
# generate some synthetic time series for six different categories
cats = list("abcdef")
yy = np.random.randn(2000)
g = np.random.choice(cats, 2000)
# shift every category's scores by a per-category offset: 0, 0, 1, 1, 2, 2
for i, label in enumerate(cats):
    yy[g == label] += i // 2
df = pd.DataFrame(dict(score=yy, group=g))
# per-category quartiles (25th, 50th and 75th percentiles) and the
# interquartile range derived from them
groups = df.groupby('group')
q1, q2, q3 = (groups.quantile(q=level) for level in (0.25, 0.5, 0.75))
iqr = q3 - q1
# whisker limits: 1.5 * IQR beyond the first and third quartiles
lower = q1 - 1.5 * iqr
upper = q3 + 1.5 * iqr
# find the outliers for each category
def outliers(group):
    """Return the scores of *group* that lie outside its whisker limits.

    Meant to be passed to ``groups.apply``: ``group.name`` is used as the
    category label to look up that category's limits in the module-level
    ``upper`` and ``lower`` frames.
    """
    cat = group.name
    too_high = group.score > upper.loc[cat]['score']
    too_low = group.score < lower.loc[cat]['score']
    return group[too_high | too_low]['score']


out = groups.apply(outliers).dropna()
# prepare outlier data for plotting, we need coordinates for every outlier.
# `out` carries a two-level index whose first level is the category; that
# level is the x coordinate and the outlier score itself is the y coordinate.
# Both lists are always defined so later code can rely on them existing.
outx = []
outy = []
if not out.empty:
    outx = list(out.index.get_level_values(0))
    outy = list(out.values)
p = figure(tools="", background_fill_color="#efefef", x_range=cats, toolbar_location=None)

# if no outliers, shrink lengths of stems to be no longer than the minimums or maximums
qmin = groups.quantile(q=0.00)
qmax = groups.quantile(q=1.00)
upper["score"] = np.minimum(upper["score"], qmax["score"])
lower["score"] = np.maximum(lower["score"], qmin["score"])

# stems: one vertical segment from each whisker end to the adjacent quartile
p.segment(cats, upper.score, cats, q3.score, line_color="black")
p.segment(cats, lower.score, cats, q1.score, line_color="black")

# Boxes: the upper box spans q2..q3, the lower box spans q1..q2
p.vbar(cats, 0.7, q2.score, q3.score, fill_color="#E08E79", line_color="black")
p.vbar(cats, 0.7, q1.score, q2.score, fill_color="#3B8686", line_color="black")

# whiskers (almost-0 height rects simpler than segments)
p.rect(cats, lower.score, 0.2, 0.01, line_color="black")
p.rect(cats, upper.score, 0.2, 0.01, line_color="black")

# outliers
if not out.empty:
    p.circle(outx, outy, size=6, color="#F38630", fill_alpha=0.6)

p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = "white"
p.grid.grid_line_width = 2
p.xaxis.major_label_text_font_size = "16px"
# Format the tooltip: each entry is (label, "@column"), and every label names
# the column it displays.
# NOTE: "@name" fields are looked up as column names of a ColumnDataSource.
tooltips = [
    ('q1', '@q1'),
    ('q2', '@q2'),
    ('q3', '@q3'),
    ('iqr', '@iqr'),
]
# Add the HoverTool to the figure
p.add_tools(HoverTool(tooltips=tooltips))
output_file("Boxplot.html", title="Boxplot.py example")
show(p)
解决方法
除了一些内置的“特殊”变量（如鼠标坐标）以外，悬停工具需要的是 `ColumnDataSource` 中的列名。要访问 `q1`、`q2` 等，您需要使用 `ColumnDataSource` 而不是普通数组来创建方框。
首先,为方框准备数据:
# separate ColumnDataSource for boxes: one column per statistic, obtained by
# renaming each frame's "score" column and joining the frames side by side
_box_stats = (("q1", q1), ("q2", q2), ("q3", q3), ("iqr", iqr))
boxes_data = pd.concat(
    [frame.rename(columns={"score": column}) for column, frame in _box_stats],
    axis=1,
)
接下来，在绘制方框的代码部分中，保存对渲染器的引用，以使 `HoverTool` 仅在这些渲染器上触发，而不是在整个图形范围内触发：
# boxes
# Glyph arguments given as strings are column names of `boxes_source`, which
# is what lets the tooltips below address the same columns with "@name".
boxes_source = ColumnDataSource(boxes_data)
# upper box spans q2..q3
top_box = p.vbar(
    x="group", width=0.7, bottom="q2", top="q3",
    fill_color="#E08E79", line_color="black", source=boxes_source)
# lower box spans q1..q2
bottom_box = p.vbar(
    x="group", width=0.7, bottom="q1", top="q2",
    fill_color="#3B8686", line_color="black", source=boxes_source)
# add hover just to the two box renderers
box_hover = HoverTool(renderers=[top_box, bottom_box], tooltips=[
    ('q1', '@q1'), ('q2', '@q2'), ('q3', '@q3'), ('iqr', '@iqr'),
])
p.add_tools(box_hover)