在Python中沿数轴的刻度和轴脊(spines)绘制分类变量

问题描述

我正在尝试绘制一种“数轴”图(有点像直方图),其中每个变量的整个取值范围都显示为一条线,并根据数据集中实际出现的值给这条线着色。对于数值变量我已经相当成功,但是处理分类变量时变得越来越棘手。

如何使分类标签居中于两个刻度线之间,并使彩色线段在正确的位置开始和结束,以使它们也以标签为中心?

这里有一些示例数据可供使用:

#Imports
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.colors as mcolors
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from datetime import date,datetime,timedelta
import datetime
from google.colab import files
import scipy
%matplotlib inline
pd.options.display.max_columns = 100

# Load the sample data by exporting a Google Sheets document as CSV.
# docID is the spreadsheet id in the URL path; gID is sent as the `gid` query parameter.
docID = '10f0BkWqf4rI9MFkR6NduntKj1oZ6aXXzezCXb06v1CU'
gID = '0'
sample_data = pd.read_csv(
    f'https://docs.google.com/spreadsheets/d/{docID}/export?gid={gID}&format=csv'
)
# Bare expression: in a notebook cell this displays the loaded frame.
sample_data

这是我为数值变量创建的函数,效果很好:

def numberLine(data,interval):
    """Expand every value of *data* into a dense run of points covering its bin.

    Each value is floored to the start of the ``interval``-wide bin that
    contains it, and that bin is filled with points spaced ``interval / 100``
    apart, so a scatter plot of the result looks like a solid line segment.

    Args:
        data: Iterable of numbers (for example a pandas column).
        interval: Width of one bin; must be positive.

    Returns:
        A tuple ``(x, y, limit)``: ``x`` is a list of floats with one run of
        points per input value (duplicates are not removed), ``y`` is a NumPy
        array of zeros with the same length, and ``limit`` is the largest x
        value, or 0 when there is nothing to plot.

    Raises:
        ValueError: If ``interval`` is not positive.
    """
    if interval <= 0:
        raise ValueError("interval must be positive")

    step = interval / 100  # point spacing inside one bin
    x = []
    for val in data:
        # NaN is the only value that differs from itself; a missing reading
        # has no bin, so it is skipped instead of being drawn at 0.
        if val != val:
            continue
        # Floor division gives the correct bin for floats and for negative
        # values, which repeated subtraction against ``interval - 1`` did not.
        start = (val // interval) * interval
        x.extend(np.arange(start, start + interval, step).tolist())

    y = np.zeros(len(x))  # all points sit on the number line itself
    limit = max(x) if x else 0  # max() would raise on empty input
    return x, y, limit

这是我为分类变量写的函数(它可以运行,大致思路也在这里,但这部分并不完全正确):

def numberLine_cat(data,bounds):
    """Build number-line points for the categories of *bounds* found in *data*.

    The category at position ``i`` of ``bounds`` owns the x range
    ``[10*i + 5, 10*i + 15)``, i.e. a 10-unit segment centred on
    ``10*i + 10``. A segment is filled with points 0.1 apart only when the
    category occurs in ``data``; absent categories leave a gap.

    Args:
        data: Iterable of observed category labels (must be hashable).
        bounds: Ordered sequence of every category that may be shown.

    Returns:
        A tuple ``(x, y, limit)`` with the same shape as ``numberLine``:
        ``x`` is a list of floats in axis order, ``y`` is a NumPy array of
        zeros with the same length, and ``limit`` is ``len(bounds) * 10``.
        Because segments are shifted by 5, an axis that shows every category
        completely has to run from 5 to ``limit + 5``.
    """
    observed = set(data)  # O(1) membership test per category
    x = []
    for position, category in enumerate(bounds):
        if category not in observed:
            continue  # the slot stays reserved but is left empty
        start = position * 10 + 5
        # Step 0.1 keeps the dots dense enough to read as a solid line.
        x.extend(np.arange(start, start + 10, .1).tolist())

    y = np.zeros(len(x))
    limit = len(bounds) * 10
    return x, y, limit

下面是基于 https://matplotlib.org/examples/ticks_and_spines/tick-locators.html 中的代码来绘制它的方法:
def setup(ax,lim):
    """Style *ax* as a bare horizontal number line spanning 0 to *lim*.

    Args:
        ax: The matplotlib axes to restyle in place.
        lim: Upper x-axis limit; the lower limit is fixed at 0.
    """
    # Only the bottom spine stays visible; it is the number line itself.
    for side in ('right', 'left', 'top'):
        ax.spines[side].set_color('none')

    # No y ticks, and x ticks drawn on the bottom edge only.
    ax.yaxis.set_major_locator(ticker.NullLocator())
    ax.xaxis.set_ticks_position('bottom')

    # Major ticks are drawn heavier and longer than minor ticks.
    ax.tick_params(which='major', width=1.00, length=5)
    ax.tick_params(which='minor', width=0.75, length=2.5)

    ax.set_xlim(0, lim)
    ax.set_ylim(0, 1)
    # Fully transparent axes background.
    ax.patch.set_alpha(0.0)

# Build the point sets for the two numeric variables (x values, zero y values, axis limit).
x, y, limit = numberLine(sample_data['Precipitation'], 50)
x1, y1, limit1 = numberLine(sample_data['Temperature'], 15)

# Every category that may appear on the categorical axis, in display order.
species_bounds = ['dog','tree','mouse','elephant','dinosaur','turtle','human','dolphin','flower','elk','moose']
# Only the first and last returned values are needed; the y values are all
# zeros, so they are rebuilt here from the length of x2.
x2, *_, limit2 = numberLine_cat(sample_data['Species'], species_bounds)
y2 = np.zeros(len(x2))

#@title
fig = plt.figure(figsize=(10,2))
n = 3  # number of stacked number lines (one subplot row each)
ticklabelpad = mpl.rcParams['xtick.major.pad']

# Precip test
ax = plt.subplot(n, 1, 1)  # row 1 of an n-row, 1-column grid
setup(ax, limit)
ax.xaxis.set_major_locator(ticker.AutoLocator())
ax.xaxis.set_minor_locator(ticker.AutoMinorLocator())
ax.scatter(x, y, linewidth=2, clip_on=False)  # can add zorder > 0 to hide axis within points
ax.annotate('Precip. (mm)', xy=(-.13, 0.15), xytext=(-1, -ticklabelpad), ha='left', va='top',
            xycoords='axes fraction', textcoords='offset points')

# Temperature test
ax = plt.subplot(n, 1, 2)
setup(ax, limit1)
ax.xaxis.set_major_locator(ticker.AutoLocator())
ax.xaxis.set_minor_locator(ticker.AutoMinorLocator())
ax.scatter(x1, y1, clip_on=False)  # can add zorder > 0 to hide axis within points
# Label placement mirrors the precipitation row.
ax.annotate('Temp (C)', xy=(-.1, 0.15), xytext=(-1, -ticklabelpad), ha='left', va='top',
            xycoords='axes fraction', textcoords='offset points')

# Animal test
ax = plt.subplot(n, 1, 3)
setup(ax, limit2)
n_categories = len(species_bounds)
# numberLine_cat draws category i over [10*i + 5, 10*i + 15), so category
# boundaries sit at 5, 15, 25, ... and category centres at 10, 20, 30, ...
ax.set_xlim(5, 10 * n_categories + 5)
# Major ticks mark the boundaries between categories and carry no text.
ax.xaxis.set_major_locator(ticker.FixedLocator([10 * i + 5 for i in range(n_categories + 1)]))
ax.xaxis.set_major_formatter(ticker.NullFormatter())
# Minor ticks sit at the centre of each slot and carry the category names,
# which centres every label between two boundary ticks.
ax.xaxis.set_minor_locator(ticker.FixedLocator([10 * i + 10 for i in range(n_categories)]))
ax.xaxis.set_minor_formatter(ticker.FixedFormatter(species_bounds))
ax.tick_params(axis='x', which='minor', length=0)  # show the label only, no tick mark
ax.scatter(x2, y2, clip_on=False)  # can add zorder > 0 to hide axis within points
ax.annotate('Animals', xy=(-.1, 0.15), xytext=(-1, -ticklabelpad), ha='left', va='top',
            xycoords='axes fraction', textcoords='offset points')

这是它的样子:(最后一行应该只突出显示狗,猫,树,大象,人,海豚和花)

number line plot generated with code,above

非常感谢任何愿意帮忙的人!(这也是我的第一篇帖子,因此,如果您有任何建议,我洗耳恭听。)

解决方法

暂未找到可以解决该程序问题的有效方法,小编正在努力寻找和整理中!

如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。

小编邮箱:dio#foxmail.com (将#修改为@)