问题描述
我想可视化一个分类变量相对于其他几个分类变量的变化。
我的数据看起来像这样
# NOTE(review): `ax` is not defined at this point in the file; this statement
# appears to duplicate the labelpad line near the end of the plotting script
# further down - confirm whether it is a stray copy.
ax.xaxis.labelpad = -10 # Adjust x-axis label position
在我的真实
#################################
### Modules imported used ###
#################################
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import date
import time
import matplotlib.pyplot as plt
import matplotlib
import matplotlib.dates as mdates
# file_path_setup = 'G:/Stocks/PowerPivotApps/Price download/'
# Performance_History = pd.read_csv(file_path_setup + 'Performance.txt',dtype=str,sep=',')
# Portfolio = Performance_History.loc[Performance_History['ExecutionType'] == 'All Portfolios']
# Portfolio = Performance_History.loc[Performance_History['ExecutionType'] == 'Selected Portfolios'] # remove "# set minimum level for performance time"
#Portfolios_Nr_of_Stocks = Portfolio['NrOfStocks']
#Portfolio_Performance_Time = Portfolio['PerformanceTime']
#Portfolio_Date = Portfolio['Date']
# Inline sample data standing in for the columns of the Performance.txt export.
# NOTE(review): the three lists hold 35, 38 and 15 entries respectively, so
# they cannot be paired one-to-one as written - confirm the intended data.

# Run dates as ISO 'YYYY-MM-DD' strings, one calendar week per row.
Portfolio_Date = [
    '2020-08-31', '2020-09-01', '2020-09-02', '2020-09-03', '2020-09-04',
    '2020-09-07', '2020-09-08', '2020-09-09', '2020-09-10', '2020-09-11',
    '2020-09-14', '2020-09-15', '2020-09-16', '2020-09-17', '2020-09-18',
    '2020-09-21', '2020-09-22', '2020-09-23', '2020-09-24', '2020-09-25',
    '2020-09-28', '2020-09-29', '2020-09-30', '2020-10-01', '2020-10-02',
    '2020-10-05', '2020-10-06', '2020-10-07', '2020-10-08', '2020-10-09',
    '2020-10-12', '2020-10-13', '2020-10-14', '2020-10-15', '2020-10-16',
]

# Execution durations as 'HH:MM:SS' strings.
Portfolio_Performance_Time = [
    '00:11:11', '00:11:07', '00:11:16', '00:10:42', '00:10:54',
    '00:10:46', '00:10:27', '00:11:23', '00:11:35', '00:10:23',
    '00:10:51', '00:41:22', '00:11:05', '00:11:15', '00:10:50',
    '00:10:41', '00:19:47', '00:10:43', '00:10:48', '00:11:12',
    '00:10:45', '00:11:02', '00:10:57', '00:11:01', '00:15:17',
    '00:14:33', '00:18:49', '00:14:28', '00:20:45', '00:14:29',
    '00:14:45', '00:17:52', '00:14:37', '00:14:08', '00:15:05',
    '00:14:46', '00:14:39', '00:14:40',
]

# Stock counts, kept as strings exactly as they would arrive from the text file.
Portfolios_Nr_of_Stocks = [
    '621', '619', '617', '622', '621',
    '613', '607', '620', '680', '679',
    '681', '488', '678', '676', '676',
]

# Stock counts as integers so they can be plotted on a numeric axis.
numberofstocks = list(map(int, Portfolios_Nr_of_Stocks))
# Convert to time
def get_sec(time_str):
    """Convert an 'H:M:S' string into a total number of seconds.

    Args:
        time_str: Text made of exactly three colon-separated integer
            fields (hours, minutes, seconds), e.g. '00:11:07'.

    Returns:
        int: hours * 3600 + minutes * 60 + seconds.

    Raises:
        ValueError: If the text does not split into exactly three fields,
            or a field is not a valid integer.
    """
    h, m, s = time_str.split(':')
    return int(h) * 3600 + int(m) * 60 + int(s)
# Execution durations as integer seconds, in the same order as the input list.
PerformanceTime = list(map(get_sec, Portfolio_Performance_Time))
# Run dates parsed from 'YYYY-MM-DD' text into datetime objects for the x-axis.
date_portfolio = [datetime.strptime(day_text, '%Y-%m-%d') for day_text in Portfolio_Date]
# Dual-axis chart: execution time (left y-axis) and number of stocks (right
# y-axis) plotted against the run date. References for the technique:
# https://matplotlib.org/gallery/api/two_scales.html
# https://cmdlinetips.com/2019/10/how-to-make-a-plot-with-two-different-y-axis-in-python-with-matplotlib/
import warnings


def _pair_with_dates(dates, values, label):
    """Return (dates, values) cut to their common length.

    Axes.plot raises ValueError when x and y differ in length. Rather than
    crash, plot the overlapping prefix and warn so the mismatch stays visible.
    Assumes both sequences are aligned from the first date - TODO confirm
    against the real data source.
    """
    common = min(len(dates), len(values))
    if len(dates) != len(values):
        warnings.warn(
            "%s: %d values for %d dates; plotting only the first %d points"
            % (label, len(values), len(dates), common)
        )
    return dates[:common], values[:common]


time_dates, time_values = _pair_with_dates(date_portfolio, PerformanceTime, "Performance Time")
stock_dates, stock_values = _pair_with_dates(date_portfolio, numberofstocks, "Nr Of Stocks")

# create figure and axis objects with subplots()
fig, ax = plt.subplots(figsize=(12, 8))  # figsize -> size of the plot window
# first series: execution time in seconds on the left y-axis
ax.plot(time_dates, time_values, color="red", marker="x")
# set x-axis label
ax.set_xlabel("Date", fontsize=14)
# set y-axis label
ax.set_ylabel("Performance Time", fontsize=14)
# set title
ax.set_title("Execution History", fontsize=20, loc="center", pad=10)
# tick values are seconds; render them as hh:mm:ss
formatter_yx1 = matplotlib.ticker.FuncFormatter(lambda s, x: time.strftime('%H:%M:%S', time.gmtime(s)))
ax.yaxis.set_major_formatter(formatter_yx1)
# rotate x-axis labels
plt.xticks(rotation=90, ha='right')
# left y-axis range: 100 s below the shortest run up to 25 minutes;
# longer runs fall outside the visible range
ax.set_ylim([min(PerformanceTime) - 100, 25 * 60])
# twin object for two different y-axes on the same plot
ax2 = ax.twinx()
# second series: number of stocks on the right y-axis
ax2.plot(stock_dates, stock_values, color="blue", marker="o")
ax2.set_ylabel("Nr Of Stocks", fontsize=14)
# right y-axis range: fixed lower bound of 600 up to 10 above the largest
# count; counts below 600 fall outside the visible range
ax2.set_ylim([600, max(numberofstocks) + 10])
# set date interval
ax.xaxis.set_major_locator(mdates.DayLocator(interval=7))  # major tick every 7 days
ax.xaxis.set_minor_locator(mdates.DayLocator(interval=1))  # minor tick every day
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))  # set date format
ax.xaxis.labelpad = -10  # Adjust x-axis label position
# Plot graph
plt.show()
中,我有13个类别变量,我想按类别对它们进行如下划分。
有没有更简单的方法可以对
   Species Class species2
1 setosa 1 0
2 setosa 1 1
3 setosa 1 0
4 setosa 1 1
5 setosa 1 0
6 setosa 1 1
7 setosa 0 0
8 setosa 0 1
9 setosa 1 1
10 setosa 1 1
中的 lattice 或其他软件包执行此操作?理想情况下,我想要一个每个变量各占一个面板的图,显示每个级别的计数或百分比,并按变量“Class”进行着色。
在此示例中,我使用了以下代码。
dataset
解决方法
也许您正在寻找这个。我建议您使用 `ggplot2`。接下来描述的代码可以根据需要为每个变量构建图,这种做法称为分面(facet)。关键是将数据重塑为长格式。按照您原来的代码,您必须根据每个变量进行拆分,然后使用 `lattice` 创建每个单独的图。优点是 `tidyverse` 中的函数(`pivot_longer()`)可以让您轻松地把数据转换成便于调整绘图函数的形式。这里的代码为每个变量生成一个图。就您而言,如果您有更多类别变量,则可以使用 `ncol` 和 `nrow` 选项调整分面布局。接下来是解决方案:
library(tidyverse)
#Data
# Build the demo data set: the Species column of iris plus two simulated
# binary factors. No seed is set, so the simulated columns differ per run.
data <- iris
data <- as.data.frame(data)
# Drop columns 1-4, keeping only Species.
data <- data[-c(1,2,3,4)]
# rbinom(n, size, prob) needs all three arguments; size = 1 gives 0/1 draws.
# Class: 50 draws with P(1) = 0.7 followed by 100 draws with P(1) = 0.1.
data$Class <- as.factor(c(rbinom(50,1,0.7),rbinom(100,1,0.1)))
# species2: 150 draws with P(1) = 0.85.
data$species2 <- as.factor(rbinom(150,1,0.85))
#Code
# Reshape to long format (one row per observation and variable), count rows
# per variable / Class / level, then draw one bar panel per variable with the
# bars filled by Class and free axis scales in each panel.
data %>%
  pivot_longer(-Class) %>%
  group_by(name, Class, value) %>%
  summarise(N = n()) %>%
  ggplot(aes(x = N, y = value, fill = Class)) +
  geom_bar(stat = 'identity', colour = 'black', alpha = 0.2) +
  facet_wrap(.~name, scales = 'free') +
  theme_bw() +
  theme(legend.position = 'top')
输出:
对于百分比,您可以尝试以下操作:
#Code 2
# Same counting pipeline as the count plot, but position = 'fill' rescales
# every bar to a total of 1 so the x-axis can be labelled as a percentage.
data %>%
  pivot_longer(-Class) %>%
  group_by(name, Class, value) %>%
  summarise(N = n()) %>%
  ggplot(aes(y = value, x = N, fill = Class)) +
  geom_bar(stat = 'identity', color = 'black', alpha = 0.2, position = 'fill') +
  scale_x_continuous(labels = scales::percent) +
  facet_wrap(.~name, scales = 'free') +
  theme_bw() +
  theme(legend.position = 'top')
输出: