第一步: 确定组数。一组数据分多少组合适呢?一般与数据本身的特点及数据的多少有关。由于分组的目的之一是为了观察数据分布的特征,因此组数的多少应适中。如组数太少,数据的分布就会过于集中;组数太多,数据的分布就会过于分散,这都不便于观察数据分布的特征和规律。组数的确定应以能够显示数据的分布特征和规律为目的。在实际分组时,可以按 Sturges 提出的经验公式来确定组数 K: $K = 1 + \log_2 n = 1 + \dfrac{\lg n}{\lg 2}$,其中 n 为数据的个数,对结果取整数即为组数(下面的代码采用向上取整)。
第二步: 确定各组的组距
组距 = (Max(Value) - Min(Value)) / K,结果向上取整
第三步: 根据分组整理成频数分布表
-- Sample table: one row per observation, holding the integer value (Age)
-- that the script groups into class intervals.
CREATE TABLE AutoGroup (
    Age INT
);
-- Seed data: 15 sample ages (min 10, max 61).
-- The column list is explicit so the statement keeps working if AutoGroup
-- later gains or reorders columns.
INSERT INTO AutoGroup (Age)
VALUES
    (12),
    (10),
    (20),
    (25),
    (27),
    (30),
    (50),
    (60),
    (45),
    (46),
    (35),
    (30),
    (47),
    (20),
    (61);
-- Frequency distribution of AutoGroup.Age using Sturges' rule.
--
--   1. MaxAndMin     : range (min, max) and size n of the sample.
--   2. GroupCount    : number of groups K = CEILING(1 + log2(n)).
--   3. GroupStep     : group width = CEILING((max - min) / K).
--   4. GroupStandard : recursively generates the K intervals
--                      [MinGroup, MaxGroup), numbered 1..K.
--   5. Final SELECT  : counts the ages that fall into each interval.
--
-- Result: one row per non-empty interval, (MinGroup, Frequency),
-- ordered by MinGroup.
WITH MaxAndMin AS
(
    SELECT
        MAX(Age) AS MaxNum,
        MIN(Age) AS MinNum,
        COUNT(*) AS CountNum
    FROM AutoGroup
),
-- Step 2: number of groups from Sturges' rule.
GroupCount AS
(
    SELECT
        MaxNum,
        MinNum,
        -- NULLIF keeps LOG() from being evaluated on 0 when the table is
        -- empty; the NULL then propagates and the query returns no rows.
        CAST(CEILING(1 + LOG(NULLIF(CountNum, 0)) / LOG(2)) AS INT) AS GroupNum
    FROM MaxAndMin
),
-- Step 3: group width, rounded up so that K groups always cover the range.
GroupStep AS
(
    SELECT
        MinNum,
        GroupNum,
        -- Divide by a FLOAT so the quotient is not truncated before CEILING.
        CAST(CEILING((MaxNum - MinNum) / CAST(GroupNum AS FLOAT)) AS INT) AS GroupWidth
    FROM GroupCount
),
-- Step 4: recursively generate the interval table.
-- All three columns are INT in both the anchor and the recursive member.
GroupStandard (MinGroup, MaxGroup, GroupIndex) AS
(
    -- Anchor: the first interval starts at the smallest value.
    SELECT
        s.MinNum,
        s.MinNum + s.GroupWidth,
        1
    FROM GroupStep AS s

    UNION ALL

    -- Recursive member: shift the interval by one width until K intervals exist.
    SELECT
        s.MinNum + s.GroupWidth * d.GroupIndex,
        s.MinNum + s.GroupWidth * (d.GroupIndex + 1),
        d.GroupIndex + 1
    FROM GroupStandard AS d
    INNER JOIN GroupStep AS s
        ON d.GroupIndex < s.GroupNum
)
-- Step 5: count the rows of the source table that fall into each interval.
SELECT
    b.MinGroup,
    COUNT(*) AS Frequency
FROM AutoGroup AS a
CROSS JOIN GroupStep AS s
INNER JOIN GroupStandard AS b
    ON  a.Age >= b.MinGroup
    AND (
            a.Age < b.MaxGroup
            -- The last interval is closed on the right. Otherwise the maximum
            -- value is lost when the range divides evenly by K, and every row
            -- is lost when all values are equal (width 0).
         OR (b.GroupIndex = s.GroupNum AND a.Age = b.MaxGroup)
        )
GROUP BY b.MinGroup, b.MaxGroup
ORDER BY b.MinGroup;