问题描述
我必须在数据集中创建复杂的(对我而言)计数器变量。我正在尝试尽可能清楚地解释。如果有任何不清楚的地方,请通知我。希望在您的帮助下我能实现我的期望。
我需要创建三个变量: Probation_Count,Probation_Flag和Cure_Count。
创建三个特定于CID的变量(我们将按CID分组)。
Probation_Count和Probation_Flag的条件
- 条件1-当合同从Default_Flag = Y变为Default_Flag = N,并且probation_flag = Y时,probation_count从1开始。
- 条件2-只要DPD = 0、Default_Flag = N且probation_flag = Y,probation_count就会增加
- 条件3-当DPD > 0且DPD <= 3时,probation_count暂停增加(保持上一个值不变)
- 条件4-当DPD > 3并且default_flag = N时,probation_count重置为0,直到DPD = 0、default_flag = N且probation_flag = Y时才重新开始计数
- 条件5-probation_count最多增加到10,然后重置为0;在probation_count达到10之前,probation_flag保持为Y
- 条件6-每当Default_Flag = Y时,probation_count = 0且Probation_flag =N。要启动probation_count合同,必须从default_flag = Y移到Default_flag = N。
Cure_Count的条件
- 条件1-当上一个日期的probation_count为10,并且当前日期的default_flag = N时,cure_count从1开始
- 条件2-cure_count将持续增加,直到default_flag = Y或cure_count = 10
请在下面找到示例数据。
我已经手动计算了probation_count、probation_flag和cure_count。
/*
 Build the SAMPLE data set from inline '#'-delimited records.
 Columns: CID, date (dd/mm/yyyy), DPD, Default_Flag, followed by the
 hand-computed expected values Probation_Count, probation_Flag and Cure_count.
 NOTE(review): for CID 333 the hand-computed Probation_Count drops from 06
 (15/04) to 05 (16/04) while DPD is 1-3, whereas CID 111 holds the count in
 the same situation - possibly a typo in the sample, confirm with the author.
*/
data sample;
  infile datalines dlm='#';
  * the colon modifier reads the date up to the next delimiter instead of a fixed 10 columns;
  input CID date :ddmmyy10. DPD Default_Flag $ Probation_Count probation_Flag $ Cure_count;
  format date ddmmyy10.;
datalines;
111#04/04/2021#87#N#00# #0
111#05/04/2021#88#N#00# #0
111#06/04/2021#89#N#00# #0
111#07/04/2021#90#Y#00# #0
111#08/04/2021#91#Y#00# #0
111#09/04/2021#92#Y#00# #0
111#10/04/2021#93#Y#00# #0
111#11/04/2021#00#N#01#Y#0
111#12/04/2021#00#N#02#Y#0
111#13/04/2021#00#N#03#Y#0
111#14/04/2021#00#N#04#Y#0
111#15/04/2021#00#N#05#Y#0
111#16/04/2021#01#N#05#Y#0
111#17/04/2021#02#N#05#Y#0
111#18/04/2021#00#N#06#Y#0
111#19/04/2021#00#N#07#Y#0
111#20/04/2021#00#N#08#Y#0
111#21/04/2021#00#N#09#Y#0
111#22/04/2021#00#N#10#Y#0
111#23/04/2021#00#N#00# #1
111#24/04/2021#00#N#00# #2
111#25/04/2021#00#N#00# #3
222#04/04/2021#86#N#00# #0
222#05/04/2021#87#N#00# #0
222#06/04/2021#88#N#00# #0
222#07/04/2021#89#N#00# #0
222#08/04/2021#90#Y#00# #0
222#09/04/2021#91#Y#00# #0
222#10/04/2021#92#Y#00# #0
222#11/04/2021#93#Y#00# #0
222#12/04/2021#94#Y#00# #0
222#13/04/2021#95#Y#00# #0
222#14/04/2021#96#Y#00# #0
333#04/04/2021#87#N#00# #0
333#05/04/2021#88#N#00# #0
333#06/04/2021#89#N#00# #0
333#07/04/2021#90#Y#00# #0
333#08/04/2021#91#Y#00# #0
333#09/04/2021#92#Y#00# #0
333#10/04/2021#00#N#01#Y#0
333#11/04/2021#00#N#02#Y#0
333#12/04/2021#00#N#03#Y#0
333#13/04/2021#00#N#04#Y#0
333#14/04/2021#00#N#05#Y#0
333#15/04/2021#00#N#06#Y#0
333#16/04/2021#01#N#05#Y#0
333#17/04/2021#02#N#05#Y#0
333#18/04/2021#03#N#05#Y#0
333#19/04/2021#04#N#00#Y#0
333#20/04/2021#05#N#00#Y#0
333#21/04/2021#00#N#01#Y#0
333#22/04/2021#00#N#02#Y#0
333#23/04/2021#00#N#03#Y#0
333#24/04/2021#00#N#04#Y#0
333#25/04/2021#00#N#05#Y#0
333#26/04/2021#00#N#06#Y#0
333#27/04/2021#00#N#07#Y#0
333#28/04/2021#00#N#08#Y#0
333#29/04/2021#00#N#09#Y#0
333#30/04/2021#00#N#10#Y#0
333#01/05/2021#00#N#00# #1
333#02/05/2021#00#N#00# #2
333#03/05/2021#00#N#00# #3
333#04/05/2021#90#Y#00# #0
333#05/05/2021#91#Y#00# #0
;
run;
非常感谢您的时间和帮助
解决方法
数据和解释不是100%清晰,但是此示例代码可能会帮助您完全理解您要尝试的复杂规则。
我需要创建三个变量:Probation_Count,Probation_Flag和Cure_Count。
我推测这意味着这些变量及其值只能根据default_flag和dpd的当前状态以及状态的变化来计算。您没有说明上一行计算出的值是否应该(以及如何)结转到下一行的计算中。
示例:
/*
 Build the HAVE data set from inline '#'-delimited records.
 Columns: CID, date (dd/mm/yyyy), DPD, Default_Flag, followed by the
 hand-computed expected values, stored with an _X suffix
 (Probation_Count_X, Probation_Flag_X, Cure_Count_X).
 NOTE(review): for CID 333 the hand-computed Probation_Count_X drops from 06
 (15/04) to 05 (16/04) while DPD is 1-3, whereas CID 111 holds the count in
 the same situation - possibly a typo in the sample, confirm with the author.
*/
data have;
  infile datalines dlm='#';
  * the colon modifier reads the date up to the next delimiter instead of a fixed 10 columns;
  input CID date :ddmmyy10. DPD Default_Flag $ Probation_Count_X Probation_Flag_X $ Cure_Count_X;
  format date ddmmyy10.;
datalines;
111#04/04/2021#87#N#00# #0
111#05/04/2021#88#N#00# #0
111#06/04/2021#89#N#00# #0
111#07/04/2021#90#Y#00# #0
111#08/04/2021#91#Y#00# #0
111#09/04/2021#92#Y#00# #0
111#10/04/2021#93#Y#00# #0
111#11/04/2021#00#N#01#Y#0
111#12/04/2021#00#N#02#Y#0
111#13/04/2021#00#N#03#Y#0
111#14/04/2021#00#N#04#Y#0
111#15/04/2021#00#N#05#Y#0
111#16/04/2021#01#N#05#Y#0
111#17/04/2021#02#N#05#Y#0
111#18/04/2021#00#N#06#Y#0
111#19/04/2021#00#N#07#Y#0
111#20/04/2021#00#N#08#Y#0
111#21/04/2021#00#N#09#Y#0
111#22/04/2021#00#N#10#Y#0
111#23/04/2021#00#N#00# #1
111#24/04/2021#00#N#00# #2
111#25/04/2021#00#N#00# #3
222#04/04/2021#86#N#00# #0
222#05/04/2021#87#N#00# #0
222#06/04/2021#88#N#00# #0
222#07/04/2021#89#N#00# #0
222#08/04/2021#90#Y#00# #0
222#09/04/2021#91#Y#00# #0
222#10/04/2021#92#Y#00# #0
222#11/04/2021#93#Y#00# #0
222#12/04/2021#94#Y#00# #0
222#13/04/2021#95#Y#00# #0
222#14/04/2021#96#Y#00# #0
333#04/04/2021#87#N#00# #0
333#05/04/2021#88#N#00# #0
333#06/04/2021#89#N#00# #0
333#07/04/2021#90#Y#00# #0
333#08/04/2021#91#Y#00# #0
333#09/04/2021#92#Y#00# #0
333#10/04/2021#00#N#01#Y#0
333#11/04/2021#00#N#02#Y#0
333#12/04/2021#00#N#03#Y#0
333#13/04/2021#00#N#04#Y#0
333#14/04/2021#00#N#05#Y#0
333#15/04/2021#00#N#06#Y#0
333#16/04/2021#01#N#05#Y#0
333#17/04/2021#02#N#05#Y#0
333#18/04/2021#03#N#05#Y#0
333#19/04/2021#04#N#00#Y#0
333#20/04/2021#05#N#00#Y#0
333#21/04/2021#00#N#01#Y#0
333#22/04/2021#00#N#02#Y#0
333#23/04/2021#00#N#03#Y#0
333#24/04/2021#00#N#04#Y#0
333#25/04/2021#00#N#05#Y#0
333#26/04/2021#00#N#06#Y#0
333#27/04/2021#00#N#07#Y#0
333#28/04/2021#00#N#08#Y#0
333#29/04/2021#00#N#09#Y#0
333#30/04/2021#00#N#10#Y#0
333#01/05/2021#00#N#00# #1
333#02/05/2021#00#N#00# #2
333#03/05/2021#00#N#00# #3
333#04/05/2021#90#Y#00# #0
333#05/05/2021#91#Y#00# #0
;
run;
/*
 Derive probation_count, probation_flag and cure_count for every row of HAVE,
 separately for each CID, from default_flag and dpd.

 HAVE must be grouped by cid (required by the BY statement). The counters are
 carried from row to row, so rows are assumed to be in date order inside each
 cid - TODO confirm for real data.

 rule records which branch produced the row values:
   1     = probation starts: default_flag went Y -> N inside the same cid
   2     = probation active and dpd = 0: probation_count incremented
   3     = probation active and 0 < dpd <= 3: probation_count held
   4     = probation active and dpd > 3: probation_count reset to 0
   5     = previous row had probation_count = 10: probation ends,
           probation_count resets to 0 and cure_count starts at 1
   6     = default_flag = Y: counters cleared, probation_flag = N
   C     = cure period running: cure_count incremented up to 10
   blank = default_flag = N with neither probation nor cure active

 NOTE(review): the requirements do not say what happens once cure_count
 reaches 10 - this step holds it at 10 until default_flag = Y. They also do
 not tie the start of the cure period to dpd, so rule 5 fires on any
 default_flag = N row that follows a probation_count of 10.
*/
data want;
  length rule $1 probation_count 8 probation_flag $1 cure_count 8;
  length trigger_counting pcounting 8;
  retain pcounting probation_count cure_count;

  set have;
  by cid;

  * LAG is called in its own unconditional statement so that its queue is updated on every row;
  prev_default_flag = lag(default_flag);

  rule = ' ';
  probation_flag = ' ';

  if first.cid then do;
    probation_count = 0;
    cure_count = 0;
    pcounting = 0;
  end;

  * a Y -> N change only counts when the previous row belongs to the same cid;
  trigger_counting = (default_flag = 'N' and prev_default_flag = 'Y' and not first.cid);

  if default_flag = 'N' then do;
    if trigger_counting then do;
      * rule 1 - switch probation counting on and start at 1;
      pcounting = 1;
      probation_count = 1;
      probation_flag = 'Y';
      cure_count = 0;
      rule = '1';
    end;
    else if pcounting and probation_count = 10 then do;
      * rule 5 - probation_count still holds the previous row value (retained),
        so probation was completed on the previous row and the cure period starts here;
      pcounting = 0;
      probation_count = 0;
      cure_count = 1;
      rule = '5';
    end;
    else if pcounting and dpd = 0 then do;
      * rule 2 - clean day during probation;
      probation_count + 1;
      probation_flag = 'Y';
      rule = '2';
    end;
    else if pcounting and 0 < dpd <= 3 then do;
      * rule 3 - few days past due, the count pauses at its current value;
      probation_flag = 'Y';
      rule = '3';
    end;
    else if pcounting and dpd > 3 then do;
      * rule 4 - many days past due, the count restarts from 0 on the next clean day;
      probation_count = 0;
      probation_flag = 'Y';
      rule = '4';
    end;
    else if cure_count > 0 then do;
      * cure period - count up to 10 and then hold;
      if cure_count < 10 then cure_count + 1;
      rule = 'C';
    end;
  end;
  else if default_flag = 'Y' then do;
    * rule 6 - a default clears everything and a new Y -> N change is needed to restart;
    probation_count = 0;
    probation_flag = 'N';
    cure_count = 0;
    rule = '6';
  end;
  else do;
    * unexpected default_flag value - report it and end the step without writing this row;
    put 'ERROR: ' default_flag= _n_=;
    stop;
  end;

  drop prev_default_flag;
  * drop trigger_counting pcounting;
run;