t*********l 发帖数: 778 | 1 原始表格
name day sale
a 1 23
a 2 56
a 3 45
........
a 100 34
b 1 4
b 2 4
b 3 5
....
b 100 45
week 从1-100
现在要sum sale for each name first 10 d, first 20d, first 30 d,,,一直到
100d
最后结果是
name sale10d sale20d sale30d ....sale100d
a
b
我现在用很笨的办法, subsetting dataset 变成 很多小的 datasets
第一个是只包括前10天, 第二个只包括前20天, ...
然后在分别做sum
想问下有没有简单点的办法 ? 感谢! | k*******a 发帖数: 772 | | o****o 发帖数: 8077 | 3 data test;
do id='a', 'b', 'c';
do day=1 to 100;
sale=ranuni(9796876)*100;
output;
end;
end;
run;
data fmt;
retain fmtname 'cssale' type 'n' hlo 'M';
retain start 1;
do end=10 to 100 by 10;
label=cats(put(end, Z3.),'d');
output;
end;
run;
proc format cntlin=fmt cntlout=fmtchk;
run;
proc means data=test noprint nway;
by id;
class day /mlf exclusive;
format day cssale.;
var sale;
output out=cumsum sum(sale)=salesum;
run;
proc transpose data=cumsum out=cumsumt name=day prefix=sale;
by id;
var salesum;
id day;
run;
【在 t*********l 的大作中提到】 : 原始表格 : name day sale : a 1 23 : a 2 56 : a 3 45 : ........ : a 100 34 : b 1 4 : b 2 4 : b 3 5
| s******y 发帖数: 352 | 4 借用oloolo 的sample data。 输出变量名字被一一列出。如果太多的话,可考虑用自
动生成放入到macro 变量里。
data test;
do id='a', 'b', 'c';
do day=1 to 100;
sale=ranuni(9796876)*100;
output;
end;
end;
run;
proc sort data=test;
by id day;
run;
data want;
if _n_=0 then set test;
array salesum {10} sale10d sale20d sale30d sale40d
sale50d sale60d sale70d sale80d
sale90d sale100d;
array _sale{100};
do _n_=1 by 1 until(last.id);
set test;
by id;
_sale(_n_)=sale;
if mod(_n_,10)=0 then
salesum((_n_/10))=sum(of _sale(*));
end;
drop sale _:;
run; | s*******d 发帖数: 3786 | 5 Try this one:
proc sort data=test out=one;
by name day;
run;
data two (drop =day sale sum);
array salesum{10} sale10d sale20d sale30d ....sale100d;
retain ssum sale10d sale20d sale30d ....sale100d;
set one;
by name day;
if first.name then ssum = sale;
else do;
ssum = ssum+sale;
if mode(day,10) =0 then do;
salesum{int(day/10)} = ssum;
end;
end;
if last.name then output;
run; | S******y 发帖数: 1123 | 6 #Python 3.1.2
import itertools
data_raw= '''
a 1 23
a 2 56
a 3 45
a 4 55
a 5 60
a 6 34
b 1 10
b 2 11
b 3 12
b 4 13
b 5 14
b 6 15'''
data_listing = data_raw.split('\n')
data_listing.remove('')
def get_my_id(ls):
my_id = ls[0:1]
return my_id
v = []
for my_id, g in itertools.groupby(data_listing,get_my_id):
ls = list(g)
v.append(my_id)
for index, item in enumerate(ls):
loc = index + 1
if loc % 2 == 0: #sum for two days as example
sum2 = float(item.split()[2]) + \
float(ls[index-1].split()[2])
v.append(str(sum2))
print('\t'.join(v))
v=[]
########################## NO WARRANTY ################## | r***k 发帖数: 13586 | 7 %macro cover();
local i;
proc sort data=datain; by name day; run;
data dataout;
retain
%do i=1 %to 10;
sale&i.0d
%end;
;
set datain;
by name day;
if first.name then do;
%do i=1 %to 10;
sale&i.0d=0;
%end;
end;
%do i=1 %to 10;
if day>(&i-1)*10 and day<=&i*10 and sale ne . then
sale&i.0d=sale&i.0d+sale;
%end;
if last.name;
drop day sale;
run;
%mend cover;
data datain;
do name='a', 'b', 'c';
do day=1 to 100;
sale=ranuni(9796876)*100;
output;
end;
end;
run;
%cover; | t*********l 发帖数: 778 | 8 Thank you all.
Actually, the day number is very random, which means today they want
10d , 20d, 30d, ...100d, another day they need like 5d, 12d, 37d,89d...
So the do loop and mod() dont help me too much here. | d*******o 发帖数: 493 | 9 /*USE OOLOO'S SIMULATION*/
data test;
do id='a', 'b', 'c';
do day=1 to 100;
sale=ranuni(9796876)*100;
output;
end;
end;
run;
/*SET UP A FRAME DATASET*/
data frame;
input id $;
cards;
a
b
c
;
run;
/*USE PROC SQL AND MACRO*/
%macro summary;
%do time=10 %to 100 %by 10;
proc sql;
create table table&time as
select id, sum(sale) as first&time.d
from test
where day lt &time
group by id
;
create table frame as
select a.*, b.first&time.d
from frame as a, table&time as b
where a.id = b.id
;
quit;
%end;
%mend;
%summary; | S******y 发帖数: 1123 | 10 #Python 3.1.2
DAYS_INTERVAL = [2,2,1,1] #e.g. total 6 days. define cut points
data_listing = data_raw.split('\n')
data_listing.remove('')
def get_my_id(ls):
my_id = ls[0:1]
return my_id
v = []
for my_id, g in itertools.groupby(data_listing,get_my_id):
ls = list(g)
v.append(my_id)
start = 0
end = 0
x = [float(item.split()[2]) for item in ls] #sale amt as a list
for item in DAYS_INTERVAL:
end += item
v.append(str(sum(x[start:end])))
start = end
print('\t'.join(v))
v=[]
########################## NO WARRANTY ################## | d*******o 发帖数: 493 | 11 /*SET UP A RESULT DATASET*/
data result;
do id = 'a', 'b', 'c' ;
output;
end;
run;
/*SPECIFIC INTERVALS ARE ASSIGNED*/
%macro summary2(time1, time2, time3, time4);
%do i=1 %to 4;
proc sql;
create table result as
select a.*, b.first&&time&i
from result as a, (
select id, sum(sale) as first&&time&i
from test
where day le &&time&i
group by id
) as b
where a.id = b.id
;
quit;
%end;
%mend;
/*USE oloolo'S SIMULATION DATASET TO TEST*/
data test;
do id='a', 'b', 'c';
do day=1 to 100;
sale=ranuni(9796876)*100;
output;
end;
end;
run;
/*INPUT THE INTERVALS AND HAVE THE RESULTS*/
%summary2(5, 12, 37, 89); |
|