/*加工集合类型标签用户购买次数最多的3个商品*/SELECT xwho,distinct_id, array_agg(productdel) ASvalueFROM(SELECT xwho,distinct_id, product,RANK() OVER (PARTITIONBY distinct_id ORDER BY ct DESC) AS rank FROM(SELECT xwho,distinct_id,product,COUNT(*) AS ct FROM event_vd WHERExwhat='buy'/*事件名称*/GROUP BY xwho,distinct_id,product /*事件属性-商品名称*/)a )b WHERErank <=3/*指定取次数最多的多少个商品作为标签*/GROUP BY xwho,distinct_id
SELECT xwho,distinct_id,DATE_DIFF('day',from_unixtime(lastbuy/1000),now())ASVALUEFROM(SELECT xwho,distinct_id,MAX(xwhen) as lastbuyFROM event_vdWHERE xwhat='buy'GROUP BY xwho,distinct_id) a/*xwhat指定购买行为,取购买行为的最大日期作为最后购买时间通过DATE_DIFF函数计算最后购买时间到当前的间隔天数作为标签*/
SELECT xwho,distinct_id,CASE/*4. 按照每个用户rfm三项的得分给用户做分层*/WHEN r=1and f=1and m=1THEN'重要价值客户'WHEN r=0and f=1and m=1THEN'重要保持客户'WHEN r=0and f=0and m=1THEN'重要挽留客户'WHEN r=1and f=0and m=1THEN'重要发展客户'WHEN r=1and f=1and m=0THEN'一般价值客户'WHEN r=1and f=0and m=0THEN'一般发展客户'WHEN r=0and f=1and m=0THEN'一般保持客户'WHEN r=0and f=0and m=0THEN'一般挽留客户'ENDASVALUEFROM(SELECT xwho,distinct_id,/*3. 每个用户的得分比上平均分,大于平均则为1,否则为0*/CASEWHEN r/avg_r>1THEN1ELSE0ENDAS r,CASEWHEN f/avg_f>1THEN1ELSE0ENDAS f,CASEWHEN m/avg_m>1THEN1ELSE0ENDAS m FROM( Ss select a.xwho as xwho,a.distinct_id as distinct_id,a.r as r,a.f as f,a.m as m,/*2. 求分段后每个指标的平均分数*/avg(a.r) over() as avg_r,avg(a.f) over() as avg_f,avg(a.m) over() as avg_mFROM(SELECT xwho,distinct_id,tag_152,tag_148,tag_149,Ceiling(percent_rank() over (order by tag_152 desc)/0.2) as r,/*1. 此处0.2为按照20%的比例将指标切分为5段,并赋值1-5*/Ceiling(percent_rank() over (order by tag_148)/0.2) as f,ceiling(percent_rank() over (order by tag_149)/0.2) as mFROM smarttag_1 where ds=20200604)a)b)c/*计算此标签可预先将rfm三项分别保存为标签再进行计算,此处tag_152,tag_148,tag_149分别对应了最后购买距今天数、购买次数和购买总金额*/
5 首末次行为
将用户首次或最后一次发生某个行为的事件属性作为标签,如首次购买金额、最后访问平台等
/*最后访问平台*/SELECT a.xwho,a.distinct_id,a.platform ASVALUEFROM event_vd aINNER JOIN(SELECT xwho,distinct_id,max(xwhen) as tFROM event_vdWHERE xwhat='$startup'GROUP BY xwho,distinct_id) bON a.xwho=b.xwho and a.distinct_id=b.distinct_id and a.xwhen=b.t/*可通过调整max为min计算首次访问的平台名称作为标签*/
6 最近30天访问天数
SELECT xwho, distinct_id, COUNT(DISTINCT format_datetime(from_unixtime(xwhen/1000),'yyyyMMdd')) ASVALUEFROM event_vd WHERExwhat='$startup'andfrom_unixtime(xwhen/1000) betweennow()-interval '30'dayandnow() GROUP BY xwho,distinct_id/*可替换xwhat指定需要统计的事件名称可替换指定需要统计的时间周期可调整时间格式,统计访问的月数、周数、小时数等*/