Forum Stats

  • 3,836,728 Users
  • 2,262,174 Discussions
  • 7,900,088 Comments

Discussions

Query - Merging intervals

arizio
arizio Member Posts: 58 Blue Ribbon
edited Oct 5, 2009 7:21AM in SQL & PL/SQL
Hi all,
I need an help for merging multiple intervals in one grouping by a column value.

Here there is an examle
DROP TABLE rj_mytest PURGE;

CREATE TABLE rj_mytest (
 start_date DATE,
 end_date DATE,
 p1 NUMBER,
 p2 NUMBER
);

INSERT INTO rj_mytest (start_date, end_date, p1, p2)
     VALUES (TO_DATE ('20091010000000', 'YYYYMMDDHH24MISS'), TO_DATE ('20091011000000', 'YYYYMMDDHH24MISS'), 1, 10);
INSERT INTO rj_mytest (start_date, end_date, p1, p2)
     VALUES (TO_DATE ('20091011000000', 'YYYYMMDDHH24MISS'), TO_DATE ('20091012000000', 'YYYYMMDDHH24MISS'), 1, 11);
INSERT INTO rj_mytest (start_date, end_date, p1, p2)
     VALUES (TO_DATE ('20091012000000', 'YYYYMMDDHH24MISS'), TO_DATE ('20091013000000', 'YYYYMMDDHH24MISS'), 1, 12);
INSERT INTO rj_mytest (start_date, end_date, p1, p2)
     VALUES (TO_DATE ('20091013000000', 'YYYYMMDDHH24MISS'), TO_DATE ('20091014000000', 'YYYYMMDDHH24MISS'), 2, 13);
INSERT INTO rj_mytest (start_date, end_date, p1, p2)
     VALUES (TO_DATE ('20091014000000', 'YYYYMMDDHH24MISS'), TO_DATE ('20091015000000', 'YYYYMMDDHH24MISS'), 2, 10);
INSERT INTO rj_mytest (start_date, end_date, p1, p2)
     VALUES (TO_DATE ('20091015000000', 'YYYYMMDDHH24MISS'), TO_DATE ('20091016000000', 'YYYYMMDDHH24MISS'), 2, 15);
INSERT INTO rj_mytest (start_date, end_date, p1, p2)
     VALUES (TO_DATE ('20091016000000', 'YYYYMMDDHH24MISS'), TO_DATE ('20091017000000', 'YYYYMMDDHH24MISS'), 1, 10);
INSERT INTO rj_mytest (start_date, end_date, p1, p2)
     VALUES (TO_DATE ('20091017000000', 'YYYYMMDDHH24MISS'), TO_DATE ('20091018000000', 'YYYYMMDDHH24MISS'), 1, 12);
INSERT INTO rj_mytest (start_date, end_date, p1, p2)
     VALUES (TO_DATE ('20091018000000', 'YYYYMMDDHH24MISS'), TO_DATE ('20091019000000', 'YYYYMMDDHH24MISS'), 3, 121);
INSERT INTO rj_mytest (start_date, end_date, p1, p2)
     VALUES (TO_DATE ('20091019000000', 'YYYYMMDDHH24MISS'), TO_DATE ('20091020000000', 'YYYYMMDDHH24MISS'), 3, 112);
INSERT INTO rj_mytest (start_date, end_date, p1, p2)
     VALUES (TO_DATE ('20091120000000', 'YYYYMMDDHH24MISS'), TO_DATE ('20091121000000', 'YYYYMMDDHH24MISS'), 1, 210);
INSERT INTO rj_mytest (start_date, end_date, p1, p2)
     VALUES (TO_DATE ('20091121000000', 'YYYYMMDDHH24MISS'), TO_DATE ('20091122000000', 'YYYYMMDDHH24MISS'), 1, 410);
INSERT INTO rj_mytest (start_date, end_date, p1, p2)
     VALUES (TO_DATE ('20091122000000', 'YYYYMMDDHH24MISS'), TO_DATE ('20091123000000', 'YYYYMMDDHH24MISS'), 5, 310);
INSERT INTO rj_mytest (start_date, end_date, p1, p2)
     VALUES (TO_DATE ('20091123000000', 'YYYYMMDDHH24MISS'), TO_DATE ('20091124000000', 'YYYYMMDDHH24MISS'), 5, 210);
INSERT INTO rj_mytest (start_date, end_date, p1, p2)
     VALUES (TO_DATE ('20091124000000', 'YYYYMMDDHH24MISS'), TO_DATE ('20091125000000', 'YYYYMMDDHH24MISS'), 5, 110);


SELECT   a.*
FROM rj_mytest a
ORDER BY a.start_date, a.end_date;

START_DAT END_DATE          P1         P2
--------- --------- ---------- ----------
10-OTT-09 11-OTT-09          1         10
11-OTT-09 12-OTT-09          1         11
12-OTT-09 13-OTT-09          1         12
13-OTT-09 14-OTT-09          2         13
14-OTT-09 15-OTT-09          2         10
15-OTT-09 16-OTT-09          2         15
16-OTT-09 17-OTT-09          1         10
17-OTT-09 18-OTT-09          1         12
18-OTT-09 19-OTT-09          3        121
19-OTT-09 20-OTT-09          3        112
20-NOV-09 21-NOV-09          1        210
21-NOV-09 22-NOV-09          1        410
22-NOV-09 23-NOV-09          5        310
23-NOV-09 24-NOV-09          5        210
24-NOV-09 25-NOV-09          5        110
The result should be:
START_INTERVAL     END_INTERVAL       P1         
---------          ---------          ---------- 
10-OTT-09          13-OTT-09          1        
13-OTT-09          16-OTT-09          2        
16-OTT-09          18-OTT-09          1        
18-OTT-09          20-OTT-09          3        
20-NOV-09          22-NOV-09          1        
22-NOV-09          25-NOV-09          5        
Be careful: the adjacency condition is: END_DATE (row j) = START_DATE (row <> j). There are no overlapping intervals and there are also "holes" between intervals

Thanks a lot,
Riccardo

Best Answer

  • Aketi Jyuuzou
    Aketi Jyuuzou Member Posts: 1,072 Bronze Badge
    Answer ✓
    http://oreilly.com/catalog/9780596009762
    SQLCookBook mentions the same question ;-)
    select min(start_date),max(END_DATE),p1
    from (select start_date,END_DATE,p1,
          sum(willSum) over(order by start_date) as GID
          from (select start_date,END_DATE,p1,
                case when start_date
                        = Lag(end_date) over(partition by p1 order by start_date)
                     then 0 else 1 end as willSum
                  from rj_mytest))
    group by p1,GID
    order by min(start_date);
    
    MIN(STAR MAX(END_         P1
    -------- -------- ----------
    09-10-10 09-10-13          1
    09-10-13 09-10-16          2
    09-10-16 09-10-18          1
    09-10-18 09-10-20          3
    09-11-20 09-11-22          1
    09-11-22 09-11-25          5

Answers

  • arizio
    arizio Member Posts: 58 Blue Ribbon
    Hi,
    my current solution is:
    SELECT   MIN (c.start_date) start_interval,
             MAX (c.end_date) end_interval,
             c.p1,
             c.group_k
        FROM (SELECT b.*,
                     SUM (k) OVER (PARTITION BY 1 ORDER BY b.start_date, b.end_date) group_k
                FROM (SELECT a.*,
                             CASE
                                WHEN (TO_CHAR (a.start_date, 'YYYYMMDDHH24MISS') ||
                                      a.p1 =
                                         LAG (TO_CHAR (a.end_date, 'YYYYMMDDHH24MISS') ||
                                              a.p1,
                                              1,
                                              TO_CHAR (a.start_date, 'YYYYMMDDHH24MISS') ||
                                              a.p1
                                             ) OVER (PARTITION BY 1 ORDER BY a.start_date, a.end_date)
                                     )
                                   THEN 0
                                ELSE 1
                             END k
                        FROM rj_mytest a) b) c
    GROUP BY c.p1, c.group_k
    ORDER BY start_interval, end_interval;
    Anyone has a best solution?

    Riccardo


    PS: INTERVAL datatype is useful in this case?
  • Boneist
    Boneist Member Posts: 4,983 Gold Trophy
    edited Oct 5, 2009 5:30AM
    There's probably a more efficient method, but this does the trick, I think:
    with rj_mytest as (select to_date('10/10/2009', 'dd/mm/yyyy') start_date, to_date('11/10/2009', 'dd/mm/yyyy') end_date, 1 p1, 10 p2 from dual union all
                       select to_date('11/10/2009', 'dd/mm/yyyy') start_date, to_date('12/10/2009', 'dd/mm/yyyy') end_date, 1 p1, 11 p2 from dual union all
                       select to_date('12/10/2009', 'dd/mm/yyyy') start_date, to_date('13/10/2009', 'dd/mm/yyyy') end_date, 1 p1, 12 p2 from dual union all
                       select to_date('13/10/2009', 'dd/mm/yyyy') start_date, to_date('14/10/2009', 'dd/mm/yyyy') end_date, 2 p1, 13 p2 from dual union all
                       select to_date('14/10/2009', 'dd/mm/yyyy') start_date, to_date('15/10/2009', 'dd/mm/yyyy') end_date, 2 p1, 10 p2 from dual union all
                       select to_date('15/10/2009', 'dd/mm/yyyy') start_date, to_date('16/10/2009', 'dd/mm/yyyy') end_date, 2 p1, 15 p2 from dual union all
                       select to_date('16/10/2009', 'dd/mm/yyyy') start_date, to_date('17/10/2009', 'dd/mm/yyyy') end_date, 1 p1, 10 p2 from dual union all
                       select to_date('17/10/2009', 'dd/mm/yyyy') start_date, to_date('18/10/2009', 'dd/mm/yyyy') end_date, 1 p1, 12 p2 from dual union all
                       select to_date('18/10/2009', 'dd/mm/yyyy') start_date, to_date('19/10/2009', 'dd/mm/yyyy') end_date, 3 p1, 121 p2 from dual union all
                       select to_date('19/10/2009', 'dd/mm/yyyy') start_date, to_date('20/10/2009', 'dd/mm/yyyy') end_date, 3 p1, 112 p2 from dual union all
                       select to_date('20/11/2009', 'dd/mm/yyyy') start_date, to_date('21/11/2009', 'dd/mm/yyyy') end_date, 1 p1, 210 p2 from dual union all
                       select to_date('21/11/2009', 'dd/mm/yyyy') start_date, to_date('22/11/2009', 'dd/mm/yyyy') end_date, 1 p1, 410 p2 from dual union all
                       select to_date('22/11/2009', 'dd/mm/yyyy') start_date, to_date('23/11/2009', 'dd/mm/yyyy') end_date, 5 p1, 310 p2 from dual union all
                       select to_date('23/11/2009', 'dd/mm/yyyy') start_date, to_date('24/11/2009', 'dd/mm/yyyy') end_date, 5 p1, 210 p2 from dual union all
                       select to_date('24/11/2009', 'dd/mm/yyyy') start_date, to_date('25/11/2009', 'dd/mm/yyyy') end_date, 5 p1, 110 p2 from dual),
           results as (select start_date, 
                              end_date, 
                              p1,
                              lag(end_date) over (order by start_date, end_date, p1) prev_end_date,
                              lead(start_date) over (order by start_date, end_date, p1) next_start_date,
                              lag(p1) over (order by start_date, end_date, p1) prev_p1,
                              lead(p1) over (order by start_date, end_date, p1) next_p1
                       from   rj_mytest),
         inter_res as (select case when start_date &lt;&gt; nvl(prev_end_date, start_date -1) 
                                        or p1 &lt;&gt; nvl(prev_p1, p1||1) 
                                        then start_date end start_date_st,
                              case when end_date &lt;&gt; nvl(next_start_date, end_date + 1) 
                                        or p1 &lt;&gt; nvl(next_p1, p1||1)
                                        then end_date end end_date_end,
                              r.*
                       from   results r),
         final_res as (select last_value(start_date_st ignore nulls) over (order by start_date, end_date, p1) st_dt,
                              end_date_end,
                              p1
                       from   inter_res)
    select st_dt start_date,
           end_date_end end_date,
           p1
    from   final_res
    where end_date_end is not null;
    
    START_DAT END_DATE          P1
    --------- --------- ----------
    10/OCT/09 13/OCT/09          1
    13/OCT/09 16/OCT/09          2
    16/OCT/09 18/OCT/09          1
    18/OCT/09 20/OCT/09          3
    20/NOV/09 22/NOV/09          1
    22/NOV/09 25/NOV/09          5
    Edited by: Boneist on 05-Oct-2009 10:28
    corrected forum formatting issues, along with the fact that p1 is a varchar2...
    Boneist
  • arizio
    arizio Member Posts: 58 Blue Ribbon
    edited Oct 5, 2009 5:11AM
    Boneist,
    in your query there is something wrong..

    "inter_res as (select case when start_date nvl(prev_end_date, start_date -1) "

    Moreover: you make some add/minus operation on p1. Consider that p1 could be also a varchar2(100) ...

    R

    Edited by: arizio on Oct 5, 2009 2:10 AM
  • Boneist
    Boneist Member Posts: 4,983 Gold Trophy
    edited Oct 5, 2009 5:31AM
    arizio wrote:
    Boneist,
    in your query there is something wrong..

    "inter_res as (select case when start_date nvl(prev_end_date, start_date -1) "
    Ah, the forum can't handle &lt;&gt;...

    I've corrected my previous post and also changed the p1+/-1 to p1||1.

    Edited by: Boneist on 05-Oct-2009 10:30
  • arizio
    arizio Member Posts: 58 Blue Ribbon
    edited Oct 5, 2009 5:50AM
    Hi Boneist,
    in your first version of the query, there was not the "< >" in "select case when start_date < > nvl(prev_end_date, start_date -1) "

    Now, it works.

    I want to wait just till tomorrow to see, if someone has a more elegant solution then our ones.


    Riccardo

    Edited by: arizio on Oct 5, 2009 2:50 AM
  • Boneist
    Boneist Member Posts: 4,983 Gold Trophy
    arizio wrote:
    in your first version of the query, there was not the "<>"
    *{;-)

    It's a forum thing; if you want to display &lt;&gt; in the forum you have to change it to &amp;lt; &amp;gt; .... or you could use "!=" instead.
  • ravikumar.sv
    ravikumar.sv Member Posts: 1,072
    one more way...
    with rj_mytest as (
    select to_date('10/10/2009', 'dd/mm/yyyy') start_date, to_date('11/10/2009', 'dd/mm/yyyy') end_date, 1 p1, 10 p2 from dual union all
    select to_date('11/10/2009', 'dd/mm/yyyy') start_date, to_date('12/10/2009', 'dd/mm/yyyy') end_date, 1 p1, 11 p2 from dual union all
    select to_date('12/10/2009', 'dd/mm/yyyy') start_date, to_date('13/10/2009', 'dd/mm/yyyy') end_date, 1 p1, 12 p2 from dual union all
    select to_date('13/10/2009', 'dd/mm/yyyy') start_date, to_date('14/10/2009', 'dd/mm/yyyy') end_date, 2 p1, 13 p2 from dual union all
    select to_date('14/10/2009', 'dd/mm/yyyy') start_date, to_date('15/10/2009', 'dd/mm/yyyy') end_date, 2 p1, 10 p2 from dual union all
    select to_date('15/10/2009', 'dd/mm/yyyy') start_date, to_date('16/10/2009', 'dd/mm/yyyy') end_date, 2 p1, 15 p2 from dual union all
    select to_date('16/10/2009', 'dd/mm/yyyy') start_date, to_date('17/10/2009', 'dd/mm/yyyy') end_date, 1 p1, 10 p2 from dual union all
    select to_date('17/10/2009', 'dd/mm/yyyy') start_date, to_date('18/10/2009', 'dd/mm/yyyy') end_date, 1 p1, 12 p2 from dual union all
    select to_date('18/10/2009', 'dd/mm/yyyy') start_date, to_date('19/10/2009', 'dd/mm/yyyy') end_date, 3 p1, 121 p2 from dual union all
    select to_date('19/10/2009', 'dd/mm/yyyy') start_date, to_date('20/10/2009', 'dd/mm/yyyy') end_date, 3 p1, 112 p2 from dual union all
    select to_date('20/11/2009', 'dd/mm/yyyy') start_date, to_date('21/11/2009', 'dd/mm/yyyy') end_date, 1 p1, 210 p2 from dual union all
    select to_date('21/11/2009', 'dd/mm/yyyy') start_date, to_date('22/11/2009', 'dd/mm/yyyy') end_date, 1 p1, 410 p2 from dual union all
    select to_date('22/11/2009', 'dd/mm/yyyy') start_date, to_date('23/11/2009', 'dd/mm/yyyy') end_date, 5 p1, 310 p2 from dual union all
    select to_date('23/11/2009', 'dd/mm/yyyy') start_date, to_date('24/11/2009', 'dd/mm/yyyy') end_date, 5 p1, 210 p2 from dual union all
    select to_date('24/11/2009', 'dd/mm/yyyy') start_date, to_date('25/11/2009', 'dd/mm/yyyy') end_date, 5 p1, 110 p2 from dual
    )
    --end of data
    select start_Date,ldend_date end_date,p1 from 
    (
      select start_Date,lead(end_date) over (order by start_date) ldend_date,ldp1,p1,lgp1 from
      (
        select start_date,end_date,case when lead(p1) over (order by start_Date) is null then -1 else lead(p1) over (order by start_Date) end ldp1,p1 ,case when lag(p1) over (order by start_Date) is null then -1 else lag(p1) over (order by start_Date) end lgp1 from rj_mytest
      ) where  ldp1 != p1 or lgp1!=p1 
    ) where ldp1=p1
    Ravi Kumar
    ravikumar.sv
  • Aketi Jyuuzou
    Aketi Jyuuzou Member Posts: 1,072 Bronze Badge
    Answer ✓
    http://oreilly.com/catalog/9780596009762
    SQLCookBook mentions the same question ;-)
    select min(start_date),max(END_DATE),p1
    from (select start_date,END_DATE,p1,
          sum(willSum) over(order by start_date) as GID
          from (select start_date,END_DATE,p1,
                case when start_date
                        = Lag(end_date) over(partition by p1 order by start_date)
                     then 0 else 1 end as willSum
                  from rj_mytest))
    group by p1,GID
    order by min(start_date);
    
    MIN(STAR MAX(END_         P1
    -------- -------- ----------
    09-10-10 09-10-13          1
    09-10-13 09-10-16          2
    09-10-16 09-10-18          1
    09-10-18 09-10-20          3
    09-11-20 09-11-22          1
    09-11-22 09-11-25          5
  • arizio
    arizio Member Posts: 58 Blue Ribbon
    Aketi,
    that seems to me the best solution.

    It's quite similar to mine, but it's more elegant and efficient

    Thanks to all
    Riccardo
This discussion has been closed.