Forum Stats

  • 3,826,352 Users
  • 2,260,634 Discussions
  • 7,896,914 Comments

Discussions

Need help with analytic function

User_K6LH0
User_K6LH0 Member Posts: 64 Blue Ribbon

Hi,

I have a dataset which is part of a much larger query and need help in selecting certain rows based on the value of a column: processed_flag.

Here are the general guidelines: If in a dataset none of the rows have been processed then pick the row with the earliest start and end date. If a particular row has already been processed which is denoted by the flag: processed_flag = 'Y' then pick the very next row to process. If the very last row in the data set has a processed_flag = 'Y' then pick the same very last row to be processed again.

Here is an example: (The dataset will be ordered by start_date ASC, end_date ASC and the value of the processed_flag determines which row needs to be serviced next.)

Scenario 1: If any of the rows in the current dataset in question has a value of "Y" then pick the very next row to be processed. In this case (From the screenshot above) record_number = 3 is the answer.

Scenario 2: If in the dataset, the very last row has a value of "Y" then pick the same row to be processed again. In this case (From the screenshot above) record_number = 4 is the answer.

Scenario 3: If there are no "Y" values for the column processed_flag in a particular dataset then pick the very first row from the dataset to be processed. In this case (From the screenshot above) record_number = 1 is the answer.

I am hoping to use analytic function to resolve this and may be have an additional column at the end saying "next_record_to_be_processed" which will hold the answer for a given subset of data or please advise if there is a better way to accomplish the same.

Any help is appreciated. Thanks a lot!

--DDL and DMLs below
CREATE TABLE temp
(
 record_number NUMBER
 , start_date DATE
 , end_date DATE
 , processed_flag VARCHAR2(1)
);

--Scenario 1
INSERT INTO temp VALUES (1, TO_DATE('06/01/2022','MM/DD/YYYY'), TO_DATE('06/30/2022','MM/DD/YYYY'), 'N');
INSERT INTO temp VALUES (2, TO_DATE('07/01/2022','MM/DD/YYYY'), TO_DATE('07/15/2022','MM/DD/YYYY'), 'Y');
INSERT INTO temp VALUES (3, TO_DATE('07/16/2022','MM/DD/YYYY'), TO_DATE('08/15/2022','MM/DD/YYYY'), 'N');
INSERT INTO temp VALUES (4, TO_DATE('08/16/2022','MM/DD/YYYY'), NULL, 'N');

--TRUNCATE TABLE temp;

--Scenario 2
INSERT INTO temp VALUES (1, TO_DATE('06/01/2022','MM/DD/YYYY'), TO_DATE('06/30/2022','MM/DD/YYYY'), 'N');
INSERT INTO temp VALUES (2, TO_DATE('07/01/2022','MM/DD/YYYY'), TO_DATE('07/15/2022','MM/DD/YYYY'), 'N');
INSERT INTO temp VALUES (3, TO_DATE('07/16/2022','MM/DD/YYYY'), TO_DATE('08/15/2022','MM/DD/YYYY'), 'N');
INSERT INTO temp VALUES (4, TO_DATE('08/16/2022','MM/DD/YYYY'), NULL, 'Y');

--TRUNCATE TABLE temp;

--Scenario 3
INSERT INTO temp VALUES (1, TO_DATE('06/01/2022','MM/DD/YYYY'), TO_DATE('06/30/2022','MM/DD/YYYY'), 'N');
INSERT INTO temp VALUES (2, TO_DATE('07/01/2022','MM/DD/YYYY'), TO_DATE('07/15/2022','MM/DD/YYYY'), 'N');
INSERT INTO temp VALUES (3, TO_DATE('07/16/2022','MM/DD/YYYY'), TO_DATE('08/15/2022','MM/DD/YYYY'), 'N');
INSERT INTO temp VALUES (4, TO_DATE('08/16/2022','MM/DD/YYYY'), NULL, 'N');


Tagged:

Best Answer

  • Solomon Yakobson
    Solomon Yakobson Member Posts: 19,471 Red Diamond
    Answer ✓

    Only thing guaranteed to have an order is start_date ASC and end_date ASC as these are part of the original/existing query.

    Then:

    with t as (
               select  temp.*,
                       row_number() over(order by start_date,end_date nulls last) rn
                 from  temp
              )
    select  *
      from  t
      where rn = (
                  select  least(
                                max(rn),
                                nvl(
                                    max(case processed_flag when 'Y' then rn end) + 1,
                                    min(rn)
                                   )
                               )
                    from  t
                 )
    /
    

    SY.

«1

Answers

  • BEDE
    BEDE Oracle Developer Member Posts: 2,424 Gold Trophy
  • Stax
    Stax Member Posts: 41 Red Ribbon

    Hi, User_K6LH0

    one Y per scenario

    with t as (
    select 1 scenario,1 rn,'N' p_flag from dual union all
    select 1 scenario,2 rn,'Y' p_flag from dual union all
    select 1 scenario,3 rn,'N' p_flag from dual union all
    select 1 scenario,4 rn,'N' p_flag from dual union all
    --
    select 2 scenario,1 rn,'N' p_flag from dual union all
    select 2 scenario,2 rn,'N' p_flag from dual union all
    select 2 scenario,3 rn,'N' p_flag from dual union all
    select 2 scenario,4 rn,'Y' p_flag from dual union all
    --
    select 3 scenario,10 rn,'N' p_flag from dual union all
    select 3 scenario,20 rn,'N' p_flag from dual union all
    select 3 scenario,30 rn,'N' p_flag from dual union all
    select 3 scenario,40 rn,'N' p_flag from dual 
    )
    ,tt as (
    select t.*
     ,lag(p_flag) over (partition by scenario order by rn) lg
     ,lead(p_flag,1,'L') over (partition by scenario order by rn) le
     ,first_value(rn) over (partition by scenario order by rn) fv
     ,max(p_flag) over (partition by scenario) mx
     from t
    )
    select -- * 
     scenario,rn,p_flag
    from tt
    where
         (mx='Y' and lg='Y')
     or (mx='Y' and p_flag='Y' and le='L')
     or (mx='N' and rn=fv)
    
    SQL> /
    
    
      SCENARIO         RN P
    ---------- ---------- -
             1          3 N
             2          4 Y
             3         10 N
    
    
    SQL>
    
    


  • Solomon Yakobson
    Solomon Yakobson Member Posts: 19,471 Red Diamond

    Why do you need analytic solution?

    select  *
      from  temp
      where record_number = (
                             select  least(
                                           max(record_number),
                                           nvl(
                                               max(case processed_flag when 'Y' then record_number end) + 1,
                                               min(record_number)
                                              )
                                          ) record_number
                               from  temp
                           )
    /
    

    Test:

    SQL> select  *
      2    from  temp_scenario_1
      3  /
    
    RECORD_NUMBER START_DAT END_DATE  P
    ------------- --------- --------- -
                1 01-JUN-22 30-JUN-22 N
                2 01-JUL-22 15-JUL-22 Y
                3 16-JUL-22 15-AUG-22 N
                4 16-AUG-22           N
    
    SQL> select  *
      2    from  temp_scenario_1
      3    where record_number = (
      4                           select  least(
      5                                         max(record_number),
      6                                         nvl(
      7                                             max(case processed_flag when 'Y' then record_number end) + 1,
      8                                             min(record_number)
      9                                            )
     10                                        ) record_number
     11                             from  temp_scenario_1
     12                         )
     13  /
    
    RECORD_NUMBER START_DAT END_DATE  P
    ------------- --------- --------- -
                3 16-JUL-22 15-AUG-22 N
    
    SQL> select  *
      2    from  temp_scenario_2
      3  /
    
    RECORD_NUMBER START_DAT END_DATE  P
    ------------- --------- --------- -
                1 01-JUN-22 30-JUN-22 N
                2 01-JUL-22 15-JUL-22 N
                3 16-JUL-22 15-AUG-22 N
                4 16-AUG-22           Y
    
    SQL> select  *
      2    from  temp_scenario_2
      3    where record_number = (
      4                           select  least(
      5                                         max(record_number),
      6                                         nvl(
      7                                             max(case processed_flag when 'Y' then record_number end) + 1,
      8                                             min(record_number)
      9                                            )
     10                                        ) record_number
     11                             from  temp_scenario_2
     12                         )
     13  /
    
    RECORD_NUMBER START_DAT END_DATE  P
    ------------- --------- --------- -
                4 16-AUG-22           Y
    
    SQL> select  *
      2    from  temp_scenario_3
      3  /
    
    RECORD_NUMBER START_DAT END_DATE  P
    ------------- --------- --------- -
                1 01-JUN-22 30-JUN-22 N
                2 01-JUL-22 15-JUL-22 N
                3 16-JUL-22 15-AUG-22 N
                4 16-AUG-22           N
    
    SQL> select  *
      2    from  temp_scenario_3
      3    where record_number = (
      4                           select  least(
      5                                         max(record_number),
      6                                         nvl(
      7                                             max(case processed_flag when 'Y' then record_number end) + 1,
      8                                             min(record_number)
      9                                            )
     10                                        ) record_number
     11                             from  temp_scenario_3
     12                         )
     13  /
    
    RECORD_NUMBER START_DAT END_DATE  P
    ------------- --------- --------- -
                1 01-JUN-22 30-JUN-22 N
    
    SQL>
    

    SY.

    SmithJohn45
  • User_K6LH0
    User_K6LH0 Member Posts: 64 Blue Ribbon

    Thank you very much @Stax - Trying out your query.

    @Solomon Yakobson - Thanks for your response, I just thought analytic functions will be easier somehow.

    Sorry I forgot to mention that the record number may not be consecutive and also not in any particular order as those numbers come from a sequence. Only thing guaranteed to have an order is start_date ASC and end_date ASC as these are part of the original/existing query. I am trying to make the existing query an inline view and work on top of that.

    Thanks again!

  • Stax
    Stax Member Posts: 41 Red Ribbon

    Only thing guaranteed to have an order is start_date ASC and end_date ASC

    replace order by rn on order by start_date
    


  • User_K6LH0
    User_K6LH0 Member Posts: 64 Blue Ribbon

    Thanks again @Stax - I am still working on getting the query to work for me. Could you please explain how the MAX and FIRST_VALUE functions are helping the logic?

  • Solomon Yakobson
    Solomon Yakobson Member Posts: 19,471 Red Diamond
    Answer ✓

    Only thing guaranteed to have an order is start_date ASC and end_date ASC as these are part of the original/existing query.

    Then:

    with t as (
               select  temp.*,
                       row_number() over(order by start_date,end_date nulls last) rn
                 from  temp
              )
    select  *
      from  t
      where rn = (
                  select  least(
                                max(rn),
                                nvl(
                                    max(case processed_flag when 'Y' then rn end) + 1,
                                    min(rn)
                                   )
                               )
                    from  t
                 )
    /
    

    SY.

  • User_K6LH0
    User_K6LH0 Member Posts: 64 Blue Ribbon
    edited Jun 25, 2022 1:56PM

    *Nevermind - Was able to figure out - Thanks again!

    Hi @Solomon Yakobson - Thank you very much for your response.

    Is it possible using your query to do the following:

    If all the scenarios - temp_scenario_1, temp_scenario_2 and temp_scenario_3 data from your post earlier are part of the same data set meaning the 3 scenario data sets are combined into one ofcourse with unique record numbers. How would you suggest I update the query to get the resulting final solution? What would be the best way to get the 3 resulting rows in one set?

  • Stax
    Stax Member Posts: 41 Red Ribbon

    Sorry, for my very google bad english 🤣

    MAX-searches for whether Y (Y> N) is specified (for scenario 3)

    FIRST_VALUE - the first record number in the scenario given the sort (for scenario 3)

    I change test data

    1) record_number is not ordered

    Only thing guaranteed to have an order is start_date ASC and end_date ASC 

    2) start_date in the record 2,3 is equal (for test only)

    with t ( scenario_num, record_number, start_date, end_date, processed_flag) as (
    --Scenario 1
    select 1, 1, TO_DATE('06/01/2022','MM/DD/YYYY'), TO_DATE('06/30/2022','MM/DD/YYYY'), 'N' from dual union all
    select 1, 3, TO_DATE('07/01/2022','MM/DD/YYYY'), TO_DATE('07/15/2022','MM/DD/YYYY'), 'Y' from dual union all --!!! start date
    select 1, 2, TO_DATE('07/01/2022','MM/DD/YYYY'), TO_DATE('08/15/2022','MM/DD/YYYY'), 'N' from dual union all --!!! start date
    select 1, 4, TO_DATE('08/16/2022','MM/DD/YYYY'), NULL, 'N' from dual union all
    --Scenario 2
    select 2, 11, TO_DATE('06/01/2022','MM/DD/YYYY'), TO_DATE('06/30/2022','MM/DD/YYYY'), 'N' from dual union all
    select 2, 12, TO_DATE('07/01/2022','MM/DD/YYYY'), TO_DATE('07/15/2022','MM/DD/YYYY'), 'N' from dual union all
    select 2, 17, TO_DATE('07/16/2022','MM/DD/YYYY'), TO_DATE('08/15/2022','MM/DD/YYYY'), 'N' from dual union all
    select 2, 14, TO_DATE('08/16/2022','MM/DD/YYYY'), NULL, 'Y' from dual union all
    --Scenario 3
    select 3, 27, TO_DATE('06/01/2022','MM/DD/YYYY'), TO_DATE('06/30/2022','MM/DD/YYYY'), 'N' from dual union all
    select 3, 21, TO_DATE('07/01/2022','MM/DD/YYYY'), TO_DATE('07/15/2022','MM/DD/YYYY'), 'N' from dual union all
    select 3, 24, TO_DATE('07/16/2022','MM/DD/YYYY'), TO_DATE('08/15/2022','MM/DD/YYYY'), 'N' from dual union all
    select 3, 29, TO_DATE('08/16/2022','MM/DD/YYYY'), NULL, 'N' from dual 
    )
    ,tt as (
    select t.*
     ,lag(processed_flag) over (partition by scenario_num order by start_date, end_date) lg
     ,lead(processed_flag,1,'L') over (partition by scenario_num order by start_date, end_date) le
     ,first_value(record_number) over (partition by scenario_num order by start_date, end_date) fv
     ,max(processed_flag) over (partition by scenario_num) mx
     from t
    )
    select -- * 
     scenario_num, record_number, start_date, end_date, processed_flag
    from tt
    where
         (mx='Y' and lg='Y')
     or (mx='Y' and processed_flag='Y' and le='L')
     or (mx='N' and record_number=fv)
    
    SQL> /
    
    
    SCENARIO_NUM RECORD_NUMBER START_DATE END_DATE   P
    ------------ ------------- ---------- ---------- -
               1             2 01.07.2022 15.08.2022 N
               2            14 16.08.2022            Y
               3            27 01.06.2022 30.06.2022 N
    
    
  • Solomon Yakobson
    Solomon Yakobson Member Posts: 19,471 Red Diamond

    @Stax: Data magic. Try scenario where processed flag is Y for all rows. In such case result should be "If the very last row in the data set has a processed_flag = 'Y' then pick the same very last row to be processed again", however

    with t ( scenario_num, record_number, start_date, end_date, processed_flag) as (
    --Scenario 4
    select 4, 11, TO_DATE('06/01/2022','MM/DD/YYYY'), TO_DATE('06/30/2022','MM/DD/YYYY'), 'Y' from dual union all
    select 4, 12, TO_DATE('07/01/2022','MM/DD/YYYY'), TO_DATE('07/15/2022','MM/DD/YYYY'), 'Y' from dual union all
    select 4, 17, TO_DATE('07/16/2022','MM/DD/YYYY'), TO_DATE('08/15/2022','MM/DD/YYYY'), 'Y' from dual union all
    select 4, 14, TO_DATE('08/16/2022','MM/DD/YYYY'), NULL, 'Y' from dual
    )
    ,tt as (
    select t.*
     ,lag(processed_flag) over (partition by scenario_num order by start_date, end_date) lg
     ,lead(processed_flag,1,'L') over (partition by scenario_num order by start_date, end_date) le
     ,first_value(record_number) over (partition by scenario_num order by start_date, end_date) fv
     ,max(processed_flag) over (partition by scenario_num) mx
     from t
    )
    select -- *
     scenario_num, record_number, start_date, end_date, processed_flag
    from tt
    where
         (mx='Y' and lg='Y')
     or (mx='Y' and processed_flag='Y' and le='L')
     or (mx='N' and record_number=fv)
    /
    
    SCENARIO_NUM RECORD_NUMBER START_DAT END_DATE  P
    ------------ ------------- --------- --------- -
               4            12 01-JUL-22 15-JUL-22 Y
               4            17 16-JUL-22 15-AUG-22 Y
               4            14 16-AUG-22           Y
    
    SQL>
    

    My solution returns correct result:

    with temp ( record_number, start_date, end_date, processed_flag) as (
    --Scenario 4
    select 11, TO_DATE('06/01/2022','MM/DD/YYYY'), TO_DATE('06/30/2022','MM/DD/YYYY'), 'Y' from dual union all
    select 12, TO_DATE('07/01/2022','MM/DD/YYYY'), TO_DATE('07/15/2022','MM/DD/YYYY'), 'Y' from dual union all
    select 17, TO_DATE('07/16/2022','MM/DD/YYYY'), TO_DATE('08/15/2022','MM/DD/YYYY'), 'Y' from dual union all
    select 14, TO_DATE('08/16/2022','MM/DD/YYYY'), NULL, 'Y' from dual
    ),
    t as (
               select  temp.*,
                       row_number() over(order by start_date,end_date nulls last) rn
                 from  temp
              )
    select  *
      from  t
      where rn = (
                  select  least(
                                max(rn),
                                nvl(
                                    max(case processed_flag when 'Y' then rn end) + 1,
                                    min(rn)
                                   )
                               )
                    from  t
                 )
    /
    
    RECORD_NUMBER START_DAT END_DATE  P         RN
    ------------- --------- --------- - ----------
               14 16-AUG-22           Y          4
    
    SQL>
    

    SY.