Forum Stats

  • 3,768,302 Users
  • 2,252,772 Discussions
  • 7,874,521 Comments

Discussions

Oracle Getting latest N rows from a group

BeefStu
BeefStu Member Posts: 282 Blue Ribbon

Oracle 19.8

I have 3 tables (employees, locations, access_history) and I'm trying to get the last N (this case 2) records for each employee_id.


My expected outcome should be this(the latest 2 rows) based on access_history.access_date


EMPLOYEE_ID FIRST_NAME  LAST_NAME   CARD_NUM    LOCATION_ID LOCATION_NAME   ACCESS_DATE RN


1   Jane    Smith   A12345  16   Door 16  07162021 18:35:00   2

1   Jane    Smith   A12345  15   Door 15  07152021 18:35:00   1


2   John    Doe N546830 16   Door 16  07162021 18:35:00   2

2   John    Doe N546830 15   Door 15 07152021 08:25:00   1



The query I posted below partially works. It gives me the correct employee information but the location and access_history information is incorrect.


I thought a JOIN maybe needed and tried something like this but was unsuccessful.


FROM access_history a JOIN

     employees e

     ON a.employee_id = e.employee_id

JOIN

     locations l

     ON a.location_id =l.location_id 



Can someone please take a look and tell me how to fix this issue. In addition, please let me know if there is a more efficient way to go about solving this problem.


I set up a very simple test case to easily reproduce the problem. Thanks in advance to all who respond.




         ALTER SESSION SET 

           NLS_DATE_FORMAT = 'MMDDYYYY HH24:MI:SS';


        Create table employees(

         employee_id NUMBER(6), 

         first_name VARCHAR2(20),

         last_name VARCHAR2(20),

         card_num VARCHAR2(10)

        );


INSERT into employees(

    employee_id, 

    first_name,

    last_name,

    card_num

 )

     VALUES

 (1,'Jane','Smith', 'A12345');


INSERT into employees(

   employee_id, 

   first_name,

   last_name,

   card_num

  )

     VALUES

(2,'John','Doe','N546830');

               

         ALTER TABLE employees

                 ADD ( CONSTRAINT employees_pk

               PRIMARY KEY (employee_id));


        CREATE TABLE locations AS

            SELECT level AS location_id,

          'Door ' || level AS location_name,


            CASE           round(dbms_random.value(1,3)) 

            WHEN 1 THEN 'A' 

            WHEN 2 THEN 'T' 

            WHEN 3 THEN 'T' 

         END AS location_type

        FROM   dual

         CONNECT BY level <= 50;



             ALTER TABLE locations 

                 ADD ( CONSTRAINT locations_pk

               PRIMARY KEY (location_id));



        create table access_history(

              seq_num integer  GENERATED BY DEFAULT AS IDENTITY (START WITH 1) NOT NULL,

               employee_id NUMBER(6), 

               card_num varchar2(10),

               location_id number(4),

               access_date date,

               processed NUMBER(1) default 0

            );



        INSERT INTO access_history (employee_id, card_num, location_id, access_date)

        VALUES

        (1, '1A2B3C', 14,

        to_date('20210714 18:35:00','YYYYMMDD HH24:MI:SS'));


        INSERT INTO access_history (employee_id, card_num, location_id, access_date)

        VALUES

         (1, '1A2B3C', 15,

        to_date('20210715 18:35:00','YYYYMMDD HH24:MI:SS'));


        INSERT INTO access_history (employee_id, card_num, location_id, access_date)

        VALUES

         (1, '1A2B3C', 16,

         to_date('20210716 18:35:00','YYYYMMDD HH24:MI:SS'));


         INSERT INTO access_history (employee_id, card_num, location_id, access_date)

        VALUES

        (2, 'X1234Y', 16,

        to_date('20210716 18:35:00','YYYYMMDD HH24:MI:SS'));


        INSERT INTO access_history (employee_id, card_num, location_id, access_date)

          VALUES

         (2, 'X1234Y', 16,

         to_date('20210715 08:25:10','YYYYMMDD HH24:MI:SS'));


         INSERT INTO access_history (employee_id, card_num, location_id, access_date)

        VALUES

         (2, 'X1234Y', 14,

         to_date('20210714 23:36:00','YYYYMMDD HH24:MI:SS'));



         with rws as (

          select e.employee_id, 

                     e.first_name,

                     e.last_name,

                     e.card_num,

                     l.location_id,

                     l.location_name,

                     a.access_date,

                    row_number () over 

         (

              partition by e.employee_id

               order by e.employee_id

          ) rn   

         FROM employees e,

                   locations l,

                    access_history a

        )

         select * from rws

         where  rn <= 2

         order  by employee_id, location_id,  access_date desc;

Tagged:

Best Answer

  • Solomon Yakobson
    Solomon Yakobson Member Posts: 18,908 Red Diamond
    edited Jul 17, 2021 10:56PM Accepted Answer
    with rws as (
                 select  e.employee_id, 
                         e.first_name,
                         e.last_name,
                         e.card_num,
                         l.location_id,
                         l.location_name,
                         a.access_date,
                         row_number() over(
                                           partition by e.employee_id
                                           order by a.access_date desc
                                          ) rn   
                   from  employees e,
                         locations l,
                         access_history a
                   where a.employee_id = e.employee_id
                     and a.location_id = l.location_id
                )
    select  employee_id, 
            first_name,
            last_name,
            card_num,
            location_id,
            location_name,
            access_date,
            row_number() over(partition by employee_id order by access_date) rn
      from  rws
      where rn <= 2
      order by employee_id,
               rn desc
    /
    
    EMPLOYEE_ID FIRST_NAME LAST_NAME  CARD_NUM   LOCATION_ID LOCATION_NAME        ACCESS_DATE               RN
    ----------- ---------- ---------- ---------- ----------- -------------------- ----------------- ----------
              1 Jane       Smith      A12345              16 Door 16              07162021 18:35:00          2
              1 Jane       Smith      A12345              15 Door 15              07152021 18:35:00          1
              2 John       Doe        N546830             16 Door 16              07162021 18:35:00          2
              2 John       Doe        N546830             16 Door 16              07152021 08:25:10          1
    
    
    SQL>
    
    
    
    

    SY.

Answers

  • Frank Kulash
    Frank Kulash Member, Moderator Posts: 41,205 Red Diamond

    Hi, @BeefStu

    Thanks for posting the sample data; that's very helpful.

    Are you sure the results you posted are what you want from the given data? I can't get the same results.

    I can see a couple of problems in the query you posted. You forget to include the join conditions, and, if you want the last two rows by access_date, then the ORDER BY clause for the ROW_NUMBER function should be ORDER BY a.acess_date DESC.

    Try this:

         with rws as (
         select e.employee_id, 
               e.first_name,
               e.last_name,
               e.card_num,
               l.location_id,
               l.location_name,
               a.access_date,
              row_number () over 
         (
           partition by e.employee_id
            order by a.access_date DESC
         ) rn  
         FROM  employees	e
         JOIN	 access_history a ON a.employee_id = e.employee_id
    	 JOIN  locations 	l ON l.location_id = a.location_id
        )
         select * from rws
         where rn <= 2
         order by employee_id, location_id, access_date desc;
    


    BeefStu
  • Solomon Yakobson
    Solomon Yakobson Member Posts: 18,908 Red Diamond
    edited Jul 17, 2021 10:56PM Accepted Answer
    with rws as (
                 select  e.employee_id, 
                         e.first_name,
                         e.last_name,
                         e.card_num,
                         l.location_id,
                         l.location_name,
                         a.access_date,
                         row_number() over(
                                           partition by e.employee_id
                                           order by a.access_date desc
                                          ) rn   
                   from  employees e,
                         locations l,
                         access_history a
                   where a.employee_id = e.employee_id
                     and a.location_id = l.location_id
                )
    select  employee_id, 
            first_name,
            last_name,
            card_num,
            location_id,
            location_name,
            access_date,
            row_number() over(partition by employee_id order by access_date) rn
      from  rws
      where rn <= 2
      order by employee_id,
               rn desc
    /
    
    EMPLOYEE_ID FIRST_NAME LAST_NAME  CARD_NUM   LOCATION_ID LOCATION_NAME        ACCESS_DATE               RN
    ----------- ---------- ---------- ---------- ----------- -------------------- ----------------- ----------
              1 Jane       Smith      A12345              16 Door 16              07162021 18:35:00          2
              1 Jane       Smith      A12345              15 Door 15              07152021 18:35:00          1
              2 John       Doe        N546830             16 Door 16              07162021 18:35:00          2
              2 John       Doe        N546830             16 Door 16              07152021 08:25:10          1
    
    
    SQL>
    
    
    
    

    SY.

  • Solomon Yakobson
    Solomon Yakobson Member Posts: 18,908 Red Diamond

    Actually, we can reverse RN simpler by using MOD instead of analytic ROW_NUMBER:

    with rws as (
                 select  e.employee_id,
                         e.first_name,
                         e.last_name,
                         e.card_num,
                         l.location_id,
                         l.location_name,
                         a.access_date,
                         row_number() over(
                                           partition by e.employee_id
                                           order by a.access_date desc
                                          ) rn
                   from  employees e,
                         locations l,
                         access_history a
                   where a.employee_id = e.employee_id
                     and a.location_id = l.location_id
                )
    select  employee_id,
            first_name,
            last_name,
            card_num,
            location_id,
            location_name,
            access_date,
            mod(rn,2) + 1 rn
      from  rws
      where rn <= 2
      order by employee_id,
               rn desc
    /
    
    EMPLOYEE_ID FIRST_NAME LAST_NAME  CARD_NUM   LOCATION_ID LOCATION_NAME        ACCESS_DATE               RN
    ----------- ---------- ---------- ---------- ----------- -------------------- ----------------- ----------
              1 Jane       Smith      A12345              16 Door 16              07162021 18:35:00          2
              1 Jane       Smith      A12345              15 Door 15              07152021 18:35:00          1
              2 John       Doe        N546830             16 Door 16              07162021 18:35:00          2
              2 John       Doe        N546830             16 Door 16              07152021 08:25:10          1
    
    
    SQL>
    

    SY.

  • Solomon Yakobson
    Solomon Yakobson Member Posts: 18,908 Red Diamond
    edited Jul 18, 2021 11:05AM

    However, MOD will work juts for 2 latest rows. Efficient way to accomodate not just 2 but any number of latest rows:

    with rws as (
                 select  e.employee_id, 
                         e.first_name,
                         e.last_name,
                         e.card_num,
                         l.location_id,
                         l.location_name,
                         a.access_date,
                         row_number() over(
                                           partition by e.employee_id
                                            order by a.access_date desc
                                          ) rn,
                         count(*) over(partition by e.employee_id) cnt
                   from  employees e,
                         locations l,
                         access_history a
                   where a.employee_id = e.employee_id
                     and a.location_id = l.location_id
                )
    select  employee_id, 
            first_name,
            last_name,
            card_num,
            location_id,
            location_name,
            access_date,
            cnt - rn + 1 rn
      from  rws
      where rn <= :number_of_latest_rows
      order by employee_id,
               rn desc
    /
    
    

    SY.

    BeefStu