7 Replies Latest reply on Jan 2, 2015 9:01 PM by Jonathan Lewis

    Performance tuning queries to get most recent price

    Alan Lawlor

      Happy New Year everybody.

       

      I have a price table in my system which has multiple prices for each item/product with the date the price became effective.

      I have queries throughout the system which need to retrieve the most recent effective price for the date of the transaction.

       

      The easiest way I have found to implement this is with a user-defined function that retrieves the price.

       

      My problem is that many of my queries access large volumes of data (e.g. transactions) and my price table is also quite large - both have millions of records. By using a user-defined PL/SQL function in my query, I get lots of context switching between SQL and PL/SQL, and my queries do not perform well.

       

      Here is some sample code which simplifies my scenario:

      drop table xo_stock_trans;
      create table xo_stock_trans (item varchar2(25), trans_date date, quantity number(20,4));
      insert into xo_stock_trans values('A',TO_DATE('25-DEC-2014','DD-MON-YYYY'), 4);
      insert into xo_stock_trans values('A',TO_DATE('27-DEC-2014','DD-MON-YYYY'), -2);
      insert into xo_stock_trans values('A',TO_DATE('28-DEC-2014','DD-MON-YYYY'), 5);
      insert into xo_stock_trans values('B',TO_DATE('23-DEC-2014','DD-MON-YYYY'), 20);
      insert into xo_stock_trans values('B',TO_DATE('26-DEC-2014','DD-MON-YYYY'), -6);
      insert into xo_stock_trans values('B',TO_DATE('29-DEC-2014','DD-MON-YYYY'), 15);
      /
      -- Generate lots more data
      BEGIN
          -- Generate more trans dates
          for r in 1..1000
          LOOP
              insert into xo_stock_trans
              select item, trans_date - r - 7 as  trans_date, ROUND(dbms_random.value(1,50),2) as quantity
              from xo_stock_trans
              where trans_date between TO_DATE('23-DEC-2014','DD-MON-YYYY') AND TO_DATE('29-DEC-2014','DD-MON-YYYY')
                and item in ('A','B');
          END LOOP;
          COMMIT;
          -- generate more items
          for lt in 1..12 
          LOOP
              -- generate C,D, E, items
              INSERT into xo_stock_trans
              SELECT chr(ascii(item)+(lt*2)) as item, trans_date, quantity
              from xo_stock_trans
              where item in ('A','B');
              -- generate A1, A2, B1, B2, etc
              for nm in 1..10
              LOOP
                  INSERT INTO xo_stock_trans
                  select item || to_char(nm), trans_date, quantity
                  from xo_stock_trans
                  where length(item) = 1;
              END LOOP;
              COMMIT;
          END LOOP;
          COMMIT;
      END;
      /
      create index xo_stock_trans_ix1 on xo_stock_trans (item);
      create index xo_stock_trans_ix2 on xo_stock_trans (trans_date);
      exec dbms_stats.gather_table_stats(ownname =>user, tabname => 'XO_STOCK_TRANS' , estimate_percent => 100, degree => dbms_stats.auto_degree, cascade=>true);
      /
      
      
      drop table xo_prices;
      create table xo_prices (item varchar2(25), price_date date, gross_price number(20,4), net_price number(20,4), special_price number(20,4) );
      insert into xo_prices values ('A', to_date('01-DEC-2014','DD-MON-YYYY'), 10, 8, 6);
      insert into xo_prices values ('A', to_date('25-DEC-2014','DD-MON-YYYY'), 9, 8, 6);
      insert into xo_prices values ('A', to_date('26-DEC-2014','DD-MON-YYYY'), 7, 6, 4);
      insert into xo_prices values ('B', to_date('01-DEC-2014','DD-MON-YYYY'), 5.50, 4.50, 3);
      insert into xo_prices values ('B', to_date('25-DEC-2014','DD-MON-YYYY'), 5.00, 4.00, 3);
      insert into xo_prices values ('B', to_date('26-DEC-2014','DD-MON-YYYY'), 3.50, 2.50, 2);
      /
      -- Generate lots more data
      BEGIN
          -- Generate more price dates
          for r in 1..1000
          LOOP
              insert into xo_prices
              select item, price_date - r - 7 as  price_date,gross_price, net_price, special_price
              from xo_prices
              where price_date between TO_DATE('23-DEC-2014','DD-MON-YYYY') AND TO_DATE('29-DEC-2014','DD-MON-YYYY')
                and item in ('A','B');
          END LOOP;
          COMMIT;
          -- generate more items
          for lt in 1..12 
          LOOP
              -- generate C,D, E, items
              INSERT into xo_prices
              SELECT chr(ascii(item)+(lt*2)) as item, price_date, gross_price + (lt*2), net_price + (lt*2), special_price + (lt*2)
              from xo_prices
              where item in ('A','B');
              -- generate A1, A2, B1, B2, etc
              for nm in 1..10
              LOOP
                  INSERT INTO xo_prices
                  select item || to_char(nm), price_date, gross_price, net_price, special_price
                  from xo_prices
                  where length(item) = 1;
              END LOOP;
              COMMIT;
          END LOOP;
      END;
      /
      
      create index xo_prices_ix1 on xo_prices (item, price_date);
      exec dbms_stats.gather_table_stats(ownname =>user, tabname => 'XO_PRICES' , estimate_percent => 100, degree => dbms_stats.auto_degree, cascade=>true);
      /
      
      create or replace function xo_get_price(I_Item in VARCHAR2, I_Date in DATE, i_Price_type IN VARCHAR2) RETURN NUMBER
      IS
          -- Function to get most recent effective price prior to the date
          CURSOR c_get_prices(P_Item VARCHAR2, P_Date DATE)
          IS
          SELECT gross_price, net_price, special_price
          FROM XO_PRICES
          WHERE item = P_Item
           AND price_date <= P_Date
          ORDER BY price_date desc; -- most recent price
          
          l_gross_price NUMBER(20,4);
          l_net_price NUMBER(20,4);
          l_special_price NUMBER(20,4);
      BEGIN
          OPEN c_get_prices(I_Item, I_Date);
          FETCH c_get_prices INTO l_gross_price, l_net_price, l_special_price;
          CLOSE c_get_prices;
          
          IF I_Price_Type='GROSS' then return l_gross_price;
          ELSIF I_Price_Type= 'NET' then return l_net_price;
          ELSIF I_Price_Type= 'SPECIAL' then return l_special_price;
          END IF;
          RETURN NULL;   -- fall-through for an unrecognised price type
      END xo_get_price;
      /
      
      -- Here is a typical query I am trying to perform
      select tr.item, tr.trans_date, tr.quantity
          , xo_get_price(tr.item, tr.trans_date, 'GROSS') as gross_price
          , xo_get_price(tr.item, tr.trans_date, 'NET') as net_price
          , xo_get_price(tr.item, tr.trans_date, 'SPECIAL') as special_price
      from xo_stock_trans tr
      where tr.trans_date between '01-AUG-2014' and '31-AUG-2014';
      

       

      I would like to refactor my query so that I do not use PL/SQL user-defined functions, but so far I cannot get anything that performs better than the above SQL. For example, the following query takes MUCH longer:

       

      select tr.item, tr.trans_date, tr.quantity
          , pr.gross_price
          , pr.net_price
          , pr.special_price
      from xo_stock_trans tr
      join xo_prices pr on pr.item = tr.item
                      and pr.price_date = (select max(pr2.price_date)
                                           from xo_prices pr2
                                           where pr2.item = pr.item
                                             and pr2.price_date <= tr.trans_date
                                           )
      where tr.trans_date between '01-AUG-2014' and '31-AUG-2014';
      

       

      I am interested in whether anybody has addressed a similar scenario and managed to write better-performing code.

      I have looked at determinism and manual caching of the function, but the item/date combinations are fairly unique, so the function gets little benefit from caching.
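
      For illustration, the kind of function caching I mean looks roughly like this (a sketch only - the function name and the ROWNUM wrapper are illustrative; RESULT_CACHE only pays off when the same argument combinations repeat, which is rarely the case here):

      create or replace function xo_get_price_rc(
          i_item       in varchar2,
          i_date       in date,
          i_price_type in varchar2
      ) return number
          result_cache   -- cross-session cache keyed on the three arguments
      is
          l_price number(20,4);
      begin
          select case i_price_type
                     when 'GROSS'   then gross_price
                     when 'NET'     then net_price
                     when 'SPECIAL' then special_price
                 end
          into   l_price
          from  (select gross_price, net_price, special_price
                 from   xo_prices
                 where  item = i_item
                   and  price_date <= i_date
                 order by price_date desc)
          where  rownum = 1;   -- most recent effective price
          return l_price;
      exception
          when no_data_found then
              return null;
      end xo_get_price_rc;
      /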

       

      Any suggestions will be considered - parallelism, analytic functions, pipelining, etc.

       

      Alan

        • 1. Re: Performance tuning queries to get most recent price
          Frank Kulash

          Hi, Alan,

           

          Alan Lawlor wrote:

          ...

          My problem is that many of my queries access large volumes of data (e.g. transactions) and my price table is also quite large - both have millions of records. By using a user-defined PL/SQL function in my query, I get lots of context switching between SQL and PL/SQL, and my queries do not perform well ...

           

          You got that right!  User-defined functions can be very convenient, but that convenience comes with a price.

           

          What version of Oracle are you using?  In Oracle 12, there's a new "temporal validity" feature that might help you.
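
          For reference, a minimal sketch of what that feature looks like (12c syntax only; the period name and END_DATE column are illustrative, and note that AS OF PERIOD FOR pins the whole query to a single point in time, so it suits "price list as of date X" queries rather than a different date per transaction row):

          -- 12c only: declare a valid-time period over a start/end date pair.
          ALTER TABLE xo_prices ADD (end_date DATE);
          ALTER TABLE xo_prices ADD PERIOD FOR price_validity (price_date, end_date);

          -- Return the price rows that were in effect at one point in time.
          SELECT item, gross_price, net_price, special_price
          FROM   xo_prices AS OF PERIOD FOR price_validity
                 TO_DATE('25-DEC-2014','DD-MON-YYYY')
          WHERE  item = 'A';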

           

          In any version, it will be much faster if you add a new column to the xo_prices table.  You might call this END_DATE, though it would actually be the date when some other price took effect.  You might put DATE '9999-12-31' in the END_DATE column for current prices.  You can calculate END_DATE using the analytic LEAD function.  Be careful to re-calculate END_DATE when you insert new rows into xo_prices, or when you update the dates on existing rows.

           

          Once you have both PRICE_DATE and END_DATE in the XO_PRICES table, you can join that table to get the effective price as of d by including

          AND  d >= xo_prices.price_date

          AND  d <  xo_prices.end_date

          in the join condition.
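
          A minimal sketch of that idea, assuming an END_DATE column has been added to xo_prices (the MERGE is illustrative and would need to be re-run whenever price rows are inserted or their dates change):

          -- Backfill END_DATE with the next PRICE_DATE for the same item;
          -- the most recent price for each item gets the far-future sentinel.
          MERGE INTO xo_prices p
          USING (
                  SELECT rowid AS rid,
                         LEAD(price_date, 1, DATE '9999-12-31')
                             OVER (PARTITION BY item ORDER BY price_date) AS new_end_date
                  FROM   xo_prices
                ) x
          ON (p.rowid = x.rid)
          WHEN MATCHED THEN UPDATE SET p.end_date = x.new_end_date;

          -- The per-transaction lookup then becomes a plain join,
          -- with no aggregate subquery and no PL/SQL call.
          SELECT tr.item, tr.trans_date, tr.quantity,
                 pr.gross_price, pr.net_price, pr.special_price
          FROM   xo_stock_trans tr
          JOIN   xo_prices pr
            ON   pr.item = tr.item
           AND   tr.trans_date >= pr.price_date
           AND   tr.trans_date <  pr.end_date
          WHERE  tr.trans_date BETWEEN DATE '2014-08-01' AND DATE '2014-08-31';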

           

          In some situations, especially when you don't have many distinct (item, date) combinations, scalar subqueries might be faster than joins.

          Either way, there's no PL/SQL involved, so there's no context switching.

          • 2. Re: Performance tuning queries to get most recent price
            James Su

            try:

             

            select tr.item, tr.trans_date, tr.quantity 
                  ,(SELECT MAX(gross_price||','||net_price||','||special_price) KEEP(DENSE_RANK FIRST ORDER BY price_date DESC) FROM XO_PRICES 
                     WHERE item = tr.item AND price_date <= tr.trans_date)   ---- you need another layer to parse three fields from a CSV string
            from xo_stock_trans tr 
            where tr.trans_date between date '2014-08-01' and date '2014-08-31';
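
            For completeness, the outer layer that splits that concatenated value back into three columns might look like this (a sketch, using SUBSTR/INSTR on the ','-delimited string; it assumes the NLS decimal character is '.', otherwise pick a different delimiter):

            select item, trans_date, quantity
                  ,to_number(substr(prices, 1, instr(prices, ',') - 1))                 as gross_price
                  ,to_number(substr(prices, instr(prices, ',') + 1,
                                    instr(prices, ',', 1, 2) - instr(prices, ',') - 1)) as net_price
                  ,to_number(substr(prices, instr(prices, ',', 1, 2) + 1))              as special_price
            from (
                select tr.item, tr.trans_date, tr.quantity
                      ,(SELECT MAX(gross_price||','||net_price||','||special_price)
                               KEEP(DENSE_RANK FIRST ORDER BY price_date DESC)
                        FROM   XO_PRICES
                        WHERE  item = tr.item AND price_date <= tr.trans_date) as prices
                from xo_stock_trans tr
                where tr.trans_date between date '2014-08-01' and date '2014-08-31'
            );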


            select * from (
            select tr.item, tr.trans_date, tr.quantity 
                , pr.gross_price 
                , pr.net_price 
                , pr.special_price 
                ,ROW_NUMBER() OVER(PARTITION BY tr.rowid order by pr.price_date desc) rn
            from xo_stock_trans tr 
                join xo_prices pr on pr.item = tr.item 
                and pr.price_date <= tr.trans_date
            where tr.trans_date between date '2014-08-01' and date '2014-08-31'
            )
            where rn=1;

            1 person found this helpful
            • 3. Re: Performance tuning queries to get most recent price
              Alan Lawlor

              Thanks very much Frank and James

               

              My original query with the user-defined function takes between 10 and 40 seconds to run against the sample data I generated (cursor open + first 100 rows fetched).

              My alternative joining to a MAX() subquery was a non-runner: I cancelled it after 5 minutes (with an explain plan cost in the gazillions!).

               

              Here are the results of each of these suggestions:

              1. Frank's suggestion of adding an end date to the price table: While adding it to the actual table could be very complex, I tried an alternative, which is to create a materialised view using the LEAD analytic function (the price table itself is fully refreshed each night, so I just need to add an MV refresh afterwards). The MV refresh takes approx 10 seconds to run, but it only needs to happen once in the overnight batch (my real-life data might take a bit longer).

              create materialized view xo_prices_mv
              as
              select item, price_date,
                      nvl(lead(price_date,1) over (partition by item order by price_date), to_date('31-DEC-2014','DD-MON-YYYY') ) as end_date,
                      gross_price, net_price, special_price
              from xo_prices;

              create index xo_prices_mv_idx1 on xo_prices_mv (item, price_date, end_date);
              exec dbms_stats.gather_table_stats(ownname =>user, tabname => 'XO_PRICES_MV' , estimate_percent => 100, degree => dbms_stats.auto_degree, cascade=>true);

              My new SQL takes less than one second - FAST (cursor open + first 100 rows fetched):

              select tr.item, tr.trans_date, tr.quantity
                  , pr.gross_price
                  , pr.net_price
                  , pr.special_price
              from xo_stock_trans tr
              join xo_prices_mv pr on pr.item = tr.item
                              and pr.price_date <= tr.trans_date
                              and pr.end_date > tr.trans_date
              where tr.trans_date between '01-AUG-2014' and '31-AUG-2014';

               

              2. James' DENSE_RANK analytic function approach:

              Parsing the three results out of a single CSV string would make for a messy piece of SQL (IMHO), so I tried it with three scalar subqueries instead (I think the way the CBO handles them means this is not materially slower):

               

              select tr.item, tr.trans_date, tr.quantity
                  ,(SELECT MAX(gross_price) KEEP(DENSE_RANK FIRST ORDER BY price_date DESC) FROM XO_PRICES
                    WHERE item = tr.item AND price_date <= tr.trans_date)   as gross_price
                  ,(SELECT MAX(net_price) KEEP(DENSE_RANK FIRST ORDER BY price_date DESC) FROM XO_PRICES
                    WHERE item = tr.item AND price_date <= tr.trans_date)   as net_price
                  ,(SELECT MAX(special_price) KEEP(DENSE_RANK FIRST ORDER BY price_date DESC) FROM XO_PRICES
                    WHERE item = tr.item AND price_date <= tr.trans_date)   as special_price
              from xo_stock_trans tr
              where tr.trans_date between '01-SEP-2014' and '30-SEP-2014';

              This is also very fast (less than 1 second) and has the added advantage of not requiring any changes to the table DDL (adding an end_date column), any additional objects (e.g. an MV), or any additional maintenance processes (e.g. refreshing the MV).

               

              3. James' ROW_NUMBER analytic function approach:

              While it is the simplest-looking SQL, the problem here is that its cardinality is very high (it returns every current and historical price for each transaction first, then filters out all but the most recent):

              select * from (
              select tr.item, tr.trans_date, tr.quantity
                  , pr.gross_price
                  , pr.net_price
                  , pr.special_price
                  ,ROW_NUMBER() OVER(PARTITION BY tr.rowid order by pr.price_date desc) rn
              from xo_stock_trans tr
                  join xo_prices pr on pr.item = tr.item
                  and pr.price_date <= tr.trans_date
              where tr.trans_date between '01-AUG-2014' and '31-AUG-2014'
              )
              where rn=1;

              Results did not return within 1 minute, so I stopped the query and ruled this approach out.

               

              I am going to proceed with Frank's approach as I do not see much of a performance difference between (1) and (2). While there is more upfront work in creating and maintaining an MV, the resulting changes (to lots of SQL in reports, inquiries, ETLs and other batch processes) will leave elegant and simple queries.

               

              Happy New Year !

               

              Alan

              • 4. Re: Performance tuning queries to get most recent price
                Alan Lawlor

                Frank - in answer to your question on the DB version, we are on 11gR2 but plan to upgrade to 12c in the next few months. I will research "temporal validity" to see how useful it might be in the future.

                • 5. Re: Performance tuning queries to get most recent price
                  Frank Kulash

                  Hi, Alan,

                  Alan Lawlor wrote:

                   

                   

                          nvl(lead(price_date,1) over (partition by item order by price_date), to_date('31-DEC-2014','DD-MON-YYYY') ) as end_date,

                  ...

                  Do you really want to hard-code a real date like that?  I would use an impossible date, such as December 31, 9999 instead, or, depending on how the value was to be used, take TRUNC (SYSDATE) or TRUNC (SYSDATE + 1) as the default value.

                   

                  Also, you probably don't want NVL.  Instead, you can pass a 3rd argument to LEAD, like this:

                  LEAD ( price_date
                       , 1
                       , TRUNC (SYSDATE + 1)    -- or whatever default value you want
                       ) OVER ( PARTITION BY  item
                                ORDER BY      price_date
                              )    AS end_date,

                  LEAD will return the default value only when there are no more rows in the partition.

                  NVL will return the default value whenever the 1st argument is NULL.  No more rows in the partition is one reason why that value might be NULL; actually having a NULL in the table is another.

                  In cases where you can have NULLs in the table, you probably want those NULLs in the result set, too.

                  In cases where it's impossible to have NULLs in the table (and I assume this is one of those cases), the results are the same whether you use NVL or the 3rd argument to LEAD.  LEAD might be slightly more efficient (certainly no less efficient) than NVL, and you might find it simpler than NVL.  (I do.)

                  • 6. Re: Performance tuning queries to get most recent price
                    Alan Lawlor

                    Frank - you are right. I meant to use 31-DEC-9999 as the date for the most recent price but, given the day that's in it, I accidentally typed 31-DEC-2014.

                     

                    I take your point and will use the 3rd parameter of the LEAD function rather than NVL - even if only for elegance.

                     

                    As you suspect, the date will never be null, so I do not have to worry about that anyway.

                    • 7. Re: Performance tuning queries to get most recent price
                      Jonathan Lewis

                      It looks like you're missing a constraint on your data, without which you can end up with bad data, viz. multiple rows for the same item/date combination. You need a unique constraint on (item, price_date).
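
                      For example (a sketch - the constraint name is illustrative, and on the generated sample data it will fail until the duplicate item/date rows are removed):

                      -- Enforce one price row per item per effective date.
                      ALTER TABLE xo_prices
                          ADD CONSTRAINT xo_prices_uk UNIQUE (item, price_date);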

                      If you had that in place (which your current data set won't allow), you might find that your "select max()" query style actually works. I had to put in a couple of hints on a minor variation accessing your data - it produced far too many rows - but it was able to return results very quickly. (I've used traditional Oracle syntax and modified the correlation columns.)



                      select
                              /*+ leading (tr pr) use_nl(pr) */
                              tr.item, tr.trans_date, tr.quantity
                          , pr.gross_price
                          , pr.net_price
                          , pr.special_price
                      from
                              xo_stock_trans tr,
                              xo_prices pr
                      where
                              tr.trans_date between '01-AUG-2014' and '31-AUG-2014'
                      and     pr.item = tr.item
                      and     pr.price_date = (
                                      select /*+ no_unnest */
                                              max(pr2.price_date)
                                      from    xo_prices pr2
                                      where   pr2.item = tr.item
                                      and     pr2.price_date <= tr.trans_date
                              )
                      ;

                       

                      Using 11.2.0.4 I got the following plan:

                       

                      --------------------------------------------------------------------------------------------------
                      | Id  | Operation                       | Name           | Rows  | Bytes | Cost (%CPU)| Time     |
                      --------------------------------------------------------------------------------------------------
                      |   0 | SELECT STATEMENT                |                |  3423M|   111G|  1824K  (1)| 02:32:02 |
                      |   1 |  NESTED LOOPS                   |                |  3413K|   113M|  1824K  (1)| 02:32:02 |
                      |   2 |   NESTED LOOPS                  |                |  3413K|   113M|  1824K  (1)| 02:32:02 |
                      |*  3 |    TABLE ACCESS FULL            | XO_STOCK_TRANS |   202K|  2960K|  2531  (13)| 00:00:13 |
                      |*  4 |    INDEX RANGE SCAN             | XO_PRICES_IX1  |    16 |       |      2  (0)| 00:00:01 |
                      |   5 |     SORT AGGREGATE              |                |     1 |    12 |            |          |
                      |   6 |      FIRST ROW                  |                |     1 |    12 |      3  (0)| 00:00:01 |
                      |*  7 |       INDEX RANGE SCAN (MIN/MAX)| XO_PRICES_IX1  |     1 |    12 |      3  (0)| 00:00:01 |
                      |   8 |   TABLE ACCESS BY INDEX ROWID   | XO_PRICES      |    17 |   340 |      9  (0)| 00:00:01 |
                      --------------------------------------------------------------------------------------------------

                       

                      Predicate Information (identified by operation id):
                      ---------------------------------------------------
                        3 - filter("TR"."TRANS_DATE">=TO_DATE(' 2014-08-01 00:00:00', 'syyyy-mm-dd
                                    hh24:mi:ss') AND "TR"."TRANS_DATE"<=TO_DATE(' 2014-08-31 00:00:00', 'syyyy-mm-dd
                                    hh24:mi:ss'))
                        4 - access("PR"."ITEM"="TR"."ITEM" AND "PR"."PRICE_DATE"= (SELECT /*+ NO_UNNEST */
                                    MAX("PR2"."PRICE_DATE") FROM "XO_PRICES" "PR2" WHERE "PR2"."PRICE_DATE"<=:B1 AND
                                    "PR2"."ITEM"=:B2))
                        7 - access("PR2"."ITEM"=:B1 AND "PR2"."PRICE_DATE"<=:B2)

                       

                       

                      The order of execution is 3, 7, 6, 5, 4, 2, 8, 1, 0; i.e. for each relevant stock_trans row, find the maximum price_date that is no greater than the stock_trans date using the (optimal) min/max index range scan, then use that date with the item code to find the matching prices. If (item, price_date) were declared unique then line 4 would be a unique scan, and the access path would be about 7 block gets (mostly buffered) per stock_trans row to get its most appropriate price.

                       

                       

                      Regards

                      Jonathan Lewis