5 Replies Latest reply: Apr 2, 2013 6:17 AM by chris227 RSS

    sum in hierarchy

    AlexAnd
      hi all
      create table tdata 
      ( 
      PERSON_ID number, 
      ORG_ID number, 
      TIMEE number
      );
      
      insert into tdata (PERSON_ID, ORG_ID, TIMEE)
      values (363, 1529, 0);
      
      insert into tdata (PERSON_ID, ORG_ID, TIMEE)
      values (363, 1892, 0);
      
      insert into tdata (PERSON_ID, ORG_ID, TIMEE)
      values (597, 309, 20);
      
      insert into tdata (PERSON_ID, ORG_ID, TIMEE)
      values (597, 905, 20);
      
      insert into tdata (PERSON_ID, ORG_ID, TIMEE)
      values (4730, 905, 20);
      
      create table thierarch
      (
      ORG_ID number, 
      LV number, 
      PARENT_ORG_ID number
      );
      
      insert into thierarch (ORG_ID, LV, PARENT_ORG_ID)
      values (167, 3, 0);
      
      insert into thierarch (ORG_ID, LV, PARENT_ORG_ID)
      values (172, 3, 0);
      
      insert into thierarch (ORG_ID, LV, PARENT_ORG_ID)
      values (905, 1, 1489);
      
      insert into thierarch (ORG_ID, LV, PARENT_ORG_ID)
      values (176, 2, 172);
      
      insert into thierarch (ORG_ID, LV, PARENT_ORG_ID)
      values (309, 1, 176);
      
      insert into thierarch (ORG_ID, LV, PARENT_ORG_ID)
      values (1489, 2, 167);
      
      insert into thierarch (ORG_ID, LV, PARENT_ORG_ID)
      values (1892, 1, 168);
      
      insert into thierarch (ORG_ID, LV, PARENT_ORG_ID)
      values (168, 2, 167);
      
      insert into thierarch (ORG_ID, LV, PARENT_ORG_ID)
      values (1529, 1, 168);
      with t as
       (select org_id, lv, parent_org_id, 'ORG' who_type, 0 timee
          from thierarch
        connect by parent_org_id = prior org_id
         start with parent_org_id = 0
        --
        union all
        --
        select p.person_id, 0, o.org_id, 'PER', p.TIMEE
          from (select *
                  from thierarch
                 where lv = 1
                connect by parent_org_id = prior org_id
                 start with parent_org_id = 0) o,
               (select * from tdata) p
         where o.org_id = p.org_id)
      --
      select rownum,
             org_id,
             who_type,
             parent_org_id,
             t.lv,
             TIMEE,
             (select sum(tt.TIMEE)
                from t tt
               start with tt.org_id = t.org_id
              connect by prior tt.org_id = tt.parent_org_id) as TIMEE
        from t
      connect by parent_org_id = prior org_id
       start with parent_org_id = 0;
      gives
       
          ROWNUM     ORG_ID WHO_TYPE PARENT_ORG_ID         LV      TIMEE      TIMEE
      ---------- ---------- -------- ------------- ---------- ---------- ----------
               1        167 ORG                  0          3          0         40
               2       1489 ORG                167          2          0         40
               3        905 ORG               1489          1          0         40
               4       4730 PER                905          0         20         20
               5        597 PER                905          0         20         40
               6        168 ORG                167          2          0          0
               7       1892 ORG                168          1          0          0
               8        363 PER               1892          0          0          0
               9       1529 ORG                168          1          0          0
              10        363 PER               1529          0          0          0
              11        172 ORG                  0          3          0         20
              12        176 ORG                172          2          0         20
              13        309 ORG                176          1          0         20
              14        597 PER                309          0         20         40
       
      14 rows selected
       
      SQL> 
      but i want
       
          ROWNUM     ORG_ID WHO_TYPE PARENT_ORG_ID         LV      TIMEE      TIMEE
      ---------- ---------- -------- ------------- ---------- ---------- ----------
               1        167 ORG                  0          3          0         40
               2       1489 ORG                167          2          0         40
               3        905 ORG               1489          1          0         40
               4       4730 PER                905          0         20         20
               5        597 PER                905          0         20         20
               6        168 ORG                167          2          0          0
               7       1892 ORG                168          1          0          0
               8        363 PER               1892          0          0          0
               9       1529 ORG                168          1          0          0
              10        363 PER               1529          0          0          0
              11        172 ORG                  0          3          0         20
              12        176 ORG                172          2          0         20
              13        309 ORG                176          1          0         20
              14        597 PER                309          0         20         20
       
      14 rows selected
       
      SQL> 
      so for person 597 must be 20 because hi works in two different org

      what i missed?
        • 1. Re: sum in hierarchy
          Frank Kulash
          Hi,

          The scalar sub-query that computes the wrong value is looking for a specific org_id in result set t, but org_id is not unique. It looks like the combination of org_id and parent_org_id is unique, so if you want to uniquely identify rows, you need to compare both values.
          with t as
           (select org_id, lv, parent_org_id, 'ORG' who_type, 0 timee
              from thierarch
            connect by parent_org_id = prior org_id
             start with parent_org_id = 0
            --
            union all
            --
            select p.person_id, 0, o.org_id, 'PER', p.TIMEE
              from (select *
                      from thierarch
                     where lv = 1
                    connect by parent_org_id = prior org_id
                     start with parent_org_id = 0) o,
                   (select * from tdata) p
             where o.org_id = p.org_id)
          --
          select rownum,
                 org_id,
                 who_type,
                 parent_org_id,
                 t.lv,
                 TIMEE,
                 (select sum(tt.TIMEE)
                    from t tt
                   start with tt.org_id = t.org_id
                      AND  tt.parent_org_id     = t.parent_org_id     -- *****  NEW  *****
                  connect by prior tt.org_id = tt.parent_org_id) as TIMEE
            from t
          connect by parent_org_id = prior org_id
           start with parent_org_id = 0;
          I only added 1 line, 5 line from the end.
          Depending on your data and your requirements, you may need to make the smae kind of change elsewhere in the query where org_id is used.

          This whole query looks unnecessarily complicated. I don't know what you're trying to do, but it's hard to imagine anyting that you might be trying that can't be done with 2, or possible 1, CONNECT BY query, and not 4 like you're using.
          • 2. Re: sum in hierarchy
            AlexAnd
            many thanks Frank

            it's what i missed =\

            >
            This whole query looks unnecessarily complicated. I don't know what you're trying to do, but it's hard to imagine anyting that you might be trying that can't be done with 2, or possible 1, CONNECT BY query, and not 4 like you're using.
            >
            a part of logic is that i want to forming hierarchy based on persons(table tdata) and orgs (tables thierarch) with relation as tdata.org_id = thierarch.org_id
            in result must be persons with full hierarchy of orgs above (dynamic count of level)

            may be you are right and query may be simple but in night it's was first idea

            do you have any idea to simplify query? or may be you have some knowledge for this pattern?
            many thanks
            • 3. Re: sum in hierarchy
              chris227
              >
              This whole query looks unnecessarily complicated. I don't know what you're trying to do, but it's hard to imagine anyting that you might be trying that can't be done with 2, or possible 1, CONNECT BY query, and not 4 like you're using.
              Hi Frank,

              that is exactly what i thought facing this query the frist time.
              That's why i didnt post a "simple" fix, but started to find a rewritten version
              , but didnt find to an end, so i was hoping for some neat solution from you :-)

              I started with a union of both tables and a connect by on that.
              So i got the base structur.
              The last step should "only" to be a, may be analytical, sum up of the subordered timees.
              Here it would be helpful to know if there could be only persons in leaf organisations or anywhere in the tree.
              But i had difficulties to figure out an approbiate order for the analytical function on the fly.
              My problem was that for some nodes on level 2 for examples the sum has to build up on their separate branches.
              Do you know an simple approach for this with connect by?
              That would be interesting ot only for me i guess.

              Regards
              chris
              • 4. Re: sum in hierarchy
                Frank Kulash
                Hi, Chris,
                chris227 wrote:
                ... I started with a union of both tables and a connect by on that.
                So i got the base structur.
                The last step should "only" to be a, may be analytical, sum up of the subordered timees.
                Here it would be helpful to know if there could be only persons in leaf organisations or anywhere in the tree.
                But i had difficulties to figure out an approbiate order for the analytical function on the fly.
                If I understand you, the ORDER BY should be easy; just capture ROWNUM when you do the CONNECT BY.
                It's the windowing clause that's tricky.
                My problem was that for some nodes on level 2 for examples the sum has to build up on their separate branches.
                I think you were trying to do something like this:
                WITH    universe   AS
                (
                     SELECT  org_id
                     ,      lv
                     ,      parent_org_id
                     ,      'ORG'          AS who_type
                     ,      0           AS timee
                         FROM     thierarch
                    --
                    UNION ALL
                    --
                        SELECT  person_id     AS org_id
                     ,      0          AS lv
                     ,      org_id          AS parent_org_id
                     ,      'PER'          AS who_type
                     ,     timee
                     FROM     tdata
                )
                ,     connect_by_results     AS
                (
                     SELECT    ROWNUM     AS r_num
                     ,       universe.*
                     FROM      universe
                     START WITH  parent_org_id  = 0
                     CONNECT BY  parent_org_id  = PRIOR org_id 
                )
                ,     got_descendant_count     AS
                (
                     SELECT     c.*
                     ,     (
                              SELECT  NVL ( MIN (r_num)
                                            , 1E99     -- or any impossibly high number
                                    )
                              FROM    connect_by_results
                              WHERE   r_num     >  c.r_num
                              AND         lv          >= c.lv
                          ) - r_num         AS descendant_count
                     FROM    connect_by_results  c
                )
                SELECT       r_num
                ,       org_id
                ,       who_type
                ,       parent_org_id
                ,       lv
                ,       timee
                ,       SUM (timee) OVER ( ORDER BY         r_num
                                                RANGE BETWEEN  CURRENT ROW
                                            AND         descendant_count FOLLOWING
                                  ) AS total_timee
                FROM       got_descendant_count
                ORDER BY  r_num
                ;
                I find it clearer to use the aggregate SUM function, rather than the analytic SUM:
                WITH    universe   AS
                (
                     SELECT  org_id
                     ,      lv
                     ,      parent_org_id
                     ,      'ORG'          AS who_type
                     ,      0           AS timee
                         FROM     thierarch
                    --
                    UNION ALL
                    --
                        SELECT  person_id     AS org_id
                     ,      0          AS lv
                     ,      org_id          AS parent_org_id
                     ,      'PER'          AS who_type
                     ,     timee
                     FROM     tdata
                )
                ,     got_total_timee          AS
                (
                     SELECT       org_id, parent_org_id
                     ,       SUM (CONNECT_BY_ROOT timee)     AS total_timee
                     FROM       universe
                     CONNECT BY     org_id     = PRIOR parent_org_id
                     GROUP BY  org_id, parent_org_id
                )
                SELECT    ROWNUM     AS r_num
                ,       u.org_id
                ,       u.who_type
                ,       u.parent_org_id
                ,       u.lv
                ,       u.timee
                ,       t.total_timee
                FROM      universe         u
                JOIN       got_total_timee  t  ON   t.org_id         = u.org_id
                                       AND  t.parent_org_id  = u.parent_org_id
                START WITH  u.parent_org_id  = 0
                CONNECT BY  u.parent_org_id  = PRIOR u.org_id 
                ;
                • 5. Re: sum in hierarchy
                  chris227
                  Thanks Frank for this detailed answer.

                  You are right, the first approach is exactly what i tried.
                  I stucked in finding a "simple" expression in the window clause for what you named "got_descendant_count", but obviously there isnt any.

                  Regards.