11 Replies Latest reply on Jun 22, 2018 8:35 PM by Frank Kulash

    Getting incorrect data due to join conditions

    JSMQ
          INSERT INTO TGT_TABLE
            SELECT table1.DA_SEC,
      
      
                   table1.DA_REG,
      
      
                   table1.PROJ,
      
      
                   table1.SOL,
      
      
                   table1.SCAN_YEAR_MON,
      
      
                   to_char(TO_DATE(table1.SCAN_YEAR_MON,'YYYY-MM'),'YYYY'),
      
      
                   to_char(TO_DATE(table1.SCAN_YEAR_MON,'YYYY-MM'),'MM'),
      
      
                   COUNT(DISTINCT table1.ISSUE_ID),
      
      
                   COUNT(DISTINCT table1.USERNAME),
      
      
                   COUNT(DISTINCT table1.PATH),
      
      
                   sum(st_cnt.COUNT_FILES),
      
      
                   sum(st_cnt.COUNT_SCANS),
      
      
                   sum(unq_Pro.HIGH_CNT),
      
      
                   sum(unq_Pro.MEDIUM_CNT),
      
      
                   sum(unq_Pro.LOW_CNT),
      
      
                   sum(SUPPR_HIGH) - sum(UNSUPPR_HIGH),
      
      
                   sum(SUPPR_MEDIUM) - sum(UNSUPPR_MEDIUM),
      
      
                   sum(SUPPR_LOW) - sum(UNSUPPR_LOW),
      
      
                   CURRENT_DATE
      
      
                      FROM table1
      
      
                           JOIN
      
      
                           (SELECT PROJ,SOL,
      
      
                                   SUM(CASE WHEN ISSUE_IMP = 'high' THEN 1 ELSE 0 END) SUPPR_HIGH,
      
      
                                   SUM(CASE WHEN ISSUE_IMP = 'medium' THEN 1 ELSE 0 END) SUPPR_MEDIUM,
      
      
                                   SUM(CASE WHEN ISSUE_IMP = 'low' THEN 1 ELSE 0 END) SUPPR_LOW
      
      
                              from table1
      
      
                                   LEFT OUTER JOIN
      
      
                                   SUPPR_ISSUE@DATABASE_LINK1
      
      
                                ON table1.ISSUE_ID = SUPPR_ISSUE.ISSUE_ID
      
      
                             GROUP BY PROJ,SOL
      
      
                           ) SUPPR
      
      
                        ON table1.PROJ = SUPPR.PROJ
      
      
                           JOIN
      
      
                           (SELECT PROJ,SOL,
      
      
                                   SUM(CASE WHEN ISSUE_IMP = 'high' THEN 1 ELSE 0 END) UNSUPPR_HIGH,
      
      
                                   SUM(CASE WHEN ISSUE_IMP = 'medium' THEN 1 ELSE 0 END) UNSUPPR_MEDIUM,
      
      
                                   SUM(CASE WHEN ISSUE_IMP = 'low' THEN 1 ELSE 0 END) UNSUPPR_LOW
      
      
                              FROM table1
      
      
                                   LEFT OUTER JOIN
      
      
                                   UNSUPPR_ISSUE@DATABASE_LINK1
      
      
                                ON table1.ISSUE_ID = UNSUPPR_ISSUE.ISSUE_ID
      
      
                             GROUP BY PROJ,SOL
      
      
                           ) UNSUPPR
      
      
                        ON table1.PROJ = UNSUPPR.PROJ            
      
      
                   ) t1
      
      
                   JOIN
      
      
                   (SELECT PROJ,SOL,SCAN_YEAR,SCAN_MONTH,COUNT(DISTINCT PATH) COUNT_FILES,COUNT(DISTINCT SCAN_ID) COUNT_SCANS
      
      
                      FROM table2
      
      
                     GROUP BY PROJ,SOL,SCAN_YEAR,SCAN_MONTH
      
      
                   ) st_cnt
      
      
                ON st_cnt.PROJ = table1.PROJ --AND st_cnt.SOL = table1.SOL
      
      
                   (SELECT
      
      
                       unq_iss.PROJ,
      
      
                       unq_iss.SOL,
      
      
                       unq_iss.SCAN_YEAR_MON,
      
      
                       sum(CASE WHEN unq_iss.ISSUE_IMP = 'high' THEN 1 ELSE 0 END) HIGH_CNT,
      
      
                       sum(CASE WHEN unq_iss.ISSUE_IMP = 'medium' THEN 1 ELSE 0 END) MEDIUM_CNT,
      
      
                       sum(CASE WHEN unq_iss.ISSUE_IMP = 'low' THEN 1 ELSE 0 END) LOW_CNT
      
      
                     FROM (SELECT DISTINCT
      
      
                                  table1.ISSUE_ID,
      
      
                                  table1.ISSUE_IMP,
      
      
                                  table1.PROJ,
      
      
                                  table1.SOL,
      
      
                                  table1.SCAN_YEAR_MON
      
      
                             FROM table1
      
      
                          ) unq_iss
      
      
                     GROUP BY unq_iss.PROJ, unq_iss.SOL, unq_iss.SCAN_YEAR_MON
      
      
                   ) unq_Pro
      
      
                ON unq_Pro.PROJ = t1.PROJ
      
      
               AND unq_Pro.SCAN_YEAR_MON = t1.SCAN_YEAR_MON
      
      
             GROUP BY table1.DA_SEC,table1.DA_REG,table1.PROJ,table1.SOL,table1.SCAN_YEAR_MON;
      
      
      
      
      
      
        CREATE TABLE TGT_TABLE
      (
      
      
        DA_SEC                    VARCHAR2(100 CHAR),
      
      
        DA_REG                    VARCHAR2(100 CHAR),
      
      
        PROJ                      VARCHAR2(256 CHAR) NOT NULL,
      
      
        SOL                      VARCHAR2(256 CHAR),
      
      
        SCAN_YEAR_MON                 VARCHAR2(7),
      
      
        SCAN_YEAR                     VARCHAR2(7),
      
      
        SCAN_MONTH                    VARCHAR2(7),
      
      
        UNIQ_ISSUES_CNT             NUMBER(10)         NOT NULL,
      
      
        USER_CNT                      NUMBER(10)         NOT NULL,
      
      
        ISSUES_CNT NUMBER(10)         NOT NULL,
      
      
        TOTAL_FILES_SCANNED_CNT       NUMBER(10)         NOT NULL,
      
      
        TOTAL_SCAN_CNT                NUMBER(10)         NOT NULL,
      
      
        UNIQUE_HIGH_ISSUE_CNT         NUMBER(10)         NOT NULL,
      
      
        UNIQUE_MEDIUM_ISSUE_CNT       NUMBER(10)         NOT NULL,
      
      
        UNIQUE_LOW_ISSUE_CNT          NUMBER(10)         NOT NULL,
      
      
        HIGH_SUPPR_CNT           NUMBER(10)         NOT NULL,
      
      
        MEDIUM_SUPPR_CNT         NUMBER(10)         NOT NULL,
      
      
        LOW_SUPPR_CNT            NUMBER(10)         NOT NULL,
      
      
        LOAD_DATE_TIME                DATE               NOT NULL,
      
      
        CONSTRAINT pk_tgt_proj_scan_summary_id PRIMARY KEY (DA_SEC, DA_REG, PROJ,SOL,SCAN_YEAR_MON)
      
      
      );
      insert into TGT_TABLE(DA_SEC, DA_REG,PROJ,SOL,SCAN_YEAR_MON,SCAN_YEAR,SCAN_MONTH,UNIQ_ISSUES_CNT,USER_CNT,ISSUES_CNT,TOTAL_FILES_SCANNED_CNT,TOTAL_SCAN_CNT,UNIQUE_HIGH_ISSUE_CNT,UNIQUE_MEDIUM_ISSUE_CNT,UNIQUE_LOW_ISSUE_CNT,HIGH_SUPPR_CNT,MEDIUM_SUPPR_CNT,LOW_SUPPR_CNT,LOAD_DATE_TIME) values('RGC','SDEF','csproj','creditvan.sln',2018-05,2018,05,12,1,4,5670,15780,678,0,66,66,0,0,07-JUN-18)
      
      
      
      
      
      

       

      Hi - Here am trying to insert the records to the target table using this stored procedure. But am getting incorrect data for these two columns where i need only count(distinct(path) and scan_id in these columns and i want to remove the sum function.

                   sum(st_cnt.COUNT_FILES),

                   sum(st_cnt.COUNT_SCANS),

      If i use SUM ,it sums up all the column values irrespective of all year and month where i need the count for project,user,year and month wise. If i remove the sum function,then i need to put it in group by clause where again it's giving different output.

        • 1. Re: Getting incorrect data due to join conditions
          Gary_A
          1. to_char(TO_DATE(table1.SCAN_YEAR_MON,'YYYY-MM'),'YYYY'),   
          2.              to_char(TO_DATE(table1.SCAN_YEAR_MON,'YYYY-MM'),'MM'), 

           

          Why to_char a to_date? This is just the same as

           

          substr(table1.SCAN_YEAR_MON,1,4),

          substr(table1.SCAN_YEAR_MON,6)

           

          There is no DDL for table1.

          • 2. Re: Getting incorrect data due to join conditions
            Frank Kulash

            Hi,

             

            Whenever you have a problem, please post a little sample data (CREATE TABLE and INSERT statements, relevant columns only) from all tables involved, so that the people who want to help you can re-create the problem and test their ideas.
            Also post the exact results you want from that data, and an explanation of how you get those results from that data, with specific examples.
            If you're asking about a DML statement, such as INSERT, then the CREATE TABLE and INSERT statements you post should re-create the tables as they are before the DML, and the results  will be the contents of the changed table(s) when everything is finished.

            Always say which version of Oracle you're using (for example, 12.2.0.1.0).
            See the forum FAQ: Re: 2. How do I ask a question on the forums?

             

            Whatever you're doing to get all those blank lines in you message, please don't do it.  If you can't figure out how to stop them when you post your message, then edit your message after it's posted, and get rid of them then.

            • 3. Re: Getting incorrect data due to join conditions
              John Thorton

              Thread title reports incorrect data.

              Thread post does NOT ever post what "correct" data is expected or desired.

              • 4. Re: Getting incorrect data due to join conditions
                JSMQ

                For these 2 columns,i'd like to store the distinct count of path and scan_id column values from table 2 into tgt_table (should be group by project,scan_year,scan_month wise) but the current code sum up all the values where the query is retrieving

                           sum(st_cnt.COUNT_FILES),

                             sum(st_cnt.COUNT_SCANS)

                for example i consider this project as sample data and this what i expect

                     

                PROJSCAN_YEARSCAN_MONTHCOUNT_FILESCOUNT_SCANS
                Custprof2018556175

                but am getting

                     

                PROJSCAN_YEARSCAN_MONTHCOUNT_FILESCOUNT_SCANS
                Custprof2018535003500
                • 5. Re: Getting incorrect data due to join conditions
                  Frank Kulash

                  HI,

                  SenK wrote:

                   

                  For these 2 columns,i'd like to store the distinct count of path and scan_id column values from table 2 into tgt_table (should be group by project,scan_year,scan_month wise) but the current code sum up all the values where the query is retrieving

                  sum(st_cnt.COUNT_FILES),

                  sum(st_cnt.COUNT_SCANS)

                  ...

                  SUM (column)   isn't the way to count distinct values.

                  COUNT (DISTINCT column)  is.  (More precisely, it's one way, often the best.)

                  • 6. Re: Getting incorrect data due to join conditions
                    JSMQ

                    Yes Frank, that's what the query is designed inside "st_cnt "

                    SELECT PROJ,SOL,SCAN_YEAR,SCAN_MONTH,COUNT(DISTINCT PATH) COUNT_FILES,COUNT(DISTINCT SCAN_ID) COUNT_SCANS 

                                      FROM table2    GROUP BY PROJ,SOL,SCAN_YEAR,SCAN_MONTH

                     

                    When i tried to run as a single select statement,i was able to get the desired output but as a stored procedure am getting huge values

                    • 7. Re: Getting incorrect data due to join conditions
                      John Thorton

                      SenK wrote:

                       

                      When i tried to run as a single select statement,i was able to get the desired output but as a stored procedure am getting huge values

                      Have the procedure author fix the buggy code.

                      • 8. Re: Getting incorrect data due to join conditions
                        JSMQ

                        I removed the SUM function from the main query and i just passed the value to the column value of target table since the function count(distinct(col)) is already implied in subquery and finally got the desired output.

                        • 9. Re: Getting incorrect data due to join conditions
                          JSMQ

                          Actually this code was rewritten by Ebit in the previous thread. Instead of passing disgusting comments, try to share some useful and productive suggestions. i never seen you so far in giving productive answer.

                          • 10. Re: Getting incorrect data due to join conditions
                            John Thorton

                            SenK wrote:

                             

                            i never seen you so far in giving productive answer.

                            Then your observation skills are on par with Oracle coding skills.

                             

                            You'd benefit from conforming to content of URL below

                            How do I ask a question on the forums?

                            • 11. Re: Getting incorrect data due to join conditions
                              Frank Kulash

                              Hi,

                              SenK wrote:

                               

                              I removed the SUM function from the main query and i just passed the value to the column value of target table since the function count(distinct(col)) is already implied in subquery and finally got the desired output.

                              That's good news!

                               

                              Don't forget to mark the thread as "Answered"; it can help other people with similar problems, and will save time for people looking for questions to answer.