1 2 Previous Next 29 Replies Latest reply: Dec 13, 2012 6:15 AM by 976910 RSS

    count based on soundex function

    976910
      Hello to all,

      i have little bit confused with query requested from me .
      the query is
      show the count number of the last part of each word in same row according to soundex function
      for example
      the first row has four statment in same row
      ACD BCD AAA
      AFD BCF
      BDE AAC AFC
      BCF AAE
      that mean i have to divided the each statment and get soundex for last part that is in this example is \
      AAA
      BCF
      AFC
      AAE 
      So the soundex only for AAA and AAE is same the mean i have to update table b in Gen column by two and son on for others.
      create table  a(id number(9), Gen_Value varchar2(200));
      
      insert into a vlaues(1, 'ACD BCD AAA, AFD BCF , BDE AAC AFC,BCF AAE');
      insert into a vlaues(2, 'AVF BCD, BBA BBG BCEV, GACD MNF BCV');
      insert into a vlaues(3, 'AFC ABC, BBG HUH ABCE, JHU KK MNK');
      
      
      create table  b(id number(9), Gen_Value varchar2(200), id number(9));
      insert into b values( 'AAA', '0',1);
      insert into b values ('BCEV  ','0',2);
      insert into b values ('AFC','0',1);
      insert into b values ('BCD','0',2);
      insert into b values ('MNK','0',3);
      insert into b values ('ABCE','0',3);
      The results is
      Gen   count  id 
      AAA    2     1
      BCEV   2     2
      AFC     1     1
      BCD     1     2
      MNK    1     3   
      ABCE  2     3
      cheers
      Dheya

      Edited by: 973907 on Nov 29, 2012 12:54 AM

      Edited by: 973907 on Nov 29, 2012 12:55 AM
        • 1. Re: count based on soundex function
          976910
          Any help.
          • 2. Re: count based on soundex function
            chris227
            with data as (
             select
              id
             ,gen_value||',' gen_value
             from a
            )
            ,r  (id, gen_value, l) as (
             select  
              id
             ,substr(gen_value, instr(gen_value, ',') + 1)
             ,substr(
               trim(substr(gen_value,1, instr(gen_value, ',') - 1))
              ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
             )
            from data
            union all
             select  
              id
             ,substr(gen_value, instr(gen_value, ',') + 1)
             ,substr(
               trim(substr(gen_value,1, instr(gen_value, ',') - 1))
              ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
             )
            from r
            where
            length(gen_value) > 0
            )
            
            select 
             soundex(l)
            ,count(*) cnt
            from r
            group by
             soundex(l)
            order by
             1
            
            SOUNDEX(L) CNT 
            A000 2 
            A120 3 
            B210 3 
            B230 1 
            M200 1 
            
            just to show the values, use listagg
            
            select 
             soundex(l)
            ,count(*) cnt
            ,listagg(l) within group (order by l) v
            from r
            group by
             soundex(l)
            order by
             1
            
            SOUNDEX(L) CNT V 
            A000 2 AAA AAE 
            A120 3 ABC ABCE AFC 
            B210 3 BCEV BCF BCV 
            B230 1 BCD 
            M200 1 MNK 
            Edited by: chris227 on 29.11.2012 01:51
            • 3. Re: count based on soundex function
              padders
              Explain why you regard 'AAE' as a duplicate of 'AAA' and not the other way around.

              In other words when you find that 'AAA' and 'AAE' both SOUNDEX to 'A000' how do you chose which of 'AAA' or 'AAE' should go into table 'B', is it because 'AAA' has the lowest 'ID' in table 'A' or because 'AAA' is alphabetically the lowest - or some other reason.
              • 4. Re: count based on soundex function
                976910
                thank you ..

                We select any of them. 'AAA' or AAE. or the word that has long length.

                regards
                Dheya
                • 5. Re: count based on soundex function
                  chris227
                  973907 wrote:
                  We select any of them. 'AAA' or AAE. or the word that has long length.
                  No prob
                  with data as (
                   select
                    id
                   ,gen_value||',' gen_value
                   from a
                  )
                  ,r  (id, gen_value, l) as (
                   select  
                    id
                   ,substr(gen_value, instr(gen_value, ',') + 1)
                   ,substr(
                     trim(substr(gen_value,1, instr(gen_value, ',') - 1))
                    ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
                   )
                  from data
                  union all
                   select  
                    id
                   ,substr(gen_value, instr(gen_value, ',') + 1)
                   ,substr(
                     trim(substr(gen_value,1, instr(gen_value, ',') - 1))
                    ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
                   )
                  from r
                  where
                  length(gen_value) > 0
                  )
                  
                  select 
                   soundex(l)
                  ,count(*) cnt
                  ,max(l) keep ( dense_rank last order by length(l), l desc nulls first) v
                  from r
                  group by
                   soundex(l)
                  order by
                   1
                  
                  SOUNDEX(L) CNT V 
                  A000 2 AAA 
                  A120 3 ABCE 
                  B210 3 BCEV 
                  B230 1 BCD 
                  M200 1 MNK 
                  • 6. Re: count based on soundex function
                    976910
                    That's is fine and it is working Ok.
                    SQL> with data as (
                      2   select
                      3    id
                      4   ,gen_value||',' gen_value
                      5   from a
                      6  )
                      7  ,r  (id, gen_value, l) as (
                      8   select
                      9    id
                     10   ,substr(gen_value, instr(gen_value, ',') + 1)
                     11   ,substr(
                     12     trim(substr(gen_value,1, instr(gen_value, ',') - 1))
                     13    ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
                     14   )
                     15  from data
                     16  union all
                     17   select
                     18    id
                     19   ,substr(gen_value, instr(gen_value, ',') + 1)
                     20   ,substr(
                     21     trim(substr(gen_value,1, instr(gen_value, ',') - 1))
                     22    ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
                     23   )
                     24  from r
                     25  where
                     26  length(gen_value) > 0
                     27  )
                     28   select id,
                     29   soundex(l)
                     30  ,count(*) cnt
                     31  ,max(l) keep ( dense_rank last order by length(l), l desc nulls first) v
                     32  from r
                     33  group by
                     34   soundex(l),id
                     35  order by
                     36   1;
                    
                    
                            ID SOUN        CNT V
                    ---------- ---- ---------- ------------
                             1 A000          2  AAA
                             1 A120          1  AFC
                             1 B210          1  BCF
                             2 B210          2  BCEV
                             2 B230          1  BCD
                             3 A120          2  ABCE
                             3 M200          1  MNK
                    
                    7 rows selected.
                    
                    
                    
                    SQL>
                    i need also to update the count the words to second table already exist called by IT has the follwoing attributtes.
                    create table B (id number(9), words varchar2(50), cnt number(9));
                    
                    insert into b values (1,'ACD BCD AAA',null);
                    insert into b values (1,'BDE AAC AFC',null);         
                    insert into b values (1,'AFD BCF',null);       
                    insert into b values (2,'BBA BBG BCEV',null);
                    insert into b values (2',AVF BCD',null);
                    insert into b values (3,'BBG HUH ABCE',null);
                    insert into b values (3,'JHU KK MNK',null);
                    expected output
                    id         word                   cnt 
                    1       ACD BCD AAA          2
                    1       BDE AAC AFC          1
                    1       AFD BCF                 1
                    2       BBA BBG BCEV         2
                    2       AVF BCD                  1
                    3       BBG HUH ABCE         2
                    3       JHU KK MNK             1
                    Note: the compare based on soundex and ONLY for last part of sentence.
                    CHERRS
                    DHEYA

                    Edited by: 973907 on Dec 6, 2012 8:10 PM
                    • 7. Re: count based on soundex function
                      976910
                      the update instead of last select.

                      regards
                      • 8. Re: count based on soundex function
                        976910
                        Hello anybody help me ?
                        • 9. Re: count based on soundex function
                          chris227
                          update b
                          set cnt = (
                          with data as (
                           select
                            id
                           ,gen_value||',' gen_value
                           from a
                          )
                          ,r  (id, gen_value, l) as (
                           select  
                            id
                           ,substr(gen_value, instr(gen_value, ',') + 1)
                           ,substr(
                             trim(substr(gen_value,1, instr(gen_value, ',') - 1))
                            ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
                           )
                          from data
                          union all
                           select  
                            id
                           ,substr(gen_value, instr(gen_value, ',') + 1)
                           ,substr(
                             trim(substr(gen_value,1, instr(gen_value, ',') - 1))
                            ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
                           )
                          from r
                          where
                          length(gen_value) > 0
                          )
                          , cnts as ( 
                          select 
                          -- soundex(l) s
                          count(*) cnt
                          ,max(l) keep ( dense_rank last order by length(l), l desc nulls first) v
                          --,id
                          from r
                          group by
                           soundex(l), id
                          )
                          
                          select
                           cnts.cnt
                          from cnts
                          where
                          words like '% '||v
                          )
                          
                          7 row(s) updated.
                          
                          select * from b
                          
                          ID WORDS CNT 
                          1 ACD BCD AAA 2 
                          1 BDE AAC AFC 1 
                          3 JHU KK MNK 1 
                          2 AVF BCD 1 
                          2 BBA BBG BCEV 2 
                          3 BBG HUH ABCE 2 
                          1 AFD BCF 1 
                          Edited by: chris227 on 07.12.2012 08:21
                          words like '% '||v with space
                          • 10. Re: count based on soundex function
                            AlbertoFaenza
                            Hi,

                            having table a in input you can create rows in table b as INSERT INTO B SELECT .. from the query below:
                            CREATE TABLE  a(id NUMBER(9), gen_value VARCHAR2(200));
                            
                            INSERT INTO a VALUES(1, 'ACD BCD AAA, AFD BCF, BDE AAC AFC,BCF AAE');
                            INSERT INTO a VALUES(2, 'AVF BCD, BBA BBG BCEV, GACD MNF BCV');
                            INSERT INTO a VALUES(3, 'AFC ABC, BBG HUH ABCE, JHU KK MNK');
                            COMMIT;
                            
                            CREATE TABLE  b(id NUMBER(9), Gen_Value VARCHAR2(200), cnt NUMBER(9));
                            
                            INSERT INTO b (id, gen_value, cnt)
                            WITH got_values AS
                            (
                               SELECT id, TRIM((column_value).getstringval()) strorig
                                    , REGEXP_SUBSTR((column_value).getstringval(),'\w+$') str
                                 FROM a, xmltable(('"'||REPLACE(gen_value,',','","')||'"')) 
                            )
                            , got_sdx_cnt AS
                            (
                               SELECT id, strorig
                                    , COUNT(*) OVER (PARTITION BY id, SOUNDEX(str)) cnt
                                    , ROW_NUMBER() OVER (PARTITION BY id, SOUNDEX(str) ORDER BY LENGTH(str) DESC, ROWNUM) rn
                                 FROM got_values
                            )
                            SELECT id, strorig, cnt 
                              FROM got_sdx_cnt
                             WHERE rn=1;
                             
                            SELECT * FROM b;
                            
                                    ID GEN_VALUE              CNT
                            ---------- --------------- ----------
                                     1 ACD BCD AAA              2
                                     1 BDE AAC AFC              1
                                     1 AFD BCF                  1
                                     2 BBA BBG BCEV             2
                                     2 AVF BCD                  1
                                     3 BBG HUH ABCE             2
                                     3 JHU KK MNK               1
                            The code above is working on 11g.

                            Regards.
                            Al

                            Edited by: Alberto Faenza on Dec 7, 2012 9:18 PM

                            Edited by: Alberto Faenza on Dec 8, 2012 2:33 PM
                            • 11. Re: count based on soundex function
                              Ayham
                              remove
                              • 12. Re: count based on soundex function
                                Ayham
                                remove
                                • 13. Re: count based on soundex function
                                  976910
                                  But when i add just one word Like Egypt twice in row id no=1
                                  and also in one time in row id =3.
                                  the output will be empty for counting.
                                  like this:

                                  drop table a;
                                  create table  a(id number(9), gen_value varchar2(200));
                                   
                                  insert into a values(1, 'ACD BCD AAA, AFD BCF , Egypt,BDE AAC AFC,Egypt,BCF AAE');
                                  insert into a values(2, 'AVF BCD, BBA BBG BCEV, GACD MNF BCV');
                                  insert into a values(3, 'AFC ABC, BBG HUH ABCE, Egypt,JHU KK MNK');
                                  
                                  drop table B ;
                                  create table B (bid number(9), words varchar2(50), per number(9));
                                   
                                  insert into b values (1,'ACD BCD AAA',null);
                                  insert into b values (1,'BDE AAC AFC',null);         
                                  insert into b values (1,'AFD BCF',null);    
                                  insert into b values (1,'EGYPT',null);
                                  insert into b values (1,'BDE AAC AFC',null);         
                                  insert into b values (1,'AFD BCF',null);     
                                  insert into b values (2,'BBA BBG BCEV',null);
                                  insert into b values (2,'AVF BCD',null);
                                  insert into b values (3,'BBG HUH ABCE',null);
                                  insert into b values (3,'JHU KK MNK',null);
                                  insert into b values (3,'EGYPT',null);
                                  update B
                                  set per = (
                                  with data as (
                                   select
                                    id
                                   ,gen_value||',' gen_value
                                   from a
                                  )
                                  ,r  (id, gen_value, l) as (
                                   select  
                                    id
                                   ,substr(gen_value, instr(gen_value, ',') + 1)
                                   ,substr(
                                     trim(substr(gen_value,1, instr(gen_value, ',') - 1))
                                    ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
                                   )
                                  from data
                                  union all
                                   select  
                                    id
                                   ,substr(gen_value, instr(gen_value, ',') + 1)
                                   ,substr(
                                     trim(substr(gen_value,1, instr(gen_value, ',') - 1))
                                    ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
                                   )
                                  from r
                                  where
                                  length(gen_value) > 0
                                  )
                                  , cnts as ( 
                                  select 
                                  -- soundex(l) s
                                  count(*) cnt
                                  ,max(l) keep ( dense_rank last order by length(l), l desc nulls first) v
                                  --,id
                                  from r
                                  group by
                                   soundex(l), id
                                  )
                                   select
                                   cnts.cnt
                                  from cnts
                                  where
                                  words like '%'||v );
                                  select * from b;
                                         BID WORDS                       PER
                                  ---------- -------------------- ----------
                                           1 ACD BCD AAA                   2
                                           1 BDE AAC AFC                   1
                                           1 AFD BCF                          1
                                           1 EGYPT                                                   -- This count is empty
                                           1 BDE AAC AFC                   1
                                           1 AFD BCF                          1
                                           2 BBA BBG BCEV                  2
                                           2 AVF BCD                           1
                                           3 BBG HUH ABCE                  2
                                           3 JHU KK MNK                      1
                                           3 EGYPT                                                      -- this is also empty . Why?
                                  
                                  11 rows selected.
                                  
                                  SQL>
                                  when i tried to add soundex for words and V.
                                  for last part like this
                                   select
                                   cnts.cnt
                                  from cnts
                                  where
                                  soundex (words) like '%'||soundex (v) );
                                  The error msg appear like this
                                  SQL> /
                                  with data as (
                                  *
                                  ERROR at line 3:
                                  ORA-01427: single-row subquery returns more than one row
                                  
                                  
                                  SQL>
                                  the expected results should be like this
                                         BID WORDS                       PER
                                  ---------- -------------------- ----------
                                           1 ACD BCD AAA                  2
                                           1 BDE AAC AFC                   1
                                           1 AFD BCF                          1
                                           1 EGYPT                            2
                                           1 BDE AAC AFC                   1
                                           1 AFD BCF                          1
                                           2 BBA BBG BCEV                 2
                                           2 AVF BCD                         1
                                           3 BBG HUH ABCE                2
                                           3 JHU KK MNK                    1
                                           3 EGYPT                           1                  
                                  Note : i just change id in table B to differentiate with id in table A. and column CNT to Per to differentiate in SQL code.

                                  the results of this is acceptable but the update part is not included:
                                  with data as (
                                   select
                                    id
                                   , gen_value ||','  gen_value 
                                   from a
                                  )
                                  ,r  (id,  gen_value , l) as (
                                   select  
                                    id
                                   ,substr( gen_value , instr( gen_value , ',') + 1)
                                   ,substr(
                                     trim(substr( gen_value ,1, instr( gen_value , ',') - 1))
                                    ,instr(trim(substr( gen_value ,1, instr( gen_value , ',') - 1)),' ',-1)
                                   )
                                  from data
                                  union all
                                   select  
                                    id
                                   ,substr( gen_value , instr( gen_value , ',') + 1)
                                   ,substr(
                                     trim(substr( gen_value ,1, instr( gen_value , ',') - 1))
                                    ,instr(trim(substr( gen_value ,1, instr( gen_value , ',') - 1)),' ',-1)
                                   )
                                  from r
                                  where
                                  length( gen_value ) > 0
                                  )
                                   select id,
                                   soundex(l)as l
                                  ,count(*) cnt
                                  ,trim(max(l) keep ( dense_rank last order by length(l), l desc nulls first)) v
                                  from r where l is not null
                                  group by
                                   soundex(l),id
                                   order by 1;
                                  Edited by: 973907 on Dec 8, 2012 9:07 PM
                                  • 14. Re: count based on soundex function
                                    AlbertoFaenza
                                    Hi,

                                    why are you answering at me and use the code from someone else.

                                    If you try it with my code here below is the result. As I mentioned in my previous post I suggest to insert directly the final value instead of updating it a second time.

                                    Check here below:
                                    CREATE TABLE  a(id NUMBER(9), gen_value VARCHAR2(200));
                                    
                                    insert into a values(1, 'ACD BCD AAA, AFD BCF , Egypt,BDE AAC AFC,Egypt,BCF AAE');
                                    insert into a values(2, 'AVF BCD, BBA BBG BCEV, GACD MNF BCV');
                                    insert into a values(3, 'AFC ABC, BBG HUH ABCE, Egypt,JHU KK MNK');
                                    COMMIT;
                                    
                                    CREATE TABLE  b(id NUMBER(9), Gen_Value VARCHAR2(200), cnt NUMBER(9));
                                    
                                    INSERT INTO b (id, gen_value, cnt)
                                    WITH got_values AS
                                    (
                                       SELECT id, TRIM((column_value).getstringval()) strorig
                                            , REGEXP_SUBSTR((column_value).getstringval(),'\w+$') str
                                         FROM a, xmltable(('"'||REPLACE(gen_value,',','","')||'"')) 
                                    )
                                    , got_sdx_cnt AS
                                    (
                                       SELECT id, strorig
                                            , COUNT(*) OVER (PARTITION BY id, SOUNDEX(str)) cnt
                                            , ROW_NUMBER() OVER (PARTITION BY id, SOUNDEX(str) ORDER BY LENGTH(str) DESC, ROWNUM) rn
                                         FROM got_values
                                    )
                                    SELECT id, strorig, cnt 
                                      FROM got_sdx_cnt
                                     WHERE rn=1;
                                     
                                    SELECT * FROM b;
                                    
                                            ID GEN_VALUE              CNT
                                    ---------- --------------- ----------
                                             1 ACD BCD AAA              2
                                             1 BDE AAC AFC              1
                                             1 Egypt                    2
                                             1 AFD BCF                  1
                                             2 BBA BBG BCEV             2
                                             2 AVF BCD                  1
                                             3 BBG HUH ABCE             2
                                             3 Egypt                    1
                                             3 JHU KK MNK               1
                                    Regards.
                                    Al
                                    1 2 Previous Next