This discussion is archived
1 2 Previous Next 29 Replies Latest reply: Dec 13, 2012 4:15 AM by 976910 RSS

count based on soundex function

976910 Newbie
Currently Being Moderated
Hello to all,

i have little bit confused with query requested from me .
the query is
show the count number of the last part of each word in same row according to soundex function
for example
the first row has four statment in same row
ACD BCD AAA
AFD BCF
BDE AAC AFC
BCF AAE
that mean i have to divided the each statment and get soundex for last part that is in this example is \
AAA
BCF
AFC
AAE 
So the soundex only for AAA and AAE is same the mean i have to update table b in Gen column by two and son on for others.
create table  a(id number(9), Gen_Value varchar2(200));

insert into a vlaues(1, 'ACD BCD AAA, AFD BCF , BDE AAC AFC,BCF AAE');
insert into a vlaues(2, 'AVF BCD, BBA BBG BCEV, GACD MNF BCV');
insert into a vlaues(3, 'AFC ABC, BBG HUH ABCE, JHU KK MNK');


create table  b(id number(9), Gen_Value varchar2(200), id number(9));
insert into b values( 'AAA', '0',1);
insert into b values ('BCEV  ','0',2);
insert into b values ('AFC','0',1);
insert into b values ('BCD','0',2);
insert into b values ('MNK','0',3);
insert into b values ('ABCE','0',3);
The results is
Gen   count  id 
AAA    2     1
BCEV   2     2
AFC     1     1
BCD     1     2
MNK    1     3   
ABCE  2     3
cheers
Dheya

Edited by: 973907 on Nov 29, 2012 12:54 AM

Edited by: 973907 on Nov 29, 2012 12:55 AM
  • 1. Re: count based on soundex function
    976910 Newbie
    Currently Being Moderated
    Any help.
  • 2. Re: count based on soundex function
    chris227 Guru
    Currently Being Moderated
    with data as (
     select
      id
     ,gen_value||',' gen_value
     from a
    )
    ,r  (id, gen_value, l) as (
     select  
      id
     ,substr(gen_value, instr(gen_value, ',') + 1)
     ,substr(
       trim(substr(gen_value,1, instr(gen_value, ',') - 1))
      ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
     )
    from data
    union all
     select  
      id
     ,substr(gen_value, instr(gen_value, ',') + 1)
     ,substr(
       trim(substr(gen_value,1, instr(gen_value, ',') - 1))
      ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
     )
    from r
    where
    length(gen_value) > 0
    )
    
    select 
     soundex(l)
    ,count(*) cnt
    from r
    group by
     soundex(l)
    order by
     1
    
    SOUNDEX(L) CNT 
    A000 2 
    A120 3 
    B210 3 
    B230 1 
    M200 1 
    
    just to show the values, use listagg
    
    select 
     soundex(l)
    ,count(*) cnt
    ,listagg(l) within group (order by l) v
    from r
    group by
     soundex(l)
    order by
     1
    
    SOUNDEX(L) CNT V 
    A000 2 AAA AAE 
    A120 3 ABC ABCE AFC 
    B210 3 BCEV BCF BCV 
    B230 1 BCD 
    M200 1 MNK 
    Edited by: chris227 on 29.11.2012 01:51
  • 3. Re: count based on soundex function
    padders Pro
    Currently Being Moderated
    Explain why you regard 'AAE' as a duplicate of 'AAA' and not the other way around.

    In other words when you find that 'AAA' and 'AAE' both SOUNDEX to 'A000' how do you chose which of 'AAA' or 'AAE' should go into table 'B', is it because 'AAA' has the lowest 'ID' in table 'A' or because 'AAA' is alphabetically the lowest - or some other reason.
  • 4. Re: count based on soundex function
    976910 Newbie
    Currently Being Moderated
    thank you ..

    We select any of them. 'AAA' or AAE. or the word that has long length.

    regards
    Dheya
  • 5. Re: count based on soundex function
    chris227 Guru
    Currently Being Moderated
    973907 wrote:
    We select any of them. 'AAA' or AAE. or the word that has long length.
    No prob
    with data as (
     select
      id
     ,gen_value||',' gen_value
     from a
    )
    ,r  (id, gen_value, l) as (
     select  
      id
     ,substr(gen_value, instr(gen_value, ',') + 1)
     ,substr(
       trim(substr(gen_value,1, instr(gen_value, ',') - 1))
      ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
     )
    from data
    union all
     select  
      id
     ,substr(gen_value, instr(gen_value, ',') + 1)
     ,substr(
       trim(substr(gen_value,1, instr(gen_value, ',') - 1))
      ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
     )
    from r
    where
    length(gen_value) > 0
    )
    
    select 
     soundex(l)
    ,count(*) cnt
    ,max(l) keep ( dense_rank last order by length(l), l desc nulls first) v
    from r
    group by
     soundex(l)
    order by
     1
    
    SOUNDEX(L) CNT V 
    A000 2 AAA 
    A120 3 ABCE 
    B210 3 BCEV 
    B230 1 BCD 
    M200 1 MNK 
  • 6. Re: count based on soundex function
    976910 Newbie
    Currently Being Moderated
    That's is fine and it is working Ok.
    SQL> with data as (
      2   select
      3    id
      4   ,gen_value||',' gen_value
      5   from a
      6  )
      7  ,r  (id, gen_value, l) as (
      8   select
      9    id
     10   ,substr(gen_value, instr(gen_value, ',') + 1)
     11   ,substr(
     12     trim(substr(gen_value,1, instr(gen_value, ',') - 1))
     13    ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
     14   )
     15  from data
     16  union all
     17   select
     18    id
     19   ,substr(gen_value, instr(gen_value, ',') + 1)
     20   ,substr(
     21     trim(substr(gen_value,1, instr(gen_value, ',') - 1))
     22    ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
     23   )
     24  from r
     25  where
     26  length(gen_value) > 0
     27  )
     28   select id,
     29   soundex(l)
     30  ,count(*) cnt
     31  ,max(l) keep ( dense_rank last order by length(l), l desc nulls first) v
     32  from r
     33  group by
     34   soundex(l),id
     35  order by
     36   1;
    
    
            ID SOUN        CNT V
    ---------- ---- ---------- ------------
             1 A000          2  AAA
             1 A120          1  AFC
             1 B210          1  BCF
             2 B210          2  BCEV
             2 B230          1  BCD
             3 A120          2  ABCE
             3 M200          1  MNK
    
    7 rows selected.
    
    
    
    SQL>
    i need also to update the count the words to second table already exist called by IT has the follwoing attributtes.
    create table B (id number(9), words varchar2(50), cnt number(9));
    
    insert into b values (1,'ACD BCD AAA',null);
    insert into b values (1,'BDE AAC AFC',null);         
    insert into b values (1,'AFD BCF',null);       
    insert into b values (2,'BBA BBG BCEV',null);
    insert into b values (2',AVF BCD',null);
    insert into b values (3,'BBG HUH ABCE',null);
    insert into b values (3,'JHU KK MNK',null);
    expected output
    id         word                   cnt 
    1       ACD BCD AAA          2
    1       BDE AAC AFC          1
    1       AFD BCF                 1
    2       BBA BBG BCEV         2
    2       AVF BCD                  1
    3       BBG HUH ABCE         2
    3       JHU KK MNK             1
    Note: the compare based on soundex and ONLY for last part of sentence.
    CHERRS
    DHEYA

    Edited by: 973907 on Dec 6, 2012 8:10 PM
  • 7. Re: count based on soundex function
    976910 Newbie
    Currently Being Moderated
    the update instead of last select.

    regards
  • 8. Re: count based on soundex function
    976910 Newbie
    Currently Being Moderated
    Hello anybody help me ?
  • 9. Re: count based on soundex function
    chris227 Guru
    Currently Being Moderated
    update b
    set cnt = (
    with data as (
     select
      id
     ,gen_value||',' gen_value
     from a
    )
    ,r  (id, gen_value, l) as (
     select  
      id
     ,substr(gen_value, instr(gen_value, ',') + 1)
     ,substr(
       trim(substr(gen_value,1, instr(gen_value, ',') - 1))
      ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
     )
    from data
    union all
     select  
      id
     ,substr(gen_value, instr(gen_value, ',') + 1)
     ,substr(
       trim(substr(gen_value,1, instr(gen_value, ',') - 1))
      ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
     )
    from r
    where
    length(gen_value) > 0
    )
    , cnts as ( 
    select 
    -- soundex(l) s
    count(*) cnt
    ,max(l) keep ( dense_rank last order by length(l), l desc nulls first) v
    --,id
    from r
    group by
     soundex(l), id
    )
    
    select
     cnts.cnt
    from cnts
    where
    words like '% '||v
    )
    
    7 row(s) updated.
    
    select * from b
    
    ID WORDS CNT 
    1 ACD BCD AAA 2 
    1 BDE AAC AFC 1 
    3 JHU KK MNK 1 
    2 AVF BCD 1 
    2 BBA BBG BCEV 2 
    3 BBG HUH ABCE 2 
    1 AFD BCF 1 
    Edited by: chris227 on 07.12.2012 08:21
    words like '% '||v with space
  • 10. Re: count based on soundex function
    AlbertoFaenza Expert
    Currently Being Moderated
    Hi,

    having table a in input you can create rows in table b as INSERT INTO B SELECT .. from the query below:
    CREATE TABLE  a(id NUMBER(9), gen_value VARCHAR2(200));
    
    INSERT INTO a VALUES(1, 'ACD BCD AAA, AFD BCF, BDE AAC AFC,BCF AAE');
    INSERT INTO a VALUES(2, 'AVF BCD, BBA BBG BCEV, GACD MNF BCV');
    INSERT INTO a VALUES(3, 'AFC ABC, BBG HUH ABCE, JHU KK MNK');
    COMMIT;
    
    CREATE TABLE  b(id NUMBER(9), Gen_Value VARCHAR2(200), cnt NUMBER(9));
    
    INSERT INTO b (id, gen_value, cnt)
    WITH got_values AS
    (
       SELECT id, TRIM((column_value).getstringval()) strorig
            , REGEXP_SUBSTR((column_value).getstringval(),'\w+$') str
         FROM a, xmltable(('"'||REPLACE(gen_value,',','","')||'"')) 
    )
    , got_sdx_cnt AS
    (
       SELECT id, strorig
            , COUNT(*) OVER (PARTITION BY id, SOUNDEX(str)) cnt
            , ROW_NUMBER() OVER (PARTITION BY id, SOUNDEX(str) ORDER BY LENGTH(str) DESC, ROWNUM) rn
         FROM got_values
    )
    SELECT id, strorig, cnt 
      FROM got_sdx_cnt
     WHERE rn=1;
     
    SELECT * FROM b;
    
            ID GEN_VALUE              CNT
    ---------- --------------- ----------
             1 ACD BCD AAA              2
             1 BDE AAC AFC              1
             1 AFD BCF                  1
             2 BBA BBG BCEV             2
             2 AVF BCD                  1
             3 BBG HUH ABCE             2
             3 JHU KK MNK               1
    The code above is working on 11g.

    Regards.
    Al

    Edited by: Alberto Faenza on Dec 7, 2012 9:18 PM

    Edited by: Alberto Faenza on Dec 8, 2012 2:33 PM
  • 11. Re: count based on soundex function
    Ayham Newbie
    Currently Being Moderated
    remove
  • 12. Re: count based on soundex function
    Ayham Newbie
    Currently Being Moderated
    remove
  • 13. Re: count based on soundex function
    976910 Newbie
    Currently Being Moderated
    But when i add just one word Like Egypt twice in row id no=1
    and also in one time in row id =3.
    the output will be empty for counting.
    like this:

    drop table a;
    create table  a(id number(9), gen_value varchar2(200));
     
    insert into a values(1, 'ACD BCD AAA, AFD BCF , Egypt,BDE AAC AFC,Egypt,BCF AAE');
    insert into a values(2, 'AVF BCD, BBA BBG BCEV, GACD MNF BCV');
    insert into a values(3, 'AFC ABC, BBG HUH ABCE, Egypt,JHU KK MNK');
    
    drop table B ;
    create table B (bid number(9), words varchar2(50), per number(9));
     
    insert into b values (1,'ACD BCD AAA',null);
    insert into b values (1,'BDE AAC AFC',null);         
    insert into b values (1,'AFD BCF',null);    
    insert into b values (1,'EGYPT',null);
    insert into b values (1,'BDE AAC AFC',null);         
    insert into b values (1,'AFD BCF',null);     
    insert into b values (2,'BBA BBG BCEV',null);
    insert into b values (2,'AVF BCD',null);
    insert into b values (3,'BBG HUH ABCE',null);
    insert into b values (3,'JHU KK MNK',null);
    insert into b values (3,'EGYPT',null);
    update B
    set per = (
    with data as (
     select
      id
     ,gen_value||',' gen_value
     from a
    )
    ,r  (id, gen_value, l) as (
     select  
      id
     ,substr(gen_value, instr(gen_value, ',') + 1)
     ,substr(
       trim(substr(gen_value,1, instr(gen_value, ',') - 1))
      ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
     )
    from data
    union all
     select  
      id
     ,substr(gen_value, instr(gen_value, ',') + 1)
     ,substr(
       trim(substr(gen_value,1, instr(gen_value, ',') - 1))
      ,instr(trim(substr(gen_value,1, instr(gen_value, ',') - 1)),' ',-1)
     )
    from r
    where
    length(gen_value) > 0
    )
    , cnts as ( 
    select 
    -- soundex(l) s
    count(*) cnt
    ,max(l) keep ( dense_rank last order by length(l), l desc nulls first) v
    --,id
    from r
    group by
     soundex(l), id
    )
     select
     cnts.cnt
    from cnts
    where
    words like '%'||v );
    select * from b;
           BID WORDS                       PER
    ---------- -------------------- ----------
             1 ACD BCD AAA                   2
             1 BDE AAC AFC                   1
             1 AFD BCF                          1
             1 EGYPT                                                   -- This count is empty
             1 BDE AAC AFC                   1
             1 AFD BCF                          1
             2 BBA BBG BCEV                  2
             2 AVF BCD                           1
             3 BBG HUH ABCE                  2
             3 JHU KK MNK                      1
             3 EGYPT                                                      -- this is also empty . Why?
    
    11 rows selected.
    
    SQL>
    when i tried to add soundex for words and V.
    for last part like this
     select
     cnts.cnt
    from cnts
    where
    soundex (words) like '%'||soundex (v) );
    The error msg appear like this
    SQL> /
    with data as (
    *
    ERROR at line 3:
    ORA-01427: single-row subquery returns more than one row
    
    
    SQL>
    the expected results should be like this
           BID WORDS                       PER
    ---------- -------------------- ----------
             1 ACD BCD AAA                  2
             1 BDE AAC AFC                   1
             1 AFD BCF                          1
             1 EGYPT                            2
             1 BDE AAC AFC                   1
             1 AFD BCF                          1
             2 BBA BBG BCEV                 2
             2 AVF BCD                         1
             3 BBG HUH ABCE                2
             3 JHU KK MNK                    1
             3 EGYPT                           1                  
    Note : i just change id in table B to differentiate with id in table A. and column CNT to Per to differentiate in SQL code.

    the results of this is acceptable but the update part is not included:
    with data as (
     select
      id
     , gen_value ||','  gen_value 
     from a
    )
    ,r  (id,  gen_value , l) as (
     select  
      id
     ,substr( gen_value , instr( gen_value , ',') + 1)
     ,substr(
       trim(substr( gen_value ,1, instr( gen_value , ',') - 1))
      ,instr(trim(substr( gen_value ,1, instr( gen_value , ',') - 1)),' ',-1)
     )
    from data
    union all
     select  
      id
     ,substr( gen_value , instr( gen_value , ',') + 1)
     ,substr(
       trim(substr( gen_value ,1, instr( gen_value , ',') - 1))
      ,instr(trim(substr( gen_value ,1, instr( gen_value , ',') - 1)),' ',-1)
     )
    from r
    where
    length( gen_value ) > 0
    )
     select id,
     soundex(l)as l
    ,count(*) cnt
    ,trim(max(l) keep ( dense_rank last order by length(l), l desc nulls first)) v
    from r where l is not null
    group by
     soundex(l),id
     order by 1;
    Edited by: 973907 on Dec 8, 2012 9:07 PM
  • 14. Re: count based on soundex function
    AlbertoFaenza Expert
    Currently Being Moderated
    Hi,

    why are you answering at me and use the code from someone else.

    If you try it with my code here below is the result. As I mentioned in my previous post I suggest to insert directly the final value instead of updating it a second time.

    Check here below:
    CREATE TABLE  a(id NUMBER(9), gen_value VARCHAR2(200));
    
    insert into a values(1, 'ACD BCD AAA, AFD BCF , Egypt,BDE AAC AFC,Egypt,BCF AAE');
    insert into a values(2, 'AVF BCD, BBA BBG BCEV, GACD MNF BCV');
    insert into a values(3, 'AFC ABC, BBG HUH ABCE, Egypt,JHU KK MNK');
    COMMIT;
    
    CREATE TABLE  b(id NUMBER(9), Gen_Value VARCHAR2(200), cnt NUMBER(9));
    
    INSERT INTO b (id, gen_value, cnt)
    WITH got_values AS
    (
       SELECT id, TRIM((column_value).getstringval()) strorig
            , REGEXP_SUBSTR((column_value).getstringval(),'\w+$') str
         FROM a, xmltable(('"'||REPLACE(gen_value,',','","')||'"')) 
    )
    , got_sdx_cnt AS
    (
       SELECT id, strorig
            , COUNT(*) OVER (PARTITION BY id, SOUNDEX(str)) cnt
            , ROW_NUMBER() OVER (PARTITION BY id, SOUNDEX(str) ORDER BY LENGTH(str) DESC, ROWNUM) rn
         FROM got_values
    )
    SELECT id, strorig, cnt 
      FROM got_sdx_cnt
     WHERE rn=1;
     
    SELECT * FROM b;
    
            ID GEN_VALUE              CNT
    ---------- --------------- ----------
             1 ACD BCD AAA              2
             1 BDE AAC AFC              1
             1 Egypt                    2
             1 AFD BCF                  1
             2 BBA BBG BCEV             2
             2 AVF BCD                  1
             3 BBG HUH ABCE             2
             3 Egypt                    1
             3 JHU KK MNK               1
    Regards.
    Al
1 2 Previous Next

Legend

  • Correct Answers - 10 points
  • Helpful Answers - 5 points