Forum Stats

  • 3,734,025 Users
  • 2,246,861 Discussions
  • 7,857,001 Comments

Discussions

Question about using REGEXP_REPLACE to replace commas between double quotes

928881
928881 Member Posts: 3
edited April 2012 in SQL & PL/SQL
Hello,

I have been looking around to try to find a solution to my problem but I would like to use the REGEXP_REPLACE function to do the following:
Original String:
TEXT1,TEXT2,"TEXT3,TEXT4,TEXT5",TEXT6,TEXT7,"TEXT8,TEXT9,TEXT10,TEXT11",TEXT12

Desired String:
TEXT1,TEXT2,"TEXT3 TEXT4 TEXT5",TEXT6,TEXT7,"TEXT8 TEXT9 TEXT10 TEXT11",TEXT12

Could anyone help me accomplish this?

Answers

  • 908002
    908002 Member Posts: 1,202
    edited April 2012
    this is only way i got after trying for lot of time..

    not efficient but .......
    select regexp_substr
    ('TEXT1,TEXT2,"TEXT3,TEXT4,TEXT5",TEXT6,TEXT7,"TEXT8,TEXT9,TEXT10,TEXT11",TEXT12',
    '[^"]+')  ||'"'|| replace(regexp_substr
    ('TEXT1,TEXT2,"TEXT3,TEXT4,TEXT5",TEXT6,TEXT7,"TEXT8,TEXT9,TEXT10,TEXT11",TEXT12',
    '[^"]+',1,2),',' ,' ') ||'"'||regexp_substr
    ('TEXT1,TEXT2,"TEXT3,TEXT4,TEXT5",TEXT6,TEXT7,"TEXT8,TEXT9,TEXT10,TEXT11",TEXT12',
    '[^"]+',1,3) ||'"'|| replace(regexp_substr
    ('TEXT1,TEXT2,"TEXT3,TEXT4,TEXT5",TEXT6,TEXT7,"TEXT8,TEXT9,TEXT10,TEXT11",TEXT12',
    '[^"]+',1,4),',' ,' ') ||'"'||regexp_substr
    ('TEXT1,TEXT2,"TEXT3,TEXT4,TEXT5",TEXT6,TEXT7,"TEXT8,TEXT9,TEXT10,TEXT11",TEXT12',
    '[^"]+',1,5)
    from dual;
    {code}
    
    Edited by: Kiran on Apr 6, 2012 2:51 AM                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
  • Nicosa-Oracle
    Nicosa-Oracle Member Posts: 1,319 Employee
    Hi,

    I can't either get it to work in one unique regexp.
    Best I can do is as follows :
    [email protected] SQL>l
      1  with t as (
      2  select 'TEXT1,TEXT2,"TEXT3,TEXT4,TEXT5",TEXT6,TEXT7,"TEXT8,TEXT9,TEXT10,TEXT11",TEXT12' str from dual
      3  union all select 'TEXT1,TEXT2,TEXT3,TEXT4,TEXT5,TEXT6,TEXT7,TEXT8,TEXT9,TEXT10,TEXT11,TEXT12' from dual
      4  union all select '"TEXT1,TEXT2,TEXT3,TEXT4,TEXT5,TEXT6,TEXT7,TEXT8,TEXT9,TEXT10,TEXT11,TEXT12"' from dual
      5  union all select '"TEXT1,TEXT2,TEXT3,TEXT4,TEXT5,TEXT6",TEXT7,TEXT8,TEXT9,TEXT10,TEXT11,TEXT12' from dual
      6  union all select 'TEXT1,TEXT2,TEXT3,TEXT4,TEXT5,TEXT6,TEXT7,"TEXT8,TEXT9,TEXT10,TEXT11,TEXT12"' from dual
      7  )
      8  select str,listagg(rep,'') within group (order by n) newstr
      9  from (
     10  	select str
     11  	,n, case when (mod(n,2)=q) then '"'||replace(sub,',',' ')||'"' else sub end rep
     12  	from (
     13  		select str,
     14  		m.column_value n,
     15  		case when regexp_like(str,'^"') then 1 else 0 end q,
     16  		regexp_substr(str,'[^"]+',1,m.column_value) sub
     17  		from t,
     18  		table(cast(multiset(select level from dual connect by level <= regexp_count(str,'[^"]+')) as sys.odciNumberList)) m
     19  	)
     20  )
     21* group by str
    [email protected] SQL>/
    
    STR
    ------------------------------------------------------------------------------------------
    NEWSTR
    ------------------------------------------------------------------------------------------
    "TEXT1,TEXT2,TEXT3,TEXT4,TEXT5,TEXT6",TEXT7,TEXT8,TEXT9,TEXT10,TEXT11,TEXT12
    "TEXT1 TEXT2 TEXT3 TEXT4 TEXT5 TEXT6",TEXT7,TEXT8,TEXT9,TEXT10,TEXT11,TEXT12
    
    "TEXT1,TEXT2,TEXT3,TEXT4,TEXT5,TEXT6,TEXT7,TEXT8,TEXT9,TEXT10,TEXT11,TEXT12"
    "TEXT1 TEXT2 TEXT3 TEXT4 TEXT5 TEXT6 TEXT7 TEXT8 TEXT9 TEXT10 TEXT11 TEXT12"
    
    TEXT1,TEXT2,"TEXT3,TEXT4,TEXT5",TEXT6,TEXT7,"TEXT8,TEXT9,TEXT10,TEXT11",TEXT12
    TEXT1,TEXT2,"TEXT3 TEXT4 TEXT5",TEXT6,TEXT7,"TEXT8 TEXT9 TEXT10 TEXT11",TEXT12
    
    TEXT1,TEXT2,TEXT3,TEXT4,TEXT5,TEXT6,TEXT7,"TEXT8,TEXT9,TEXT10,TEXT11,TEXT12"
    TEXT1,TEXT2,TEXT3,TEXT4,TEXT5,TEXT6,TEXT7,"TEXT8 TEXT9 TEXT10 TEXT11 TEXT12"
    
    TEXT1,TEXT2,TEXT3,TEXT4,TEXT5,TEXT6,TEXT7,TEXT8,TEXT9,TEXT10,TEXT11,TEXT12
    TEXT1,TEXT2,TEXT3,TEXT4,TEXT5,TEXT6,TEXT7,TEXT8,TEXT9,TEXT10,TEXT11,TEXT12
    
    
    5 rows selected.
    {code}but that implies 11g or more...
    
    I wish some regexp guru passes by to drop a magical regexp_replace !
    :-)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
  • Frank Kulash
    Frank Kulash Member, Moderator Posts: 40,497 Red Diamond
    edited April 2012
    Hi,

    Welcome to the forum!

    Here's another way, which works with any number of double-quotes:
    WITH	got_str		AS
    (
    	SELECT	'TEXT1,TEXT2,"TEXT3,TEXT4,TEXT5",TEXT6,TEXT7,"TEXT8,TEXT9,TEXT10,TEXT11",TEXT12' AS str
    	FROM	dual
    )
    ,	cntr	AS
    (
    	SELECT	LEVEL	AS n
    	FROM	got_str
    	CONNECT BY	LEVEL	<= 1 + LENGTH (str)
    				     - LENGTH (REPLACE (STR, '"'))
    )
    ,	got_parts	AS
    (
    	SELECT	REPLACE ( REGEXP_SUBSTR ( s.str
    					, '[^"]+'
    					, 1
    					, c.n
    					)
    			, ','
    			, CASE
    				WHEN  MOD (c.n, 2) = 0
    				THEN  ' '
    				ELSE  ','
    			  END
    			)	AS part
    	, 	c.n
    	FROM 		cntr	c
    	CROSS JOIN	got_str	s 
    )
    SELECT	SUBSTR ( SYS_CONNECT_BY_PATH (part, '"')
    	       , 2
    	       )	AS new_str
    FROM	got_parts
    WHERE	CONNECT_BY_ISLEAF	= 1
    START WITH	n	= 1
    CONNECT BY	n	= 1 + PRIOR n
    ;
    {code}
    This assumes that you are only processing one string at a time (that is, that got_str has only one row).  If that's not the case, the same basic approach will work, but it will be a little more complicated.
    This also assumes that you never have consecutive double-quotes in str, for example "Out1, Out2,"In1",Out3,"",Out4'.  Again, if I assumed wrong, it's just a minor change.
    If the string contains unmatched double-quotes, then commas will be removed after the last one, as if an extra double-quote were assumed at the end.
    
    The basic strategy is to divide the string into "-delimited parts.  All the odd-numbered parts (that is, parts 1, 3, 5, ...) are outside double-quotes, and all the even-numbered parts (2, 4, 6, ...) are inside double-quotes.  The REPLACE function in got_parts changes the commas to spaces in the even-numbered parts, but "changes" them to commas in the odd-numbered parts.  The main query re-combines the parts, in order, restoring the doule-quotes.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
  • jeneesh
    jeneesh Member Posts: 7,168
    edited April 2012
    Using Recursion....
    SQL> select * from test order by id;
    
            ID STR
    ---------- --------------------------------------------------------------------------------
             1 TEXT1,TEXT2,"TEXT3,TEXT4,TEXT5",TEXT6,TEXT7,"TEXT8,TEXT9,TEXT10,TEXT11",TEXT12
             2 TEXT1,TEXT2,"TEXT3",TEXT6,TEXT7,"TEXT8,TEXT9,TEXT10,TEXT11",TEXT12
             3 "TEXT3,TEXT4,TEXT5"
             4 TEXT8,TEXT9,TEXT10,TEXT11
    
    SQL> with t(id,str) as
      2  (select id,regexp_replace(str,'(")([^"|,]+)(,)([^"]+)(")','\1\2 \4\5') str
      3   from test
      4   union all
      5   select t2.id,regexp_replace(t2.str,'(")([^"|,]+)(,)([^"]+)(")','\1\2 \4\5') str
      6   from  t t2
      7  )
      8      cycle str set lvl to 1 default 0
      9  select id,str
     10  from t
     11  where lvl = 1
     12  order by id;
    
            ID STR
    ---------- --------------------------------------------------------------------------------
             1 TEXT1,TEXT2,"TEXT3 TEXT4 TEXT5",TEXT6,TEXT7,"TEXT8 TEXT9 TEXT10 TEXT11",TEXT12
             2 TEXT1,TEXT2,"TEXT3",TEXT6,TEXT7,"TEXT8 TEXT9 TEXT10 TEXT11",TEXT12
             3 "TEXT3 TEXT4 TEXT5"
             4 TEXT8,TEXT9,TEXT10,TEXT11
  • odie_63
    odie_63 Member Posts: 8,439 Bronze Badge
    Using XQuery (11.2) :
    SQL> WITH	strings		AS
      2  (
      3  	SELECT	'TEXT1,TEXT2,"TEXT3,TEXT4,TEXT5",TEXT6,TEXT7,"TEXT8,TEXT9,TEXT10,TEXT11",TEXT12' AS str	FROM	dual UNION ALL
      4   SELECT	'TEXT1,TEXT2,"TEXT3",TEXT4' AS str	FROM	dual UNION ALL
      5   SELECT	'"TEXT1,TEXT2","TEXT3,TEXT4"' AS str	FROM	dual
      6  )
      7  SELECT x.result
      8  FROM strings t
      9     , XMLTable(
     10       'string-join(
     11          for $i at $p in ora:tokenize($str,"""")
     12          return if ($p mod 2 = 0) then translate($i,","," ") else $i
     13        , "")'
     14        passing t.str AS "str"
     15        columns result varchar2(4000) path '.'
     16       ) x
     17  ;
     
    RESULT
    --------------------------------------------------------------------------------
    TEXT1,TEXT2,TEXT3 TEXT4 TEXT5,TEXT6,TEXT7,TEXT8 TEXT9 TEXT10 TEXT11,TEXT12
    TEXT1,TEXT2,TEXT3,TEXT4
    TEXT1 TEXT2,TEXT3 TEXT4
     
  • 928881
    928881 Member Posts: 3
    Thank you all for the replys, unfortunantly since this is thousands of rows of data coming into our system we can not really union the datasets together and duplicate information without causing possible performance problems.

    Frank Kulash: Thanks for the welcome and I liked the look of your select but when I ran it in our 10gR2 database nothing was returned. Could I have something wrong?
    WITH Got_Str AS
    (SELECT 'TEXT1,TEXT2,"TEXT3,TEXT4,TEXT5",TEXT6,TEXT7,"TEXT8,TEXT9,TEXT10,TEXT11",TEXT12' AS Str
    FROM Dual),
    Cntr AS
    (SELECT LEVEL AS n
    FROM Got_Str
    CONNECT BY LEVEL <= 1 + Length(Str) - Length(REPLACE(Str, '"'))),
    Got_Parts AS
    (SELECT REPLACE(Regexp_Substr(s.Str, '[^"]+', 1, c.n),
    ',',
    CASE
    WHEN MOD(c.n, 2) = 0 THEN
    ' '
    ELSE
    ','
    END) AS Part,
    c.n
    FROM Cntr c
    CROSS JOIN Got_Str s)
    SELECT Substr(Sys_Connect_By_Path(Part, '"'), 2) AS New_Str
    FROM Got_Parts
    WHERE Connect_By_Isleaf = 1
    START WITH n = 1
    CONNECT BY n = 1 + PRIOR n;
  • Frank Kulash
    Frank Kulash Member, Moderator Posts: 40,497 Red Diamond
    Hi,
    925878 wrote:
    Thank you all for the replys, unfortunantly since this is thousands of rows of data coming into our system we can not really union the datasets together and duplicate information without causing possible performance problems.
    That's okay; nobody's asking you to do that. The sub-query t in Nicosa's solution {message:id=10257977} , for example, and Got_Str in mine, is just a way of getting some sample data, since you didn;t post CREATE TABLE and INSERT statements for some sample data yourself. Use your real table instead ot t.
    Frank Kulash: Thanks for the welcome and I liked the look of your select but when I ran it in our 10gR2 database nothing was returned.
    What exactly do you mean? Did you get "no rows selected"? Did you get a line with (apparantly) NULL in the new_str column? Did the system quit responding altogether?
    >
    Could I have something wrong?
    WITH Got_Str AS ...
    I just ran what you posted (in Oracle 10.2.0.1.0) and got this output:
    NEW_STR
    --------------------------------------------------------------------------------
    TEXT1,TEXT2,"TEXT3 TEXT4 TEXT5",TEXT6,TEXT7,"TEXT8 TEXT9 TEXT10 TEXT11",TEXT12
  • 928881
    928881 Member Posts: 3
    I would not say that anything was gotten wrong but I received a NULL row when I run the query for some reason.
    NEW_STR
    ----------------
  • jihuyao
    jihuyao Member Posts: 462
    nice to catch all the good ideas (though still lost on much syntax). Here just comes another way similar to the recursive sql,


    1 select f_reg_replace(a1, a2, a3) as output_str
    2 from (
    3 SELECT 'TEXT1,TEXT2,"TEXT3,TEXT4,TEXT5",TEXT6,TEXT7,"TEXT8,TEXT9,TEXT10,TEXT11",TEXT12' as a1,
    4 '(,")([^"]*)(,)' as a2,
    5 '\1\2 ' as a3
    6 FROM DUAL
    7* ) t
    SQL> /

    OUTPUT_STR
    ----------------------------------------------------------------------------------------------------
    TEXT1,TEXT2,"TEXT3 TEXT4 TEXT5",TEXT6,TEXT7,"TEXT8 TEXT9 TEXT10 TEXT11",TEXT12


    create or replace function f_reg_replace(
    input_str in varchar2,
    pattern_str in varchar2,
    replace_str in varchar2
    ) return varchar2
    is

    v_str varchar2(4000) ;

    begin

    v_str := REGEXP_REPLACE(input_str, pattern_str, replace_str, 1, 0) ;

    if v_str = input_str then

    return v_str ;

    else

    v_str := f_reg_replace(v_str, pattern_str, replace_str) ;

    end if ;

    return v_str ;

    end ;
    /
  • 933608
    933608 Member Posts: 1
    edited April 2012
    The below query can be a suitable one. Output is returned in separate rows. If you are particular about getting the output in a single row, you may apply SYS Connect By clause on top of the below query.(Ref to reply from Frank Kulash for using Sys Connect By)

    SELECT REPLACE(DECODE(SUBSTR(TKN,1,1),'"', TKN||LTKN,TKN),'"','') FROM
    (SELECT TKN, LVL,LEAD(TKN)OVER(ORDER BY LVL) LTKN FROM
    (SELECT REGEXP_SUBSTR('ABC,"DE,FG",XYZ,"KLM,NO"','[^,]+',1,LEVEL)TKN,LEVEL LVL FROM DUAL
    CONNECT BY REGEXP_SUBSTR('ABC,"DE,FG",XYZ,"KLM,NO"','[^,]+',1,LEVEL) IS NOT NULL)
    )
    WHERE TKN NOT LIKE '%"'

    Edited by: Srikanth.P on Apr 26, 2012 9:42 PM
  • Aketi Jyuuzou
    Aketi Jyuuzou Member Posts: 1,072 Bronze Badge
    I like model clause B-)
    with t(ID,Val) as(
    select 1,'TEXT1,TEXT2,"TEXT3,TEXT4,TEXT5",TEXT6,TEXT7,"TEXT8,TEXT9,TEXT10,TEXT11",TEXT12' from dual union
    select 2,'TEXT1,TEXT2,"TEXT3",TEXT6,TEXT7,"TEXT8,TEXT9,TEXT10,TEXT11",TEXT12' from dual union
    select 3,'"TEXT3,TEXT4,TEXT5"' from dual union
    select 4,'TEXT8,TEXT9,TEXT10,TEXT11' from dual)
    select ID,Val
      from t
     model
    dimension by(ID)
    measures(Val)
    rules iterate(100)(
    Val[any] = RegExp_Replace(Val[cv()],',(([^"]*"){2})*([^"]*"[^"]*)$',' \1\3'));
    
    ID  Val
    --  ------------------------------------------------------------------------------
     1  TEXT1,TEXT2,"TEXT3 TEXT4 TEXT5",TEXT6,TEXT7,"TEXT8 TEXT9 TEXT10 TEXT11",TEXT12
     2  TEXT1,TEXT2,"TEXT3",TEXT6,TEXT7,"TEXT8 TEXT9 TEXT10 TEXT11",TEXT12
     3  "TEXT3 TEXT4 TEXT5"
     4  TEXT8,TEXT9,TEXT10,TEXT11
This discussion has been closed.