| General Information | 
| Anchoring Characters
 | 
| Character Class | Description |  | ^ | Anchor the expression to the start of a line |  | $ | Anchor the expression to the end of a line |  | 
| Equivalence Classes
 | 
| Character Class | Description |  | = = | Oracle supports the equivalence classes through the POSIX '[==]' syntax. A base letter and all of its accented versions constitute an equivalence class. For example, the equivalence class '[=a=]' matches ä and â. The equivalence classes are valid only inside the bracketed expression |  | 
| Match Options
 | 
| Character Class | Description |  | c | Case sensitive matching |  | i | Case insensitive matching |  | m | Treat source string as multi-line activating Anchor chars |  | n | Allow the period (.) to match any newline character |  | 
| Posix Characters
 | 
| Character Class | Description |  | [:alnum:] | Alphanumeric characters |  | [:alpha:] | Alphabetic characters |  | [:blank:] | Blank Space Characters |  | [:cntrl:] | Control characters (nonprinting) |  | [:digit:] | Numeric digits |  | [:graph:] | Any [:punct:], [:upper:], [:lower:], and [:digit:] chars |  | [:lower:] | Lowercase alphabetic characters |  | [:print:] | Printable characters |  | [:punct:] | Punctuation characters |  | [:space:] | Space characters (nonprinting), such as carriage return, newline, vertical tab, and form feed |  | [:upper:] | Uppercase alphabetic characters |  | [:xdigit:] | Hexidecimal characters |  | 
| Quantifier Characters
 | 
| Character Class | Description |  | * | Match 0 or more times |  | ? | Match 0 or 1 time |  | + | Match 1 or more times |  | {m} | Match exactly m times |  | {m,} | Match at least m times |  | {m, n} | Match at least m times but no more than n times |  | \n | Cause the previous expression to be repeated n times |  | 
| Alternative Matching And Grouping Characters
 | 
| Character Class | Description |  | | | Separates alternates, often used with grouping operator () |  | ( ) | Groups subexpression into a unit for alternations, for quantifiers, or for backreferencing (see "Backreferences" section) |  | [char] | Indicates a character list; most metacharacters inside a character list are understood as literals, with the exception of character classes, and the ^ and - metacharacters |  | 
| Demo Table
 | CREATE TABLE test ( testcol VARCHAR2(50));
 
 INSERT INTO test VALUES ('abcde');
 INSERT INTO test VALUES ('12345');
 INSERT INTO test VALUES ('1a4A5');
 INSERT INTO test VALUES ('12a45');
 INSERT INTO test VALUES ('12aBC');
 INSERT INTO test VALUES ('12abc');
 INSERT INTO test VALUES ('12ab5');
 INSERT INTO test VALUES ('12aa5');
 INSERT INTO test VALUES ('12AB5');
 INSERT INTO test VALUES ('ABCDE');
 INSERT INTO test VALUES ('123-5');
 INSERT INTO test VALUES ('12.45');
 INSERT INTO test VALUES ('1a4b5');
 INSERT INTO test VALUES ('1 3 5');
 INSERT INTO test VALUES ('1  45');
 INSERT INTO test VALUES ('1   5');
 INSERT INTO test VALUES ('a  b  c  d');
 INSERT INTO test VALUES ('a b  c   d    e');
 INSERT INTO test VALUES ('a              e');
 INSERT INTO test VALUES ('Steven');
 INSERT INTO test VALUES ('Stephen');
 INSERT INTO test VALUES ('111.222.3333');
 INSERT INTO test VALUES ('222.333.4444');
 INSERT INTO test VALUES ('333.444.5555');
 INSERT INTO test VALUES ('abcdefabcdefabcxyz');
 COMMIT;
 | 
|  | 
| REGEXP_COUNT | 
| Syntax | REGEXP_COUNT(<source_string>, <pattern>[[, <start_position>], [<match_parameter>]]) 
 -- match parameter:
 'c' = case sensitive
 'i' = case insensitive search
 'm' = treats the source string as multiple lines
 'n' = allows the period (.) wild character to match newline
 'x' = ignore whitespace characters
 | 
| Count's occurrences based on a regular expression
 | SELECT REGEXP_COUNT(testcol, '2a', 1, 'i') RESULT FROM test;
 
 SELECT REGEXP_COUNT(testcol, 'e', 1, 'i') RESULT
 FROM test;
 | 
|  | 
| REGEXP_INSTR | 
| Syntax | REGEXP_INSTR(<source_string>, <pattern>[[, <start_position>][, <occurrence>][, <return_option>][, <match_parameter>][, <sub_expression>]]) | 
| Find character 'o' followed by any 3 alphabetic characters: case insensitiveOur thanks to Cassio for spotting a typo here.
 | SELECT REGEXP_INSTR('500 Oracle Pkwy, Redwood Shores, CA', '[o][[:alpha:]]{3}', 1, 1, 0, 'i') RESULT FROM DUAL;
 
 SELECT REGEXP_INSTR('500 Oracle Pkwy, Redwood Shores, CA', '[o][[:alpha:]]{3}', 1, 1, 1, 'i') RESULT
 FROM DUAL;
 
 SELECT REGEXP_INSTR('500 Oracle Pkwy, Redwood Shores, CA', '[o][[:alpha:]]{3}', 1, 2, 0, 'i') RESULT
 FROM DUAL;
 
 SELECT REGEXP_INSTR('500 Oracle Pkwy, Redwood Shores, CA', '[o][[:alpha:]]{3}', 1, 2, 1, 'i') RESULT
 FROM DUAL;
 | 
| Find the position of try, trying, tried or tries | SELECT REGEXP_INSTR('We are trying to make the subject easier.', 'tr(y(ing)?|(ied)|(ies))') RESULTNUM FROM DUAL;
 | 
| Using Sub-Expression option | SELECT testcol, REGEXP_INSTR(testcol, 'ab', 1, 1, 0, 'i', 0) FROM test;
 
 SELECT testcol, REGEXP_INSTR(testcol, 'ab', 1, 1, 0, 'i', 1)
 FROM test;
 
 SELECT testcol, REGEXP_INSTR(testcol, 'a(b)', 1, 1, 0, 'i', 1)
 FROM test;
 | 
|  | 
| REGEXP_LIKE | 
| Syntax | REGEXP_LIKE(<source_string>, <pattern>, <match_parameter>) | 
| AlphaNumeric Characters
 | SELECT * FROM test
 WHERE REGEXP_LIKE(testcol, '[[:alnum:]]');
 
 SELECT *
 FROM test
 WHERE REGEXP_LIKE(testcol, '[[:alnum:]]{3}');
 
 SELECT *
 FROM test
 WHERE REGEXP_LIKE(testcol, '[[:alnum:]]{5}');
 | 
| Alphabetic Characters
 | SELECT * FROM test
 WHERE REGEXP_LIKE(testcol, '[[:alpha:]]');
 
 SELECT *
 FROM test
 WHERE REGEXP_LIKE(testcol, '[[:alpha:]]{3}');
 
 SELECT *
 FROM test
 WHERE REGEXP_LIKE(testcol, '[[:alpha:]]{5}');
 | 
| Control Characters
 | INSERT INTO test VALUES ('zyx' || CHR(13) || 'wvu'); COMMIT;
 
 SELECT *
 FROM test
 WHERE REGEXP_LIKE(testcol, '[[:cntrl:]]{1}');
 | 
| Digits
 | SELECT * FROM test
 WHERE REGEXP_LIKE(testcol, '[[:digit:]]');
 
 SELECT *
 FROM test
 WHERE REGEXP_LIKE(testcol, '[[:digit:]]{3}');
 
 SELECT *
 FROM test
 WHERE REGEXP_LIKE(testcol, '[[:digit:]]{5}');
 | 
| Lower Case
 | SELECT * FROM test
 WHERE REGEXP_LIKE(testcol, '[[:lower:]]');
 
 SELECT *
 FROM test
 WHERE REGEXP_LIKE(testcol, '[[:lower:]]{2}');
 
 SELECT *
 FROM test
 WHERE REGEXP_LIKE(testcol, '[[:lower:]]{3}');
 
 SELECT *
 FROM test
 WHERE REGEXP_LIKE(testcol, '[[:lower:]]{5}');
 | 
| Printable Characters
 | SELECT * FROM test
 WHERE REGEXP_LIKE(testcol, '[[:print:]]{5}');
 
 SELECT *
 FROM test
 WHERE REGEXP_LIKE(testcol, '[[:print:]]{6}');
 
 SELECT *
 FROM test
 WHERE REGEXP_LIKE(testcol, '[[:print:]]{7}');
 | 
| Punctuation
 | TRUNCATE TABLE test; 
 SELECT *
 FROM test
 WHERE REGEXP_LIKE(testcol, '[[:punct:]]');
 | 
| Spaces
 | SELECT * FROM test
 WHERE REGEXP_LIKE(testcol, '[[:space:]]');
 
 SELECT *
 FROM test
 WHERE REGEXP_LIKE(testcol, '[[:space:]]{2}');
 
 SELECT *
 FROM test
 WHERE REGEXP_LIKE(testcol, '[[:space:]]{3}');
 
 SELECT *
 FROM test
 WHERE REGEXP_LIKE(testcol, '[[:space:]]{5}');
 | 
| Upper Case
 | SELECT * FROM test
 WHERE REGEXP_LIKE(testcol, '[[:upper:]]');
 
 SELECT *
 FROM test
 WHERE REGEXP_LIKE(testcol, '[[:upper:]]{2}');
 
 SELECT *
 FROM test
 WHERE REGEXP_LIKE(testcol, '[[:upper:]]{3}');
 | 
| Values Starting with 'a%b' | SELECT testcol FROM test
 WHERE REGEXP_LIKE(testcol, '^ab*');
 | 
| 'a' is the third value | SELECT testcol FROM test
 WHERE REGEXP_LIKE(testcol, '^..a.');
 | 
| Contains two consecutive occurances of the letter 'a' or 'z' | SELECT testcol FROM test
 WHERE REGEXP_LIKE(testcol, '([az])\1', 'i');
 | 
| Begins with 'Ste' ends with 'en' and contains either 'v' or 'ph' in the center | SELECT testcol FROM test
 WHERE REGEXP_LIKE(testcol, '^Ste(v|ph)en$');
 | 
| Use a regular expression in a check constraint | CREATE TABLE mytest (c1 VARCHAR2(20), CHECK (REGEXP_LIKE(c1, '^[[:alpha:]]+$')));
 | 
| Identify SSN
 
 Thanks: Byron Bush HIOUG
 | CREATE TABLE ssn_test ( ssn_col  VARCHAR2(20));
 
 INSERT INTO ssn_test VALUES ('111-22-3333');
 INSERT INTO ssn_test VALUES ('111=22-3333');
 INSERT INTO ssn_test VALUES ('111-A2-3333');
 INSERT INTO ssn_test VALUES ('111-22-33339');
 INSERT INTO ssn_test VALUES ('111-2-23333');
 INSERT INTO ssn_test VALUES ('987-65-4321');
 COMMIT;
 
 SELECT ssn_col
 from ssn_test
 WHERE REGEXP_LIKE(ssn_col,'^[0-9]{3}-[0-9]{2}-[0-9]{4}$');
 | 
|  | 
| REGEXP_REPLACE | 
| Syntax | REGEXP_REPLACE(<source_string>, <pattern>, <replace_string>, <position>, <occurrence>, <match_parameter>)
 | 
| Looks for the pattern xxx.xxx.xxxx and reformats  pattern to (xxx) xxx-xxxx | col testcol format a15 col result format a15
 
 SELECT testcol, REGEXP_REPLACE(testcol,
 '([[:digit:]]{3})\.([[:digit:]]{3})\.([[:digit:]]{4})',
 '(\1) \2-\3') RESULT
 FROM test
 WHERE LENGTH(testcol) = 12;
 | 
| Put a space after every character | SELECT testcol, REGEXP_REPLACE(testcol, '(.)', '\1 ') RESULT FROM test
 WHERE testcol like 'S%';
 | 
| Replace multiple spaces with a single space | SELECT REGEXP_REPLACE('500    Oracle    Parkway, Redwood    Shores, CA', '( ){2,}', ' ') RESULT FROM DUAL;
 | 
| Insert a space between a lower case character followed by an upper case character | SELECT REGEXP_REPLACE('George McGovern', '([[:lower:]])([[:upper:]])', '\1 \2') CITY FROM DUAL;
 (Produces 'George Mc Govern')
 | 
| Replace the period with a string (note use of '\') | SELECT REGEXP_REPLACE('We are trying to make the subject easier.','\.',' for you.') REGEXT_SAMPLE FROM DUAL;
 | 
| Demo | CREATE TABLE t( testcol VARCHAR2(10));
 
 INSERT INTO t VALUES ('1');
 INSERT INTO t VALUES ('2    ');
 INSERT INTO t VALUES ('3 new  ');
 
 col newval format a10
 
 SELECT LENGTH(testcol) len, testcol origval,
 REGEXP_REPLACE(testcol, '\W+$', ' ') newval,
 LENGTH(REGEXP_REPLACE(testcol, '\W+$', ' ')) newlen
 FROM t;
 | 
|
| Code snippet courtesy of Valentin Matak. This is a handy way to remove duplicate characters from a string. This example shows the 1+ repeatability qualifier in use.
 | SELECT REGEXP_REPLACE('ABBBCABBCCCAABAAAAA', '(A|B|C)\1+', '\1') FROM DUAL;
 | 
|
|  | 
| REGEXP_SUBSTR | 
| Syntax | REGEXP_SUBSTR(source_string, pattern [, position [, occurrence
 [, match_parameter]]])
 | 
| Searches for a comma followed by one or more occurrences of non-comma characters followed by a comma | SELECT REGEXP_SUBSTR('500 Oracle Parkway, Redwood Shores, CA', ',[^,]+,') RESULT FROM DUAL;
 | 
| Look for http:// followed by a substring of one or more alphanumeric characters and optionally, a period (.) | col result format a50 
 SELECT REGEXP_SUBSTR('Go to http://www.oracle.com/products and click on database',
 'http://([[:alnum:]]+\.?){3,4}/?') RESULT
 FROM DUAL;
 | 
| Extracts try, trying, tried or tries | SELECT REGEXP_SUBSTR('We are trying to make the subject easier.','tr(y(ing)?|(ied)|(ies))') FROM DUAL;
 | 
| Extract the 3rd field treating ':' as a delimiter | SELECT REGEXP_SUBSTR('system/pwd@orabase:1521:sidval', '[^:]+', 1, 3) RESULT
 FROM DUAL;
 | 
| Extract from string with vertical bar delimiter
 | CREATE TABLE regexp ( testcol VARCHAR2(50));
 
 INSERT INTO regexp
 (testcol)
 VALUES
 ('One|Two|Three|Four|Five');
 
 SELECT * FROM regexp;
 
 SELECT REGEXP_SUBSTR(testcol,'[^|]+', 1, 3)
 FROM regexp;
 | 
| Equivalence classes | SELECT REGEXP_SUBSTR('iSelfSchooling NOT ISelfSchooling', '[[=i=]]SelfSchooling') RESULT FROM DUAL;
 | 
| Parsing Demo
 | set serveroutput on 
 DECLARE
 x VARCHAR2(2);
 y VARCHAR2(2);
 c VARCHAR2(40) := '1:3,4:6,8:10,3:4,7:6,11:12';
 BEGIN
 x := REGEXP_SUBSTR(c,'[^:]+', 1, 1);
 y := REGEXP_SUBSTR(c,'[^,]+', 3, 1);
 
 dbms_output.put_line(x ||' '|| y);
 END;
 /
 Gary Whitaker wrote in with an addition to this parsing demo, and had the following comments:The parsing demo above uses the regular expression '[^,]+' which does not work when there is a NULL element in the list. This could result in returning the wrong element's data. Consider this simple example with comments:
 set serveroutput on
 DECLARE
 x VARCHAR2(1);
 y VARCHAR2(1);
 z VARCHAR2(1);
 c VARCHAR2(40) := '1,2,,4,5,6,7';
 BEGIN
 
 -- Works as expected if the value you seek is before any null value in the list:
 
 x := REGEXP_SUBSTR(c, '[^,]+', 1, 2);
 
 -- This form only returns the 4th element when all elements are present.
 -- It will return the 4th non-null element, which in this example is really '5',
 -- which could be misleading.
 -- if you are really after the 4th element regardless if there is a null element:
 y := REGEXP_SUBSTR(c, '[^,]+', 1, 4);
 -- This form gets the actual 4th element, allowing for the null element.
 -- Get the 1st substring of the 4th instance of a set of characters that are not a comma,
 -- when followed by a comma or the end of the line:
 
 z := REGEXP_SUBSTR(c, '([^,]*)(,|$)', 1, 4, NULL, 1);
 
 dbms_output.put_line(x);
 dbms_output.put_line(y);
 dbms_output.put_line(z);
 END;
 /
 | 
 
No comments:
Post a Comment