oralce正则表达式函数

Oracle Regular Expressions

Version 11.1

General information

Anchoring Characters

Character Class	Description
^	Anchor the expression to the start of a line
$	Anchor the expression to the end of a line

Equivalence Classes

Character Class	Description
= =	Oracle supports the equivalence classes through the POSIX '[==]' Syntax. A base letter and all of its accented versions constitute an equivalence class. For example,the equivalence class '[=a=]' matches ä and â. The equivalence classes are valid only inside the bracketed expression

Match Options

Character Class	Description
c	Case sensitive matching
i	Case insensitive matching
m	Treat source string as multi-line activating Anchor chars
n	Allow the period (.) to match any newline character

Posix Characters

Character Class	Description
[:alnum:]	Alphanumeric characters
[:alpha:]	Alphabetic characters
[:blank:]	Blank Space Characters
[:cntrl:]	Control characters (nonprinting)
[:digit:]	Numeric digits
[:graph:]	Any [:punct:],[:upper:],[:lower:],and [:digit:] chars
[:lower:]	Lowercase alphabetic characters
[:print:]	Printable characters
[:punct:]	Punctuation characters
[:space:]	Space characters (nonprinting),such as carriage return,newline,vertical tab,and form Feed
[:upper:]	Uppercase alphabetic characters
[:xdigit:]	Hexidecimal characters

Quantifier Characters

Character Class	Description
*	Match 0 or more times
?	Match 0 or 1 time
+	Match 1 or more times
{m}	Match exactly m times
{m,}	Match at least m times
{m,n}	Match at least m times but no more than n times
\n	Cause the prevIoUs expression to be repeated n times

Alternative Matching And Grouping Characters

Character Class	Description
\|	Separates alternates,often used with grouping operator ()
( )	Groups subexpression into a unit for alternations,for quantifiers,or for backreferencing (see "Backreferences" section)
[char]	Indicates a character list; most Metacharacters inside a character list are understood as literals,with the exception of character classes,and the ^ and - Metacharacters

Demo Table

CREATE TABLEtest (
testcolVARCHAR2(50));

INSERT INTOtestVALUES('abcde');
INSERT INTOtestVALUES('12345');
INSERT INTOtestVALUES('1a4A5');
INSERT INTOtestVALUES('12a45');
INSERT INTOtestVALUES('12aBC');
INSERT INTOtestVALUES('12abc');
INSERT INTOtestVALUES('12ab5');
INSERT INTOtestVALUES('12aa5');
INSERT INTOtestVALUES('12AB5');
INSERT INTOtestVALUES('ABCDE');
INSERT INTOtestVALUES('123-5');
INSERT INTOtestVALUES('12.45');
INSERT INTOtestVALUES('1a4b5');
INSERT INTOtestVALUES('1 3 5');
INSERT INTOtestVALUES('1 45');
INSERT INTOtestVALUES('1 5');
INSERT INTOtestVALUES('a b c d');
INSERT INTOtestVALUES('a b c d e');
INSERT INTOtestVALUES('a e');
INSERT INTOtestVALUES('Steven');
INSERT INTOtestVALUES('Stephen');
INSERT INTOtestVALUES('111.222.3333');
INSERT INTOtestVALUES('222.333.4444');
INSERT INTOtestVALUES('333.444.5555');
INSERT INTOtestVALUES('abcdefabcdefabcxyz');
COMMIT;

REGEXP_COUNT

Syntax

REGEXP_COUNT(<source_string>,<pattern>[[,<start_position>],[<match_parameter>]])

-- match parameter:
'c' = case sensitive
'i' = case insensitive search
'm' = treats the source string as multiple lines
'n' = allows the period (.) wild character to match newline
'x' = ignore whitespace characters

Count's occurrences based on a regular expression

SELECTREGEXP_COUNT(testcol,'2a',1,'i') RESULT
FROMtest;

SELECTREGEXP_COUNT(testcol,'e','i') RESULT
FROMtest;

REGEXP_INSTR

Syntax

REGEXP_INSTR(<source_string>,<start_position>][,<occurrence>][,<return_option>][,<match_parameter>][,<sub_expression>]])

Find character 'o' followed by any 3 alphabetic characters: case insensitive

Our thanks to Cassio for spotting a typo here.

SELECTREGEXP_INSTR('500Oracle Pkwy,Redwood Shores,CA','[o][[:alpha:]]{3}','i') RESULT
FROM DUAL;

SELECTREGEXP_INSTR('500 OraclePkwy,'i') RESULT
FROM DUAL;

SELECTREGEXP_INSTR('500 Oracle Pkwy,Redwood Shores,2,Redwood Shores,CA','i') RESULT
FROM DUAL;

Find the position of try,trying,tried or tries

SELECTREGEXP_INSTR('We are trying to make the subject easier.','tr(y(ing)?|(ied)|(ies))') RESULTNUM
FROM DUAL;

Using Sub-Expression option

SELECTtestcol,REGEXP_INSTR(testcol,'ab','i',0)
FROMtest;

SELECTtestcol,1)
FROMtest;

SELECTtestcol,'a(b)',1)
FROMtest;

REGEXP_LIKE

Syntax

REGEXP_LIKE(<source_string>,<pattern>,<match_parameter>)

AlphaNumeric Characters

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:alnum:]]');

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:alnum:]]{3}');

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:alnum:]]{5}');

Alphabetic Characters

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:alpha:]]');

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:alpha:]]{3}');

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:alpha:]]{5}');

Control Characters

INSERT INTOtestVALUES('zyx' ||CHR(13) || 'wvu');
COMMIT;

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:cntrl:]]{1}');

Digits

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:digit:]]');

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:digit:]]{3}');

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:digit:]]{5}');

Lower Case

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:lower:]]');

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:lower:]]{2}');

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:lower:]]{3}');

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:lower:]]{5}');

Printable Characters

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:print:]]{5}');

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:print:]]{6}');

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:print:]]{7}');

Punctuation

TRUNCATETABLEtest;

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:punct:]]');

Spaces

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:space:]]');

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:space:]]{2}');

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:space:]]{3}');

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:space:]]{5}');

Upper Case

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:upper:]]');

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:upper:]]{2}');

SELECT*
FROMtest
WHEREREGEXP_LIKE(testcol,'[[:upper:]]{3}');

Values Starting with 'a%b'

SELECTtestcol
FROMtest
WHEREREGEXP_LIKE(testcol,'^ab*');

'a' is the third value

SELECTtestcol
FROMtest
WHEREREGEXP_LIKE(testcol,'^..a.');

Contains two consecutive occurances of the letter 'a' or 'z'

SELECTtestcol
FROMtest
WHEREREGEXP_LIKE(testcol,'([az])\1','i');

Begins with 'Ste' ends with 'en' and contains either 'v' or 'ph' in the center

SELECTtestcol
FROMtest
WHEREREGEXP_LIKE(testcol,'^Ste(v|ph)en$');

Use a regular expression in a check constraint

CREATE TABLEmytest (c1VARCHAR2(20),
CHECK (REGEXP_LIKE(c1,'^[[:alpha:]]+$')));

Identify SSN

Thanks: Byron Bush HIoUG

CREATE TABLEssn_test (
ssn_colVARCHAR2(20));

INSERT INTOssn_testVALUES('111-22-3333');
INSERT INTOssn_testVALUES('111=22-3333');
INSERT INTOssn_testVALUES('111-A2-3333');
INSERT INTOssn_testVALUES('111-22-33339');
INSERT INTOssn_testVALUES('111-2-23333');
INSERT INTOssn_testVALUES('987-65-4321');
COMMIT;

SELECTssn_col
from ssn_test
WHEREREGEXP_LIKE(ssn_col,'^[0-9]{3}-[0-9]{2}-[0-9]{4}$');

REGEXP_REPLACE

Syntax

REGEXP_REPLACE(<source_string>,
<replace_string>,<position>,<occurrence>,<match_parameter>)

Looks for the pattern xxx.xxx.xxxx and reformats pattern to (xxx) xxx-xxxx

col testcol format a15
col result format a15

SELECTtestcol,REGEXP_REPLACE(testcol,
'([[:digit:]]{3})\.([[:digit:]]{3})\.([[:digit:]]{4})',
'(\1) \2-\3') RESULT
FROMtest
WHERE LENGTH(testcol) = 12;

Put a space after every character

SELECTtestcol,'(.)','\1 ') RESULT
FROMtest
WHEREtestcol like 'S%';

Replace multiple spaces with a single space

SELECTREGEXP_REPLACE('500 Oracle Parkway,'( ){2,}',' ') RESULT
FROM DUAL;

Insert a space between a lower case character followed by an upper case character

SELECTREGEXP_REPLACE('George McGovern','([[:lower:]])([[:upper:]])','\1 \2') CITY
FROM DUAL;
(Produces 'George Mc Govern')

Replace the period with a string (note use of '\')

SELECTREGEXP_REPLACE('We are trying to make the subject easier.','\.',' for you.') REGEXT_SAMPLE
FROM DUAL;

Demo

CREATE TABLEt(
testcolVARCHAR2(10));

INSERT INTOtVALUES('1');
INSERT INTOtVALUES('2 ');
INSERT INTOtVALUES('3 new ');

col newval format a10

SELECT LENGTH(testcol) len,testcol origval,
REGEXP_REPLACE(testcol,'\W+$',' ') newval,
LENGTH(REGEXP_REPLACE(testcol,' ')) newlen
FROMt;

Code snippet courtesy of Valentin Matak.
This is a handy way to remove duplicate characters from a string. This example shows the 1+ repeatability qualifier in use.

SELECTREGEXP_REPLACE('ABBBCABBCCCAABAAAAA','(A|B|C)\1+','\1')
FROM DUAL;

Code snippet courtesy of Jonathan Linder.
This checks for a valid email address and then extracts the domain name.

SELECTREGEXP_REPLACE('bugs.bunny@gmail.com','^(\S+)@(\S+)','\2')
FROM DUALd
WHERE
REGEXP_LIKE('bugs.bunny@gmail.com','^[A-Za-z0-9._%+-]+@([A-Za-z0-9-]+.)?+[A-Za-z]{2,63}$')

Code snippet courtesy of Juraj Drusc.
This example will convert Oracle's SYS_GUID() to a JAVA UUID.

SELECTlower(REGEXP_REPLACE(sys_guid(),
'([[:alnum:]]{8})([[:alnum:]]{4})([[:alnum:]]{4})([[:alnum:]]{4})([[:alnum:]]{12})',
'1-2-3-4-5'))FROM DUAL;

REGEXP_SUBSTR

Syntax

REGEXP_SUBSTR(source_string,pattern
[,position [,occurrence
[,match_parameter]]])

Searches for a comma followed by one or more occurrences of non-comma characters followed by a comma

SELECTREGEXP_SUBSTR('500 Oracle Parkway,',[^,]+,') RESULT
FROM DUAL;

Look for http:// followed by a substring of one or more alphanumeric characters and optionally,a period (.)

col result format a50

SELECTREGEXP_SUBSTR('Go to http://www.oracle.com/products and click on database',
'http://([[:alnum:]]+\.?){3,4}/?') RESULT
FROM DUAL;

Extracts try,tried or tries

SELECTREGEXP_SUBSTR('We are trying to make the subject easier.','tr(y(ing)?|(ied)|(ies))')
FROM DUAL;

Extract the 3rd field treating ':' as a delimiter

SELECTREGEXP_SUBSTR('system/pwd@orabase:1521:sidval',
'[^:]+',3) RESULT
FROM DUAL;

Extract from string with vertical bar delimiter

Equivalence classes

SELECTREGEXP_SUBSTR('iSelfSchooling NOT ISelfSchooling','[[=i=]]SelfSchooling') RESULT
FROM DUAL;

Parsing Demo

set serveroutput on

DECLARE
xVARCHAR2(2);
yVARCHAR2(2);
cVARCHAR2(40) := '1:3,4:6,8:10,3:4,7:6,11:12';
BEGIN
x := REGEXP_SUBSTR(c,'[^:]+',1);
y := REGEXP_SUBSTR(c,'[^,]+',3,1);

dbms_output.put_line(x ||' '|| y);
END;
/ Gary Whitaker wrote in with an addition to this parsing demo,and had the following comments:

The parsing demo above uses the regular expression '[^,]+' whichdoes not workwhen there is aNULLelement in the list. This Could result in returning the wrong element's data. Consider this simple example with comments:

set serveroutput on

DECLARE
xVARCHAR2(1);
yVARCHAR2(1);
zVARCHAR2(1);
cVARCHAR2(40) := '1,4,5,6,7';
BEGIN

-- Works as expected if the value you seek is before any null value in the list:

x := REGEXP_SUBSTR(c,2);

-- This form only returns the 4th element when all elements are present.
-- It will return the 4thnon-nullelement,which in this example is really '5',
-- which Could be misleading.
-- if you are really after the 4th element regardless if there is a null element:

y := REGEXP_SUBSTR(c,4);

-- This form gets the actual 4th element,allowing for the null element. -- Get the 1st substring of the 4th instance of a set of characters that are not a comma,-- when followed by a comma or the end of the line: z := REGEXP_SUBSTR(c,'([^,]*)(,|$)',NULL,1); dbms_output.put_line(x); dbms_output.put_line(y); dbms_output.put_line(z); END; /

oralce正则表达式函数

相关文章