Regular Expression Pocket Reference_ Regular Expressions for Perl, Ruby, PHP, Python, C, Java and .NET (Pocket Reference (O'Reilly)) ( PDFDrive )

You might also like

Download as pdf or txt
Download as pdf or txt
You are on page 1of 128

Constant width

Constant width italic

Constant width bold


foo|foobar

cat|category
alert backspace escape character form feed newline
carriage return horizontal tab vertical tab
\n

\b
backspace

\b backspace

\num

\015\012

\xnum \x{num} \unum \Unum

\x0D\x0A

\cchar

char
\cH

[...] [^...]
[...]
[^...]

-
[a-z]

\w \d \s \W \D \S

\s
\S
\d

[0-9]
[:alnum:]

[:lower:]
[[:lower:]] [a-z]
Alnum
Alpha
Blank
Cntrl
Digit
Graph
Lower
Print
Punct
Space
Upper
Xdigit

\p{prop} \P{prop}

Is In \p{Ll}

a α
\X

\P{M}\p{M} \X è;
e'
\p{L}
\p{Ll}
\p{Lm}
\p{Lo}

\p{Lt}
\p{Lu}
\p{C}
\p{Cc}
\p{Cf}
\p{Cn}
\p{Co}
\p{Cs}
\p{M}

\p{Mc}

\p{Me}

\p{Mn}

\p{N}
\p{Nd}
\p{Nl}
\p{No}

\p{P}
\p{Pc}
\p{Pd}
\p{Pe} \p{Ps}
\p{Pi}
\p{Pf}
\p{Po}
\p{Ps}
\p{S}
\p{Sc}
\p{Sk}
\p{Sm}
\p{So}
\p{Z}
\p{Zl}
\p{Zp}
\p{Zs}

^ \A

^
\A

$ \Z \z
$ $
\Z

\z
\G
\G

\b \B \< \>

\b
\B

\< \>
(?=...) (?!...)
(?<=...) (?<!...)

foo(?=bar)
foo foobar food

m
^ $

s
.

i
x

(?i) (?-i) (?mod:...)

(?mod)
(?-mod)
(?mod:...)

use (?i:perl) use


perl use Perl use PeRl
(?#...) #
#
(?#...)

.{0,80}(?#Field limit
is 80 chars)
.{0,80}
\Q...\E
\Q \E
\Q(.*)\E \(\.\*\)

(...) \1 \2

\1 \2
\b(\w+)\b
\s+\1\b the the
(?:...)

(?:foobar)
foobar
(?<name>...)

name Subject:(?<subject>.*)
Subject
subject
(?>...)

(?>[ab]*)\w\w aabbcc aabbaa


...|...

\b(foo|bar)\b
foo bar
(?(if)then |else)
if

then else
if then
else (<)?foo(?(1)>|bar)
<foo> foobar
* + ? {num,num}

(ab)+ ababababab
*? +? ?? {num,num}?

(an)+? an banana
*+ ++ ?+ {num,num}+

(ab)++ab ababababab

\w
è; e

/pattern/mode
s/pattern/replacement/mode
s/^\s+//
s/\s+$//

foo bar foo


"foo bar"

/^\d{1,6}$/

42 678234
10,000

/^#([a-fA-F0-9]){3}(([a-fA-F0-9]){3})?$/

#fff #1a1 #996633


#ff FFFFFF

/^\d{3}-\d{2}-\d{4}$/

078-05-1120
078051120 1234-12-12

/^\d{5}(-\d{4})?$/

94941-3232 10024
949413232

/^\$(\d{1,3}(\,\d{3})*|\d+)(\.\d{2})?$/

$20 $15,000.01
$1.001 $.99
/^\d\d\/\d\d\/\d\d\d\d \d\d:\d\d:\d\d$/

04/30/1978 20:45:38
4/30/1978 20:45:38 4/30/78

/^.*\//

/usr/local/bin/apachectl
C:\\System\foo.exe

/^(\d|[01]?\d\d|2[0-4]\d|25[0-5])\.(\d|[01]?\d\d|2[0-4]
\d|25[0-5])\.
(\d|[01]?\d\d|2[0-4]\d|25[0-5])\.(\d|[01]?\d\d|2[0-4]
\d|25[0-5])$/

127.0.0.1 224.22.5.110
127.1

/^([0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}$/

01:23:45:67:89:ab
01:23:45 0123456789ab

/^[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z_+])*@([0-9a-zA-Z][-\w]*
[0-9a-zA-Z]\.)+[a-zA-Z]{2,9}$/

tony@mail.
example.museum
.@example.com
/(https?):\/\/(([0-9a-zA-Z][-\w]*[0-9a-zA-Z]\.)+
[a-zA-Z]{2,9})
(:\d{1,4})?([-\w\/#~:.?+=&%@~]*)/

http://foo.com:8080/bar.html

\a
\b

\e x1B
\n x0A x0D
\r x0D x0A
\f x0C
\t x09
\octal
\xhex
\x{hex}
\cchar
\N{name}
PATH_TO_PERLLIB/unicode/Names.txt use
charnames ':full'

[...]
[^...]

[:class:]

. /s
\C

\X

\w \p{IsWord}
\W \P{IsWord}
\d \p{IsDigit}
\D \P{IsDigit}
\s \p{IsSpace}
\S \P{IsSpace}
\p{prop}
\P{prop}
^ /m

\A
$
/m

\Z

\z
\G
\b
\B
(?=...)
(?!...)
(?<=...)
(?<!...)

/i
/m ^ $ \n
/s .
/x #
/o
(?mode) xsmi

(?-mode) xsmi

(?mode:...) xsmi

(?-mode:...) xsmi
(?#...)
#... /x
\u
\l
\U
\L
\Q
\E \U \L \Q

(...) \1 \2 $1
$2
\n n
(?:...)
(?>...)
...|...
*
+
?
{n} n
{n,} n
{x,y} x y
*?
+?
??
{n,}? n
{x,y}? x y
(?(COND)...| COND
...)
(?(COND)...)
(?{CODE})
(??{CODE})

(?<name>...)
(?'name'...)
\k<name>
\k'name'
%+
$+{foo}
%-
$-{foo}[0]
\g{n} \gn n
\g{-n} \g-n n
(?n) n
(?&NAME)
(?R)
(?(DEFINE)...)
(*FAIL)
(*ACCEPT)

(*PRUNE)
(*MARK:name)
$REGMARK
(*SKIP:name) MARK

(*THEN)
(*COMMIT)
/p ${^PREMATCH} ${^MATCH}
${^POSTMATCH}
\K

qr//
m// s/// split

qr#...# m!...! m{...}


s|...|...| s[...][...] s<...>/.../

/.../
m
/.../ #same as m/.../

\N{name} \u \l \U \L \Q
\E

qr/PATTERN/ismxo
PATTERN

/ismxo
m/PATTERN/imsxocg
PATTERN
(1)
()
1 "" /imsxo
/cg /g

/g
/g

/cg

s/PATTERN/REPLACEMENT/egimosx
PATTERN
REPLACEMENT /imosx
/g PATTERN
/e REPLACEMENT

split /PATTERN/, EXPR, LIMIT


split /PATTERN/, EXPR
split /PATTERN/
split
PATTERN EXPR
LIMIT
LIMIT
m split m{PATTERN}
m{}
$1, $2, ...
@- $-[0]
$-[n] $n
@+ $+[0]
$+[n] $n
$+
$`
substr($input, 0, $-[0])
$&
substr($input, $-[0], $+[0] - $-[0])
$'
substr($input, $+[0])
$^N
$* /m /s
$^R

\w \d \s \b
use
locale i \L \l \U \u \w
\W

Is
In
IsASCII [\x00-\x7f]
IsAlnum [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Nd}]
IsAlpha [\p{Ll}\p{Lu}\p{Lt}\p{Lo}]
IsCntrl \p{C}
IsDigit \p{Nd}
IsGraph [^\p{C}\p{Space}]
IsLower \p{Ll}
IsPrint \P{C}
IsPunct \p{P}
IsSpace [\t\n\f\r\p{Z}]
IsUpper [\p{Lu}\p{Lt}]
IsWord [_\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Nd}]
IsXDigit [0-9a-fA-F]

# Find Spider-Man, Spiderman, SPIDER-MAN, etc.
# The match is case-insensitive, and a single optional hyphen or space may
# separate "spider" from "man".
my $dailybugle = "Spider-Man Menaces City!";
do_something( ) if $dailybugle =~ /spider[- ]?man/i;

# Match dates formatted like MM/DD/YYYY, MM-DD-YY,...
# Capture 1 is the month, capture 2 the day, and capture 3 a two- or
# four-digit year; either '-' or '/' is accepted as the separator.
my $date = "12/30/1969";
my $regex = qr!^(\d\d)[-/](\d\d)[-/](\d\d(?:\d\d)?)$!;
if ($date =~ $regex) {
    # The input is month-first, so $1 must be labelled as the month.
    print "Month=", $1,
          " Day=", $2,
          " Year=", $3;
}
# Convert <br> to <br /> for XHTML compliance
# Every occurrence is rewritten (/g), in any letter case (/i).
my $text = "Hello World! <br>";
$text =~ s{<br>}{<br />}gi;

# urlify - turn URLs into HTML links
# The sample URL must stay on one line: a newline inside the string literal
# would become part of the text and cut the URL in two.
$text = "Check the web site, http://www.oreilly.com/catalog/regexppr.";
$text =~
    s{
        \b                          # start at word boundary
        (                           # capture to $1
        (https?|telnet|gopher|file|wais|ftp) :
                                    # resource and colon
        [\w/#~:.?+=&%@!\-] +?       # one or more valid
                                    # characters
                                    # but take as little as
                                    # possible
        )
        (?=                         # lookahead
        [.:?\-] *                   # for possible punctuation
        (?: [^\w/#~:.?+=&%@!\-]     # invalid character
        | $ )                       # or end of string
        )
    }{<a href="$1">$1</a>}igx;      # /o dropped: the pattern interpolates no variables

perlre
java.
util.regex

java.util.regex

\a
\b x08
\e x1B
\n x0A
\r x0D
\f x0C
\t x09
\0octal

\xhex
\uhex

\cchar
[...]
[^...]

. DOTALL

\w [a-zA-Z0-9_]
\W [^a-zA-Z0-9_]
\d [0-9]
\D [^0-9]
\s [ \t\n\f\r\x0B]
\S [^ \t\n\f\r\x0B]
\p{prop}

\P{prop}

^
MULTILINE
\A
$
MULTILINE
\Z

\z
\b
\B
\G
(?=...)
(?!...)
(?<=...)
(?<!...)

Pattern.UNIX_LINES d \n

Pattern.DOTALL s .

Pattern.MULTILINE m ^ $

Pattern.COMMENTS x

#
Pattern.CASE_ i
INSENSITIVE
Pattern.UNICODE_ u
CASE
Pattern.CANON_EQ

(?mode)
idmsux

(?-mode)
idmsux

(?mode:...)
idmsux
(?-mode:...)
idmsux
#...
/x

(...) \1 \2
$1 $2
\n n
$n
n
(?:...)
(?>...)
...|...
*
+
?
{n} n
{n,} n
{x,y} x y
*?
+?
??
{n,}? n
{x,y}? x y

*+
++
?+
{n}+ n
{n,}+ n
{x,y}+ x y

java.util.regex.Pattern java.util.regex.Matcher
java.util.regex.PatternSyntaxException
CharSequence String
CharSequence
Pattern
CharSequence
Matcher
Pattern
CharSequence
String
\n \\n
String

boolean matches(String regex)


regex String
String[ ] split(String regex)

regex
String [ ] split(String regex, int limit)
limit-1
regex
String replaceFirst(String regex, String replacement)
regex replacement
String replaceAll(String regex, String replacement)
regex replacement

static Pattern compile(String regex)


Pattern regex
static Pattern compile(String regex, int flags)
Pattern regex
flags
int flags( )
Pattern
Matcher matcher(CharSequence input)
Matcher Pattern
input
static boolean matches(String regex, CharSequence input)
regex input
String pattern( )
Pattern
static String quote(String text)

String[ ] split(CharSequence input)

Pattern input
String[ ] split(CharSequence input, int limit)
limit
regex
Matcher appendReplacement(StringBuffer sb, String replacement)
replacement sb
StringBuffer appendTail(StringBuffer sb)
sb
int end( )

int end(int group)


group
boolean find( )

boolean find(int start)


start
String group( )
Pattern
String group(int group)
group
int groupCount( )
Pattern
boolean hasAnchoringBounds( )
Matcher

boolean hasTransparentBounds( )
Matcher

boolean hitEnd( )
boolean lookingAt( )

boolean matches( )
Pattern
Pattern pattern( )
Pattern Matcher
static String quoteReplacement(String string)

Matcher region(int start, int end)

start end

int regionStart( )

int regionEnd( )

String replaceAll(String replacement)


replacement
String replaceFirst(String replacement)
replacement
boolean requireEnd( )

Matcher reset( )

Matcher reset(CharSequence input)


input
int start( )

int start(int group)


group
MatchResult toMatchResult( )
MatchResult
String toString( )
Matcher useAnchoringBounds(boolean b)
Matcher

Matcher usePattern(Pattern p)
Matcher

Matcher useTransparentBounds(boolean b)
Matcher

PatternSyntaxException(String desc, String regex, int index)

String getDescription( )

int getIndex( )

String getMessage( )

String getPattern( )
char charAt(int index)
index
int length( )

CharSequence subSequence(int start, int end)


start
end
String toString( )
String

\w \W \d \D \s
\S
\p{L} \P{L} \p{Nd} \P{Nd} \p{Z} \P{Z}
\b \B

\p{Lu} \p{Lowercase_Letter}
In
\p{InGreekExtended} \p{In_
Greek_Extended} \p{In Greek Extended}

import java.util.regex.*;

// Find Spider-Man, Spiderman, SPIDER-MAN, etc.


/**
 * Finds Spider-Man, Spiderman, SPIDER-MAN, etc. using String.matches().
 */
public class StringRegexTest {
    public static void main(String[] args) throws Exception {
        final String dailyBugle = "Spider-Man Menaces City!";

        // String.matches() succeeds only when the regex consumes the whole
        // input, hence the leading and trailing ".*"; (?i) ignores case.
        final String regex = "(?i).*spider[- ]?man.*";

        if (!dailyBugle.matches(regex)) {
            return;
        }
        System.out.println("Matched: " + dailyBugle);
    }
}

// Match dates formatted like MM/DD/YYYY, MM-DD-YY,...


import java.util.regex.*;

/**
 * Matches dates formatted like MM/DD/YYYY or MM-DD-YY and prints them
 * as year-month-day.
 */
public class MatchTest {

    public static void main(String[] args) throws Exception {
        String date = "12/30/1969";
        // The pattern is kept on one line: a Java string literal cannot
        // continue across a line break.
        Pattern p =
            Pattern.compile("^(\\d\\d)[-/](\\d\\d)[-/](\\d\\d(?:\\d\\d)?)$");

        Matcher m = p.matcher(date);

        if (m.find( )) {
            // Groups follow the input order: month, day, then year.
            String month = m.group(1);
            String day = m.group(2);
            String year = m.group(3);
            System.out.printf("Found %s-%s-%s\n", year, month, day);
        }
    }
}

// Example -. Simple substitution


// Convert <br> to <br /> for XHTML compliance
import java.util.regex.*;

/**
 * Converts every &lt;br&gt; tag, in any letter case, to &lt;br /&gt; for
 * XHTML compliance and prints the result.
 */
public class SimpleSubstitutionTest {

    public static void main(String[] args) {
        String text = "Hello world. <br>";
        // The flag name must be a single identifier; it cannot be split
        // across lines.
        Pattern p = Pattern.compile("<br>", Pattern.CASE_INSENSITIVE);
        Matcher m = p.matcher(text);

        // replaceAll() rewrites every match, not just the first.
        String result = m.replaceAll("<br />");

        System.out.println(result);
    }
}

// urlify - turn URLs into HTML links


import java.util.regex.*;

/**
 * urlify - wraps each URL found in a piece of text in an HTML anchor
 * and prints the result.
 */
public class Urlify {

    public static void main (String[ ] args) throws Exception {
        // Kept on one line: a Java string literal cannot span lines.
        String text = "Check the web site, http://www.oreilly.com/catalog/regexppr.";
        // Free-spacing pattern; '#' is escaped inside the character classes
        // so that COMMENTS mode does not read it as a comment start.
        String regex =
              "\\b # start at word\n"
            + " # boundary\n"
            + "( # capture to $1\n"
            + "(https?|telnet|gopher|file|wais|ftp) : \n"
            + " # resource and colon\n"
            + "[\\w/\\#~:.?+=&%@!\\-] +? # one or more valid\n"
            + " # characters\n"
            + " # but take as little\n"
            + " # as possible\n"
            + ")\n"
            + "(?= # lookahead\n"
            + "[.:?\\-] * # for possible punc\n"
            + "(?: [^\\w/\\#~:.?+=&%@!\\-] # invalid character\n"
            + "| $ ) # or end of string\n"
            + ")";

        // Flags are bit masks, so they are combined with bitwise OR.
        Pattern p = Pattern.compile(regex,
            Pattern.CASE_INSENSITIVE | Pattern.COMMENTS);
        Matcher m = p.matcher(text);
        String result = m.replaceAll("<a href=\"$1\">$1</a>");
        System.out.println(result);
    }
}
\a \x07
\b \x08
\e \x1B
\n \x0A
\r \x0D
\f \x0C
\t \x09
\v \x0B
\0octal
\xhex
\uhex
\cchar

[...]

[^...]

.
s
\w [\p{Ll}\p{Lu}\p{Lt}\p{Lo}
\p{Nd}\p{Pc}] [a-zA-Z_0-9]
ECMAScript
\W [^\p{Ll}\p{Lu}\p{Lt}
\p{Lo}\p{Nd}\p{Pc}] [^a-zA-Z_0-9]
ECMAScript
\d \p{Nd} [0-9] ECMAScript
\D \P{Nd} [^0-9] ECMAScript
\s [ \f\n\r\t\v\x85\p{Z}]
[ \f\n\r\t\v] ECMAScript
\S [^ \f\n\r\t\v\x85
\p{Z}] [^ \f\n\r\t\v] ECMAScript
\p{prop}
\P{prop}
^
MULTILINE
\A
$
MULTILINE
\Z

\z
\b \w \W
\B
\G
(?=...)
(?!...)
(?<=...)
(?<!...)

Singleline s .

Multiline m ^ $

IgnorePatternWhite x
space
#
IgnoreCase i

CultureInvariant i
ExplicitCapture n
Compiled
RightToLeft

ECMAScript ECMAScript

IgnoreCase Multiline
(?imnsx-imnsx)

(?imnsx-imnsx:...)

(?#...)
#...
/x

(...) \1 \2 $1 $2
\n
n
$n n

(?<name>...) name
(?:...)
(?>...)
...|...
*
+
?
{n} n
{n,} n
{x,y} x y
*?
+?
??
{n,}? n
{x,y}? x y

$1, $2, ...


${name}
$
$&
$`
$+
$_

System.
Text.RegularExpressions RegExp( )

RegExp Groups
Match
@""

RegExp

public Regex(string pattern)


public Regex(string pattern RegexOptions options)
pattern
options
public static void CompileToAssembly(RegexCompilationInfo[ ]
regexinfos System.Reflection.AssemblyName assemblyname)
public static void CompileToAssembly(RegexCompilationInfo[ ]
regexinfos System.Reflection.AssemblyName assemblyname)
public static void CompileToAssembly(RegexCompilationInfo[ ]
regexinfos System.Reflection.AssemblyName assemblyname
System.Reflection.Emit.CustomAttributeBuilder[ ] attributes)
public static void CompileToAssembly(RegexCompilationInfo[ ]
regexinfos System.Reflection.AssemblyName assemblyname
System.Reflection.Emit.CustomAttributeBuilder[ ] attributes
string resourceFile)
Regex
regexinfos
assemblyname attributes
resourceFile

public static string Escape(string str)

#
public static bool IsMatch(string input string pattern)
public static bool IsMatch(string input string pattern
RegexOptions options)
public bool IsMatch(string input)
public bool IsMatch(string input int startat)

input
pattern options
startat
input

public static Match Match(string input string pattern)


public static Match Match(string input, string pattern,
RegexOptions options)
public Match Match(string input)
public Match Match(string input int startat)
public Match Match(string input int startat int length)
input
Match

pattern options
startat length

public static MatchCollection Matches(string input string


pattern)
public static MatchCollection Matches(string input, string
pattern, RegexOptions options)
public MatchCollection Matches(string input)
public MatchCollection Matches(string input int startat)
input
MatchCollection

pattern options
startat
input
public static string Replace(string input, string pattern,
MatchEvaluator evaluator)
public static string Replace(string input, string pattern,
MatchEvaluator evaluator, RegexOptions options)
public static string Replace(string input, string pattern, string
replacement)
public static string Replace(string input, string pattern, string
replacement, RegexOptions options)
public string Replace(string input MatchEvaluator evaluator)
public string Replace(string input MatchEvaluator evaluator
int count)
public string Replace(string input MatchEvaluator evaluator
int count int startat)
public string Replace(string input string replacement)
public string Replace(string input string replacement int count)
public string Replace(string input string replacement int
count int startat)
input
replacement
MatchEvaluator replacement
$n ${name}
options
count
startat
input
public static string[ ] Split(string input string pattern)
public static string[ ] Split(string input string pattern
RegexOptions options)
public string[ ] Split(string input)
public string[ ] Split(string input, int count)
public string[ ] Split(string input, int count, int
startat)

count
input startat
public bool Success

public string Value

public int Length

public int Index

public GroupCollection Groups


GroupCollection Groups[0].value
Groups

public Match NextMatch( )


Match

public virtual string Result(string result)


result

public static Match Synchronized(Match inner)


Match inner

public bool Success

public string Value


public int Length

public int Index

\w \d \s

ECMAScript
Thread.
CurrentCulture CultureInvariant

Is

//Find Spider-Man, Spiderman, SPIDER-MAN, etc.


namespace Regex_PocketRef
{
    using System.Text.RegularExpressions;

    // Finds Spider-Man, Spiderman, SPIDER-MAN, etc. with the static
    // Regex.IsMatch helper.
    class SimpleMatchTest
    {
        static void Main( )
        {
            string dailybugle = "Spider-Man Menaces City!";

            // An optional hyphen or space may separate the two words.
            string regex = "spider[- ]?man";

            if (Regex.IsMatch(dailybugle, regex, RegexOptions.IgnoreCase)) {
                //do something
            }
        }
    }
} // closes the namespace, which was previously left open

//Match dates formatted like MM/DD/YYYY, MM-DD-YY,...


using System.Text.RegularExpressions;

// Pulls the month, day and year out of a date formatted like
// MM/DD/YYYY or MM-DD-YY.
class MatchTest
{
    static void Main( )
    {
        // Two digits, a '-' or '/', two digits, a '-' or '/', then a
        // two- or four-digit year.
        Regex r = new Regex(@"^(\d\d)[-/](\d\d)[-/](\d\d(?:\d\d)?)$");
        string date = "12/30/1969";

        Match m = r.Match(date);
        if (!m.Success)
        {
            return;
        }

        // Group 0 is the whole match; the captures start at index 1.
        GroupCollection parts = m.Groups;
        string month = parts[1].Value;
        string day = parts[2].Value;
        string year = parts[3].Value;
    }
}

//Convert <br> to <br /> for XHTML compliance


using System.Text.RegularExpressions;

// Converts <br> tags, in any letter case, to <br /> for XHTML compliance.
class SimpleSubstitutionTest
{
    static void Main( )
    {
        string replacement = "<br />";
        string regex = "<br>";
        string text = "Hello world. <br>";

        // The static Replace overload rewrites every match in the input.
        RegexOptions options = RegexOptions.IgnoreCase;
        string result = Regex.Replace(text, regex, replacement, options);
    }
}

//urlify - turn URLs into HTML links


using System.Text.RegularExpressions;

// urlify - wraps each URL found in a piece of text in an HTML anchor.
public class Urlify
{
    // A method needs a return type; the entry point returns nothing.
    static void Main ( )
    {
        // Kept on one line: a regular (non-verbatim) string literal
        // cannot span lines.
        string text = "Check the web site, http://www.oreilly.com/catalog/regexppr.";
        // Verbatim string, so backslashes need no doubling and the pattern
        // may span lines; it is compiled with IgnorePatternWhitespace below.
        string regex =
            @"\b # start at word boundary
            ( # capture to $1
            (https?|telnet|gopher|file|wais|ftp) :
            # resource and colon
            [\w/#~:.?+=&%@!\-] +? # one or more valid
            # characters
            # but take as little as
            # possible
            )
            (?= # lookahead
            [.:?\-] * # for possible
            # punctuation
            (?: [^\w/#~:.?+=&%@!\-] # invalid character
            | $ ) # or end of string
            )";

        Regex r = new Regex(regex, RegexOptions.IgnoreCase
            | RegexOptions.IgnorePatternWhitespace);
        string result = r.Replace(text, "<a href=\"$1\">$1</a>");
    }
}
preg
preg

\a \x07
\b \x08
\e \x1B
\n \x0A
\r \x0D
\f \x0C
\t \x09
\octal
\xhex
\x{hex}
\cchar

[...]
[^...]

[:class:]

. /s
\C

\w [a-zA-Z0-9_]
\W [^a-zA-Z0-9_]
\d [0-9]
\D [^0-9]
\s [\n\r\f\t ]
\S [^\n\r\f\t ]

^
/m
\A
$
/m
\Z

\z
\G
\b \w
\W

\B
(?=...)
(?!...)
(?<=...)
(?<!...)

i
m ^ $ \n
s .
x #
U * *?

A
D $

(?mode) imsxU

(?-mode) imsxU

(?mode:...) xsmi
(?-mode:...) xsmi
(?#...)
#... x
\Q
\E \Q

(...) \1
\2
(?P<name>...)
name
\n n

(?:...)
(?>...)
...|...
*
+
?
{n} n
{n,} n
{x,y} x y
*?
+?
??
{n,}? n
{x,y}? x y

*+
++
?+
{n,}+ n
{x,y}+ x y

(?(condition ... ... condition

(?(condition ... condition

array preg_grep (string pattern array input)


input
pattern
int preg_match_all (string pattern string subject array
matches int flags )
pattern subject

matches
matches
n matches
n
matches[7][3]
pattern
subject
matches
PREG_SET_ORDER PREG_SET_ORDER
matches

PREG_
OFFSET_CAPTURE

subject
int preg_match (string pattern string subject array
matches int flags )
pattern subject
matches
matches[0]

PREG_OFFSET_CAPTURE matches

string preg_quote (string str [, string delimiter])


str
delimiter

str
mixed preg_replace_callback (mixed pattern callback
callback mixed subject int limit )
subject pattern
callback

limit
limit
pattern
callback subject
mixed preg_replace (mixed pattern mixed replacement mixed
subject , int limit )
subject pattern
replacement limit
limit

$n \n
pattern /e replacement

pattern
replacement replacement
replacement subject

array preg_split (string pattern string subject , int


limit , int flags )
pattern limit
preg_split( ) limit
limit
PREG_SPLIT_NO_EMPTY
PREG_SPLIT_DELIM_CAPTURE

PREG_SPLIT_OFFSET_CAPTURE

subject

//Find Spider-Man, Spiderman, SPIDER-MAN, etc.


// Case-insensitive pattern; an optional hyphen or space may separate
// the two words.
$regex = "/spider[- ]?man/i";

$dailybugle = "Spider-Man Menaces City!";

// preg_match() yields 1 only when the pattern matched.
if (preg_match($regex, $dailybugle) === 1) {
    //do something
}
//Match dates formatted like MM/DD/YYYY, MM-DD-YY,...
$date = "12/30/1969";
// '!' is the pattern delimiter, so the '/' in the character classes
// needs no escaping.
$p = "!^(\\d\\d)[-/](\\d\\d)[-/](\\d\\d(?:\\d\\d)?)$!";

// The condition needs its own closing parenthesis after the call's.
if (preg_match($p, $date, $matches)) {
    // $matches[0] holds the whole match; the captures start at index 1.
    $month = $matches[1];
    $day = $matches[2];
    $year = $matches[3];
}

//Convert <br> to <br /> for XHTML compliance


// Braces delimit the pattern; the trailing 'i' makes it case-insensitive.
$pattern = "{<br>}i";

$text = "Hello world. <br>";

// preg_replace() rewrites every match and returns the new string.
print preg_replace($pattern, "<br />", $text);

//urlify - turn URLs into HTML links
// The sample URL is kept on one line: a line break inside the literal
// would put a newline in the middle of the URL.
$text = "Check the web site, http://www.oreilly.com/catalog/regexppr.";
// Free-spacing (x) pattern delimited by braces. The 'i' modifier makes the
// scheme match case-insensitively.
$regex =
    "{ \\b # start at word\n"
    . " # boundary\n"
    . "( # capture to $1\n"
    . "(https?|telnet|gopher|file|wais|ftp) : \n"
    . " # resource and colon\n"
    . "[\\w/\\#~:.?+=&%@!\\-]+? # one or more valid\n"
    . " # characters\n"
    . " # but take as little as\n"
    . " # possible\n"
    . ")\n"
    . "(?= # lookahead\n"
    . "[.:?\\-]* # for possible punct\n"
    . "(?:[^\\w/\\#~:.?+=&%@!\\-] # invalid character\n"
    . "|$) # or end of string\n"
    . ") }xi";

echo preg_replace($regex, "<a href=\"$1\">$1</a>", $text);


re re

re

re

\a \x07
\b \x08
\n \x0A
\r \x0D
\f \x0C
\t \x09
\v \x0B
\octal
\xhh
\uhhhh
\Uhhhhhhhh

[...]
[^...]

. DOTALL
\w [a-zA-Z0-9_] LOCALE
UNICODE
\W [^a-zA-Z0-9_] LOCALE
UNICODE
\d [0-9]
\D [^0-9]
\s [ \t\n\r\f\v]
\S [^ \t\n\r\f\v]

^
MULTILINE
\A
$
MULTILINE

\Z

\b
\B
(?=...)
(?!...)
(?<=...)
(?<!...)

I IGNORECASE i
L LOCALE L \w \W \b \B

M MULTILINE (?m) m ^ $
\n
S DOTALL (?s) s .
U UNICODE (?u) u \w \W \b \B

X VERBOSE (?x) x
#
(?mode)
iLmsux

(?#...)
#...
VERBOSE

(...) \1 \2
(?P<name> ...)
name
(?P=name)
name
\n n
(?:...)
...|...
*
+
?
{n} n
{x,y} x y
*?
+?
??
{x,y}? x y

re

r'' r""

r'\n'
\\n

r'''text''' r"""text"""
re
compile(pattern [ flags])

flags
match(pattern string [ flags])
pattern string
None
search(pattern string flags )
pattern string
None
split(pattern string maxsplit )
string pattern
maxsplit

sub(pattern repl string count )


count pattern
string repl repl

subn(pattern repl string count )


sub( )

findall(pattern string)
pattern string pattern

finditer(pattern string)
pattern string

escape(string)

string
exception error
re.compile
flags

groupindex

pattern

match(string pos endpos )


search(string pos endpos )
split(string maxsplit )
sub(repl string count )
subn(repl string count )
findall(string)
re pattern
pos endpos

match find
pos
endpos
pos endpos search match
re
match search

string
match search
group([g1 g2 ])
None
groups([default])

None default
groupdict([default])

None
default
start([group])
group
group
end([group])
group
group
span([group])
group
group
expand([template])

template

lastgroup
None

lastindex
None

re

\u UNICODE \w
\W \b \B
re
#Find Spider-Man, Spiderman, SPIDER-MAN, etc.
import re

dailybugle = 'Spider-Man Menaces City!'

# re.match() only tries the pattern at the start of the string.
# NOTE(review): the trailing '.' requires one more character after "man";
# confirm that is intended.
pattern = r'spider[- ]?man.'

if re.match(pattern, dailybugle, re.IGNORECASE):
    # The call form of print works on both Python 2 and Python 3.
    print(dailybugle)

#Match dates formatted like MM/DD/YYYY, MM-DD-YY,...


import re

date = '12/30/1969'

# Two digits, a '-' or '/', two digits, a '-' or '/', then a two- or
# four-digit year.
regex = re.compile(r'^(\d\d)[-/](\d\d)[-/](\d\d(?:\d\d)?)$')

match = regex.match(date)

if match:
    # The body must be indented to belong to the if statement.
    month = match.group(1)  # 12
    day = match.group(2)  # 30
    year = match.group(3)  # 1969

#Convert <br> to <br /> for XHTML compliance


import re

text = 'Hello world. <br>'

# Replacement text, then the case-insensitive pattern it replaces.
repl = r'<br />'
regex = re.compile(r'<br>', flags=re.IGNORECASE)

# With the default count, sub() rewrites every match.
result = re.sub(regex, repl, text)
#urlify - turn URLs into HTML links
import re

# Kept on one line: a single-quoted string literal cannot span lines.
text = 'Check the web site, http://www.oreilly.com/catalog/regexppr.'

# Verbose-mode pattern: whitespace and '#' comments outside character
# classes are ignored when compiled with re.VERBOSE.
pattern = r'''
\b # start at word boundary
( # capture to \1
(https?|telnet|gopher|file|wais|ftp) :
# resource and colon
[\w/#~:.?+=&%@!\-] +? # one or more valid chars
# take little as possible
)
(?= # lookahead
[.:?\-] * # for possible punc
(?: [^\w/#~:.?+=&%@!\-] # invalid character
| $ ) # or end of string
)'''

# Flags are bit masks, so they are combined with bitwise OR.
regex = re.compile(pattern, re.IGNORECASE | re.VERBOSE)

result = regex.sub(r'<a href="\1">\1</a>', text)

Regexp String
\a \x07
\b \x08
\e \x1B
\n \x0A
\r \x0D
\f \x0C
\t \x09
\v \x0B
\0octal
\xhex
\cchar

[...]
[^...]
.
s
\w
\W
\d
\D
\s [ \f\n\r\t\v]
\S [^ \f\n\r\t\v]

^
\A
$
\Z

\z
\b \w \W
\B
\G
(?=...)
(?!...)

m .

x
#
i

n
o #{...}

(?imns-imns)
(?imns-imns:
...)
(?#...)
#... /x
(?<=...)
(?<!...)

(...) \1 \2 $1 $2
(?<name>...) \k<name>

\n n

$n n
\k<name>

(?:...)
(?>...)
...|...
*
+
?
{n} n
{n,} n
{x,y} x y
*?
+?
??
{n,}? n
{x,y}? x y

$1, $2, ...


${name}
$'
$&
$`
$+

Regexp MatchData
String
/.../ =~
/.../
Regexp.new =~ String#match /.../
Regexp
"foo, bar, frog".split(/,\s*/)
string =~ regexp => fixnum or nil
regexp
nil
regexp === string => boolean
regexp matches the string

gsub(pattern, replacement) => new_string


gsub(pattern) {|match| block } => new_string
string pattern
replacement
Regexp#sub
gsub!(pattern, replacement) => string or nil
gsub!(pattern) {|match| block } => string or nil
String#gsub
nil
index(regexp [, offset]) => fixnum or nil
regexp nil
offset

match(pattern) => matchdata or nil


pattern Regexp
MatchData nil
rindex(regexp [, fixnum]) => fixnum or nil
regexp nil
offset
scan(regexp) => array
scan(regexp) {|match, ...| block } => string

regexp

[regexp] => substring or nil


[regexp, fixnum] => substring or nil
slice(regexp) => substring or nil
slice(regexp, fixnum) => substring or nil
nil fixnum

slice!(regexp) => new_str or nil

nil
split(pattern=$;, [limit]) => anArray

Regexp
limit limit limit
limit

sub(regexp, replacement) => new_string


sub(regexp) {|match| block } => new_string
regexp
replacement
replacement
\1, \2, ..., \n
$1 $2 $` $& $'
sub!(pattern, replacement) => string or nil
sub!(pattern) {|match| block } => string or nil
String#sub
string nil
escape(string) => escaped_string
quote(string) => escaped_string

last_match => matchdata


last_match(n) => string
MatchData n
MatchData
Regexp.new(pattern [, options [, lang]]) => regexp
Regexp.compile(pattern [, options [, lang]]) => regexp
Regexp
Regexp::EXTENDED
Regexp::IGNORECASE Regexp::MULTILINE lang
'n' 'N'
'e' 'E' 's' 'S' 'u' 'U'
Regexp.union([pattern]*) => new_str
Regexp

Regexp

regexp == second_regexp => boolean


regexp.eql?(second_regexp) => boolean
Regexp

match(string) => matchdata or nil


MatchData nil

casefold? => true or false


IGNORECASE
inspect => string
Regexp
kcode => string
Regexp
options => fixnum

Regexp
Regexp
source => string

to_s => string

(?imns-imns:...)

[i] => string


[start, length] => array
[range] => array

n
begin(n) => integer
n
captures => array
MatchData#to_a
end(n) => integer
n
length => integer
size => integer

offset(n) => array

n
post_match => string

$'
pre_match => string

$`
select([index]*) => array
index

string => original_string

to_a => anArray

to_s => string

$KCODE = "UTF8"
\w \d \s \b

Regexp.new
/n

#Find Spider-Man, Spiderman, SPIDER-MAN, etc.

dailybugle = 'Spider-Man Menaces City!'

# Print the headline when it matches; the pattern is case-insensitive and
# requires one more character after "man".
puts dailybugle if dailybugle.match(/spider[- ]?man./i)

#Match dates formatted like MM/DD/YYYY, MM-DD-YY,...

date = '12/30/1969'

# Two digits, a '-' or '/', two digits, a '-' or '/', then a two- or
# four-digit year.
regexp = Regexp.new('^(\d\d)[-/](\d\d)[-/](\d\d(?:\d\d)?)$')

# On a match the captures are, in order, month (12), day (30), year (1969).
md = regexp.match(date)
month, day, year = md.captures if md

#Convert <br> to <br /> for XHTML compliance

text = 'Hello world. <br>'

# Case-insensitive, so <BR> and <Br> are converted as well.
regexp = Regexp.new('<br>', Regexp::IGNORECASE)

# gsub replaces every tag; sub would stop after the first one.
result = text.gsub(regexp, "<br />")

#urlify - turn URLs into HTML links

# The sample URL is kept on one line: a line break inside the literal
# would put a newline in the middle of the URL.
text = 'Check the web site, http://www.oreilly.com/catalog/regexppr.'

# Extended-mode pattern; IGNORECASE is added so upper-case schemes such as
# HTTP: are recognised too.
regexp = Regexp.new('
\b # start at word boundary
( # capture to \1
(https?|telnet|gopher|file|wais|ftp) :
# resource and colon
[\w/#~:.?+=&%@!\-] +? # one or more valid chars
# take little as possible
)
(?= # lookahead
[.:?\-] * # for possible punc
(?: [^\w/#~:.?+=&%@!\-] # invalid character
| $ ) # or end of string
)', Regexp::EXTENDED | Regexp::IGNORECASE)

# gsub links every URL in the text; sub would stop after the first.
result = text.gsub(regexp, '<a href="\1">\1</a>')


\0 \x00
\b \x08
\n \x0A
\r \x0D
\f \x0C
\t \x09
\v \x0B
\xhh
\uhhhh
\cchar
[...]
[^...]

. [^\x0A\x0D\
u2028\u2029]
\w [a-zA-Z0-9_]
\W [^a-zA-Z0-9_]
\d [0-9]
\D [^0-9]
\s
\S

^
/m
$
/m
\b
\B
(?=...)
(?!...)

m ^ $
i
(...) \1 \2
$1 $2
\n n

$n n

(?:...)
...|...
*
+
?
{n} n
{n,} n
{x,y} x y
*?
+?
??
{n,}? n
{x,y}? x y

String RegExp

\\w
\w
/pattern/img
pattern RegExp

search(pattern)
pattern
-1
replace(pattern replacement)
pattern
replacement pattern
pattern
$n
n pattern
match(pattern)
pattern
-1

g pattern

split(pattern limit)
pattern limit
limit
pattern pattern

new RegExp(pattern attributes)


/pattern/attributes
RegExp RegExp( )
/.../
pattern
attributes
g i m pattern
RegExp attributes

SyntaxError
pattern attributes
TypeError pattern RegExp
attributes

global
RegExp g
ignoreCase
RegExp i
lastIndex

multiline
RegExp m
source

exec(text)
text
null

g lastIndex

exec( ) test( ) lastIndex


lastIndex
lastIndex
RegExp

test(text)
true RegExp text test( )
exec( )
lastIndex
//Find Spider-Man, Spiderman, SPIDER-MAN, etc.
var dailybugle = "Spider-Man Menaces City!";

// The regex may match anywhere in the string; the "i" flag ignores case.
var regex = /spider[- ]?man/i;

// search() returns the index of the first match, or -1 when there is none.
// A match at index 0 is falsy and -1 is truthy, so the result has to be
// compared with -1 rather than used directly as a condition.
if (dailybugle.search(regex) !== -1) {
    //do something
}

//Match dates formatted like MM/DD/YYYY, MM-DD-YY,...

var date = "12/30/1969";
// Backslashes are doubled because the pattern is written as a string
// literal, and the literal must stay on a single line.
var p =
    new RegExp("^(\\d\\d)[-/](\\d\\d)[-/](\\d\\d(?:\\d\\d)?)$");

// exec() returns null when there is no match; otherwise an array whose
// element 0 is the whole match and whose captures start at index 1.
var result = p.exec(date);

if (result != null) {
    var month = result[1];
    var day = result[2];
    var year = result[3];
}

//Convert <br> to <br /> for XHTML compliance

// JavaScript variables are declared with var, not with a type name.
var text = "Hello world. <br>";

// "g" replaces every occurrence; "i" ignores case.
var pattern = /<br>/ig;

// Strings are immutable: replace() returns a new string, so the result
// has to be stored.
text = text.replace(pattern, "<br />");

//urlify - turn URLs into HTML links

// The literal must stay on a single line.
var text = "Check the web site, http://www.oreilly.com/catalog/regexppr.";
// JavaScript has no free-spacing mode, so the pattern source is assembled
// from string pieces and must not contain stray whitespace.
var source =
      "\\b"                                        // start at word boundary
    + "("                                          // capture to $1
    + "(https?|telnet|gopher|file|wais|ftp):"      // resource and colon
    + "[\\w/\\#~:.?+=&%@!\\-]+?"                   // one or more valid chars
                                                   // take little as possible
    + ")"
    + "(?="                                        // lookahead
    + "[.:?\\-]*"                                  // for possible punct
    + "(?:[^\\w/\\#~:.?+=&%@!\\-]"                 // invalid character
    + "|$)"                                        // or end of string
    + ")";

// A plain string passed to replace() is matched literally, so the source
// is compiled into a RegExp ("i" ignores case, "g" links every URL).
var regex = new RegExp(source, "ig");

// replace() returns a new string, so the result is stored back in text.
text = text.replace(regex, "<a href=\"$1\">$1</a>");


\a \x07
\b \x08
\e \x1B
\n \x0A
\r \x0D
\f \x0C
\t \x09
\octal
\xhex

\x{hex}
\cchar
\p{prop}
\P{prop}

[...]
[^...]

[:class:]

.
PCRE_DOTALL
\C

\w [a-zA-Z0-9_]
\W [^a-zA-Z0-9_]
\d [0-9]
\D [^0-9]
\s [\n\r\f\t\v ]
\S [^\n\r\f\t\v ]
\R

^
PCRE_MULTILINE
\A
$

PCRE_MULTILINE
\Z

\z
\G
\b \w
\W

\B
(?=...)
(?!...)
(?<=...)
(?<!...)
PCRE_CASELESS i

PCRE_MULTILINE m ^ $
\n
PCRE_DOTALL s .
PCRE_EXTENDED x
#
PCRE_UNGREEDY U
*
*?

PCRE_ANCHORED

PCRE_DOLLAR_ $
ENDONLY

PCRE_NO_AUTO_
CAPTURE
PCRE_UTF8

PCRE_AUTO_CALLOUT
PCRE_DUPNAMES
PCRE_FIRSTLINE

PCRE_NEWLINE_CR
PCRE_NEWLINE_LF
PCRE_NEWLINE_CRLF
PCRE_NEWLINE_ANY
PCRE_NOTBOL
PCRE_NOTEOL
PCRE_NOTEMPTY

PCRE_NO_UTF8_CHECK
PCRE_PARTIAL
PCRE
PARTIAL PCRE_
ERROR_NO_MATCH
(?mode)
imsxU

(?-mode)
imsxU

(?mode:...)
imsx
(?-mode:...)
imsx
\Q

\E \Q
(?#...)
#...
PCRE_EXTENDED

(...)
\1 \2
(?P<name>...),
(?<name>...),(?'name'...) name
(?P=name),\k<name>, \k'name'
\n, \gn, \g{n} n
(?:...)

(?>...)
...|...
*
+
?
{n} n
{n,} n
{x,y} x
y
*?

+?

??

{n,}? n

{x,y}? x y

*+

++

?+
{n}+ n

{n,}+ n

{x,y}+ x y
(?(condition)... ...)
condition

(?(condition)...)
condition

pcre.h libpcre.a
-lpcre
pcre_
compile( )
pcre_exec( )

pcre_free_substring( ) pcre_free_substring_
list( )

pcre *pcre_compile(const char *pattern int options const char


**errptr int *erroffset const unsigned char *tableptr)
pattern options
tableptr pcre_
maketables( )
errptr erroffset
pattern
int pcre_exec(const pcre *code, const pcre_extra *extra, const
char *subject, int length, int startoffset, int options, int
*ovector, int ovecsize)

code subject
length
ovector ovector
ovector

options pcre_extra
pcre_study( )
pcre_extra *pcre_study(const pcre *code int options const char
**errptr)
pcre_exec( )
code options
errptr
int pcre_copy_named_substring(const pcre *code const char
*subject int *ovector int stringcount const char *stringname
char *buffer int buffersize)

stringname buffer stringcount


ovector
pcre_exec( )
int pcre_copy_substring(const char *subject int *ovector int
stringcount int stringnumber char *buffer int buffersize)

stringnumber buffer stringcount


ovector
pcre_exec( )
int pcre_get_named_substring(const pcre *code const char
*subject int *ovector int stringcount const char *stringname
const char **stringptr)
stringptr
stringname
stringcount
ovector
pcre_exec( )
int pcre_get_stringnumber(const pcre *code const char *name)

name
int pcre_get_substring(const char *subject int *ovector int
stringcount int stringnumber const char **stringptr)
stringptr
stringnumber stringcount
ovector
pcre_exec( )
int pcre_get_substring_list(const char *subject int *ovector
int stringcount const char ***listptr)
listptr
void pcre_free_substring(const char *stringptr)
stringptr pcre_
get_named_substring( ) pcre_get_substring_list( )
void pcre_free_substring_list(const char **stringptr)
stringptr pcre_
get_substring_list( )
const unsigned char *pcre_maketables(void)

int pcre_fullinfo(const pcre *code const pcre_extra *extra int


what void *where)
what where
what PCRE_INFO_BACKREFMAX PCRE_INFO_
CAPTURECOUNT PCRE_INFO_FIRSTBYTE PCRE_INFO_FIRSTTABLE
PCRE_INFO_LASTLITERAL PCRE_INFO_NAMECOUNT PCRE_INFO_
NAMEENTRYSIZE PCRE_INFO_NAMETABLE PCRE_INFO_OPTIONS PCRE_
INFO_SIZE PCRE_INFO_STUDYSIZE
int pcre_config(int what void *where)
what
where what PCRE_CONFIG_UTF8 PCRE_
CONFIG_NEWLINE PCRE_CONFIG_LINK_SIZE PCRE_CONFIG_POSIX_
MALLOC_THRESHOLD PCRE_CONFIG_MATCH_LIMIT
char *pcre_version(void)

void *(*pcre_malloc)(size_t)
malloc( )
void (*pcre_free)(void *)
pcre_free( )
int (*pcre_callout)(pcre_callout_block *)
PCRE_UTF8

/* Sketch only: "..." stands for the pcre_compile() arguments that
 * precede the table pointer. */
/* Select the "fr" locale for character classification. */
setlocale(LC_CTYPE, "fr");
/* Build character tables; presumably they reflect the locale set above
 * -- confirm against the PCRE documentation. */
tables = pcre_maketables( );
/* Pass the tables as the final argument when compiling the pattern. */
re = pcre_compile(..., tables);

#include <stdio.h>
#include <string.h>
#include <pcre.h>

#define CAPTUREVECTORSIZE 30 /* should be a multiple of 3 */

/*
 * Example: case-insensitive PCRE match of "spider[- ]?man" against a
 * fixed string.
 *
 * Returns 0 when the pattern compiles and matches; returns 1 (after
 * printing a diagnostic) when compilation fails, when there is no
 * match, or when pcre_exec() reports any other error.
 */
int main(int argc, char **argv)
{
    pcre *regex;
    const char *error;
    int erroffset;
    int capturevector[CAPTUREVECTORSIZE];
    int rc;

    /* String literals are read-only, so hold them through const pointers. */
    const char *pattern = "spider[- ]?man";
    const char *text = "SPIDERMAN menaces city!";

    /* Command-line arguments are not used by this example. */
    (void)argc;
    (void)argv;

    /* Compile Regex */
    regex = pcre_compile(
        pattern,
        PCRE_CASELESS,  /* OR'd mode modifiers */
        &error,         /* error message */
        &erroffset,     /* position in regex where error occurred */
        NULL);          /* use default locale */

    /* Handle Errors */
    if (regex == NULL)
    {
        printf("Compilation failed at offset %d: %s\n", erroffset,
               error);
        return 1;
    }

    /* Try Match */
    rc = pcre_exec(
        regex,              /* compiled regular expression */
        NULL,               /* optional results from pcre_study */
        text,               /* input string */
        (int)strlen(text),  /* length of input string */
        0,                  /* starting position in input string */
        0,                  /* OR'd options */
        capturevector,      /* holds results of capture groups */
        CAPTUREVECTORSIZE);

    /* The compiled pattern is no longer needed once matching is done. */
    pcre_free(regex);

    /* Handle Errors */
    if (rc < 0)
    {
        switch (rc)
        {
            case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
            default: printf("Matching error %d\n", rc); break;
        }
        return 1;
    }
    return 0;
}

#include <stdio.h>
#include <string.h>
#include <pcre.h>
#define CAPTUREVECTORSIZE 30 /* should be a multiple of 3 */

/*
 * Example: match a date such as "12/30/1969" with PCRE and print the
 * whole match followed by each captured group.
 *
 * Returns 0 on a successful match; returns 1 (after printing a
 * diagnostic) when compilation fails, when there is no match, or when
 * pcre_exec() reports any other error.
 */
int main(int argc, char **argv)
{
    pcre *regex;
    const char *error;
    int erroffset;
    int capturevector[CAPTUREVECTORSIZE];
    int rc, i;

    /* String literals are read-only, so hold them through const pointers. */
    const char *pattern = "(\\d\\d)[-/](\\d\\d)[-/](\\d\\d(?:\\d\\d)?)";
    const char *text = "12/30/1969";

    /* Command-line arguments are not used by this example. */
    (void)argc;
    (void)argv;

    /* Compile the Regex */
    regex = pcre_compile(
        pattern,
        PCRE_CASELESS,  /* OR'd mode modifiers */
        &error,         /* error message */
        &erroffset,     /* position in regex where error occurred */
        NULL);          /* use default locale */

    /* Handle compilation errors */
    if (regex == NULL)
    {
        printf("Compilation failed at offset %d: %s\n",
               erroffset, error);
        return 1;
    }

    rc = pcre_exec(
        regex,              /* compiled regular expression */
        NULL,               /* optional results from pcre_study */
        text,               /* input string */
        (int)strlen(text),  /* length of input string */
        0,                  /* starting position in input string */
        0,                  /* OR'd options */
        capturevector,      /* holds results of capture groups */
        CAPTUREVECTORSIZE);

    /* The offsets live in capturevector, so the pattern can go now. */
    pcre_free(regex);

    /* Handle Match Errors */
    if (rc < 0)
    {
        switch (rc)
        {
            case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
            /*
            Handle other special cases if you like
            */
            default: printf("Matching error %d\n", rc); break;
        }
        return 1;
    }

    /* Match succeeded */
    printf("Match succeeded\n");

    /*
     * A return of 0 is treated here as "the output vector was too
     * small": report only as many groups as the vector can describe.
     */
    if (rc == 0)
    {
        rc = CAPTUREVECTORSIZE / 3;
        printf("ovector only has room for %d captured substrings\n",
               rc - 1);
    }

    /* Show capture groups: entry i occupies slots 2*i (start) and 2*i+1 (end) */
    for (i = 0; i < rc; i++)
    {
        int start = capturevector[2 * i];
        int end = capturevector[2 * i + 1];

        /* Negative offsets mean the group did not take part in the match. */
        if (start < 0 || end < start)
        {
            printf("%2d: <unset>\n", i);
            continue;
        }
        printf("%2d: %.*s\n", i, end - start, text + start);
    }

    return 0;
}
RewriteRule LocationMatch DirectoryMatch FilesMatch
ProxyMatch AliasMatch

\octal
\xhex

\x{hex}
\cchar
[...]

[^...]

[[:class:]]

.
/s
\C

\w [a-zA-Z0-9_]
\W [^a-zA-Z0-9_]
\d [0-9]
\D [^0-9]
\s [\n\r\f\t ]
\S [^\n\r\f\t ]

^
$
\b \w
\W

\B
(?=...)
(?!...)
(?<=...)
(?<!...)
NC
(?mode) imsxU

(?-mode) imsxU

(?mode:...) xsmi
(?-mode:...) xsmi
(?#...)
#... x
\Q
\E \Q

(...) \1
\2
(?P<name>...)
name
\n n

(?:...)
(?>...)
...|...
*
+
?
{n} n
{n,} n
{x,y} x y
*?
+?
??
{n,}? n
{x,y}? x y

*+
++
?+
{n}+ n
{n,}+ n
{x,y}+ x y

(?(condition)...|...) condition

(?(condition)...) condition

RewriteEngine
On RewriteRule
RewriteCond RewriteRule
RewriteRule pattern substitution [[FLAG1, FLAG2, ...]]
substitution
pattern substitution
$N RewriteRule
%N RewriteCond

%{VARNAME} ${mapname:key|default}
RewriteCond teststring pattern

RewriteRule RewriteCond
RewriteRule AND
OR teststring
$N RewriteRule
%N RewriteCond
%{VARNAME}
${mapname:key|default}

CO=NAME:VAL:domain
[:lifetime[:path]]
E=VAR:VAL
F 403
G 410
H=Content-handler
L
N

NC
NE

NS

PT
Alias ScriptAlias
Redirect
QSA
R[=Code]
302
S=num num
T=MIME-type

NC
OR OR
AND

HTTP_USER_AGENT REMOTE_ADDR
HTTP_REFERER REMOTE_HOST
HTTP_COOKIE REMOTE_PORT
HTTP_FORWARDED REMOTE_USER
HTTP_HOST REMOTE_IDENT
HTTP_PROXY_CONNECTION REQUEST_METHOD
HTTP_ACCEPT SCRIPT_FILENAME
PATH_INFO
DOCUMENT_ROOT AUTH_TYPE
SERVER_ADMIN
SERVER_ADDR TIME_YEAR
SERVER_PORT TIME_MON
SERVER_PROTOCOL TIME_DAY
SERVER_SOFTWARE TIME_HOUR
TIME_MIN
API_VERSION TIME_WDAY
THE_REQUEST TIME
REQUEST_URI
REQUEST_FILENAME
IS_SUBREQ
HTTPS

AliasMatch pattern file-path|directory-path

$1...$n
<DirectoryMatch pattern> ... </DirectoryMatch>

pattern
<FilesMatch pattern> ... </FilesMatch>
pattern
<LocationMatch pattern> ... </LocationMatch>
pattern
<ProxyMatch pattern> ... </ProxyMatch>
pattern

# Rewrite /foo to /bar
RewriteEngine On
RewriteRule ^/foo$ /bar

# Rewrite pretty url as script parameters:
# /action/123 becomes /index.php?action=action&id=123
RewriteRule ^/(\w+)/(\d+) /index.php?action=$1&id=$2

# Limit admin url to internal IP addresses.
# The dots are escaped and the pattern is anchored so that only addresses
# of the form 192.168.N.N are exempt; an unescaped "." matches any character.
# NOTE(review): PATH_INFO may begin with "/" on your server, in which case
# ^admin would never match -- confirm against your configuration.
RewriteCond %{REMOTE_ADDR} !^192\.168\.\d+\.\d+$
RewriteCond %{PATH_INFO} ^admin
RewriteRule .* - [F]

# Make sure admin urls are served over SSL.
# The flags must sit on the same line as the RewriteRule they modify.
RewriteCond %{SERVER_PORT} !^443$
RewriteRule ^/admin/(.*)$ https://www.example.com/admin/$1 [L,R]

\b \x08
\e \x1B
\n \x0A
\r \x0D
\t \x09

[...]
[^...]

[[:class:]]
. /s

\w [a-zA-Z0-9_]
\W [^a-zA-Z0-9_]
\a [a-zA-Z]
\A [^a-zA-Z]
\h [a-zA-Z_]
\H [^a-zA-Z_]
\d [0-9]
\D [^0-9]
\s [ \t]
\S [^ \t]
\x [a-fA-F0-9]
\X [^a-fA-F0-9]
\o [0-7]
\O [^0-7]
\l [a-z]
\L [^a-z]
\u [A-Z]
\U [^A-Z]
\i isident
\I
\k iskeyword

\K
\f isfname

\F
\p isprint x20-x7E
\P

\<

\>

:set ic

:set noic
\u
\l
\U

\L

\E \e \U \L

\(...\) \1 \2
\n n

&

\+
\=
\{n} n
\{n,} n
\{,n} n
\{x,y} x y

/pattern ?pattern

pattern ?pattern
n N
:[addr1[,addr2]]s/pattern/replacement/[cgp]
pattern replacement

addr1

addr2

c
g
p

.
$
%
't t
/...[/]
?...[?]
\/
\?
\&
Find spider-man, Spider-Man, Spider Man
/[Ss]pider[- ][Mm]an

Find spider-man, Spider-Man, Spider Man, spiderman, SPIDER-MAN, etc.
:set ic
/spider[- ]\=man

Globally convert <br> to <br /> for XHTML compliance.


:set ic
:%s/<br>/<br \/>/g

Globally convert <br> to <br /> for XHTML compliance.


:%s/<br>/<br \/>/ig

Urlify: Turn URLs into HTML links


:%s/\(https\=:\/\/[a-z_.\\w\/\\#~:?+=&;%@!-]*\)/<a href="\1">\1<\/a>/ic
\a awk, sed
\b awk

\f awk, sed
\n awk, sed
\r awk, sed
\t awk, sed
\v awk, sed
\ooctal sed
\octal awk

\xhex awk, sed

\ddecimal awk, sed

\cchar awk, sed


\cC
\b awk
\metacharacter awk, sed, egrep

[...] awk, sed, egrep

[^...] awk, sed, egrep

. awk, sed, egrep

\w egrep, sed
[a-zA-Z0-9_]
\W egrep, sed

[^a-zA-Z0-9_]
[[:prop:]] awk, sed

[^[:prop:]] awk, sed

^ awk, sed, egrep

$ awk, sed, egrep

\< egrep

\> egrep

i I sed

-i egrep

IGNORECASE awk
non-zero
(PATTERN) awk
\(PATTERN\) sed
\1,
\2 \9
\n n sed

...|... egrep, awk, sed

* awk, sed, egrep


+ awk, sed, egrep
? awk, sed, egrep
\{n\} n sed, egrep
\{n,\} n sed, egrep
\{x,y\} x sed, egrep
y

egrep [options] pattern files


files pattern

$ echo 'Spiderman Menaces City!' > dailybugle.txt


$ egrep -i 'spider[- ]?man' dailybugle.txt
Spiderman Menaces City!

sed '[address1][,address2]s/pattern/replacement/[flags]' files


sed -f script files
files

/.../
address1

address2
& \n replacement

& pattern
\n

n
n n
g
pattern
p

w file
file

MM/DD/YYYY DD.MM.YYYY
$ echo '12/30/1969' |
sed 's!\([0-9][0-9]\)/\([0-9][0-9]\)/\([0-9]\{2,4\}\)!\2.\1.\3!g'

awk 'instructions' files


awk -f script files
instructions script
/pattern/ {action} action
pattern

match(text pattern)
pattern text text

RSTART
RLENGTH
gsub(pattern replacement text)
pattern text replacement
$0 text

sub(pattern replacement text)


pattern text replacement

$0 text

$ cat sub.awk
# For every input record: replace each match of the URL pattern
# (http:// or https:// followed by a run of URL characters) using the
# replacement string below, then print the record.
{
# gsub() is called with no third argument, so it edits the current record.
# NOTE(review): the replacement uses "\&"; whether that expands to the
# matched text or to a literal ampersand depends on the awk implementation
# -- confirm the output is the intended <a href="URL">URL</a> form.
gsub(/https?:\/\/[a-z_.\\w\/\\#~:?+=&;%@!-]*/,
"<a href=\"\&\">\&</a>");

print
}

$ echo "Check the web site, http://www.oreilly.com/catalog/repr" | awk -f sub.awk

You might also like