Tutorial
I'll follow the example from rex
for URL validation.
julia> using RegularExpressions
julia> valid = one_of(not, raw.((".", "/", " ", "-"))...);
julia> valids = of(:some, valid);
julia> dashes = of(:none_or_some, raw("-"));
julia> not_space = one_of(not, short(:space));
julia> not_spaces = of(:none_or_some, not_space);
julia> url_pattern = pattern(
CONSTANTS.start,
capture(
or(
kind(:group, "http", of(:maybe, "s")),
"ftp"
),
name = "protocol"
),
raw("://"),
of(:maybe,
capture(
of(:some, not_space),
name = "username"
),
of(:maybe,
raw(":"),
capture(
not_spaces,
name = "password"
)
),
raw("@")
),
capture(
of(:none_or_some,
valids,
dashes
),
valids,
name = "host"
),
capture(
of(:none_or_some,
raw("."),
of(:none_or_some,
valids,
dashes
),
valids
),
name = "domain"
),
raw("."), capture(
between(2, Inf, valid),
name = "TLD"
),
of(:maybe, raw(":"), capture(
between(2, 5, short(:digit)),
name = "port"
)),
of(:maybe, raw("/"), capture(
not_spaces,
name = "resource"
)),
CONSTANTS.stop
);
julia> goods = (
"http://foo.com/blah_blah",
"http://foo.com/blah_blah/",
"http://foo.com/blah_blah_(wikipedia)",
"http://foo.com/blah_blah_(wikipedia)_(again)",
"http://www.example.com/wpstyle/?p=364",
"https://www.example.com/foo/?bar=baz&inga=42&quux",
"http://✪df.ws/123",
"http://userid:password@example.com:8080",
"http://userid:password@example.com:8080/",
"http://userid@example.com",
"http://userid@example.com/",
"http://userid@example.com:8080",
"http://userid@example.com:8080/",
"http://userid:password@example.com",
"http://userid:password@example.com/",
"http://➡.ws/䨹",
"http://⌘.ws",
"http://⌘.ws/",
"http://foo.com/blah_(wikipedia)#cite-1",
"http://foo.com/blah_(wikipedia)_blah#cite-1",
"http://foo.com/unicode_(✪)_in_parens",
"http://foo.com/(something)?after=parens",
"http://☺.damowmow.com/",
"http://code.google.com/events/#&product=browser",
"http://j.mp",
"ftp://foo.bar/baz",
"http://foo.bar/?q=Test%20URL-encoded%20stuff",
"http://مثال.إختبار",
"http://例子.测试",
"http://-.~_!&'()*+,;=:%40:80%2f::::::@example.com",
"http://1337.net",
"http://a.b-c.de",
"http://223.255.255.254"
);
julia> bads = (
"http://",
"http://.",
"http://..",
"http://../",
"http://?",
"http://??",
"http://??/",
"http://#",
"http://##",
"http://##/",
"http://foo.bar?q=Spaces should be encoded",
"//",
"//a",
"///a",
"///",
"http:///a",
"foo.com",
"rdar://1234",
"h://test",
"http:// shouldfail.com",
":// should fail",
"http://foo.bar/foo(bar)baz quux",
"ftps://foo.bar/",
"http://-error-.invalid/",
"http://-a.b.co",
"http://a.b-.co",
"http://0.0.0.0",
"http://3628126748",
"http://.www.foo.bar/",
"http://www.foo.bar./",
"http://.www.foo.bar./"
);
julia> all(occursin(url_pattern, url) for url in goods)
true
julia> any(occursin(url_pattern, url) for url in bads)
false
Interface
RegularExpressions.CLASSES
RegularExpressions.CONSTANTS
RegularExpressions.EXTRAS
RegularExpressions.GREEDS
RegularExpressions.KINDS
RegularExpressions.OPTIONS
RegularExpressions.PROPERTIES
RegularExpressions.QUANTITIES
RegularExpressions.SHORTS
RegularExpressions.not
RegularExpressions.between
RegularExpressions.capture
RegularExpressions.captured
RegularExpressions.class
RegularExpressions.exists
RegularExpressions.extra
RegularExpressions.kind
RegularExpressions.of
RegularExpressions.one_of
RegularExpressions.option
RegularExpressions.or
RegularExpressions.pattern
RegularExpressions.property
RegularExpressions.raw
RegularExpressions.recurred
RegularExpressions.relative
RegularExpressions.script
RegularExpressions.short
RegularExpressions.template
RegularExpressions.through
RegularExpressions.version
RegularExpressions.whether
RegularExpressions.CLASSES
— ConstantCLASSES
Access with class
.
julia> using RegularExpressions
julia> show(CLASSES)
(letter_or_digit = "alnum", letter = "alpha", standard = "ascii", blank = "blank", control = "cntrl", digit = "digit", prints = "graph", lowercase = "lower", prints_or_space = "print", punctuation = "punct", space = "space", uppercase = "upper", word = "word", hexadecimal = "xdigit")
RegularExpressions.CONSTANTS
— ConstantCONSTANTS
Plain commands.
julia> using RegularExpressions
julia> show(CONSTANTS)
(any = ".", start = "^", stop = "\$", define = "DEFINE", recur = "(?R)", recurred = "R")
julia> p = pattern(CONSTANTS.any)
r"."
julia> occursin(p, "a")
true
RegularExpressions.EXTRAS
— ConstantEXTRAS
Access with extra
.
julia> using RegularExpressions
julia> show(EXTRAS)
(limit_depth = "LIMIT_DEPTH", limit_heap = "LIMIT_HEAP", limit_match = "LIMIT_MATCH", not_empty = "NOTEMPTY", not_empty_at_start = "NOTEMPTY_ATSTART", no_auto_possess = "NO_AUTO_POSSESS", no_dot_star_anchor = "NO_DOTSTAR_ANCHOR", no_just_in_time = "NO_JIT", no_start_optimization = "NO_START_OPT", UTF = "UTF", unicode_properties = "UCP", carriage_return = "CR", linefeed = "LF", carriage_return_linefeed = "CRLF", standard_newline = "ANYCRLF", unicode_newline = "ANY", null = "NUL", standard_boundary = "BSR_ANYCRLF", unicode_boundary = "BSR_UNICODE", accept = "ACCEPT", fail = "FAIL", mark = "MARK", commit = "COMMIT", prune = "PRUNE", skip = "SKIP", then = "THEN")
RegularExpressions.GREEDS
— ConstantGREEDS
julia> using RegularExpressions
julia> show(GREEDS)
(possessive = "+", lazy = "?", greedy = "")
RegularExpressions.KINDS
— ConstantKINDS
Access with kind.
julia> using RegularExpressions
julia> show(KINDS)
(group = ":", reset = "|", atomic = ">", comment = "#", after = "=", before = "<=", callout = "C")
RegularExpressions.OPTIONS
— ConstantOPTIONS
Access with option.
julia> using RegularExpressions
julia> show(OPTIONS)
(caseless = "i", duplicate_names = "J", multi_line = "m", no_auto_capture = "n", single_line = "s", lazy = "U", ignore_space = "x", ignore_all_space = "xx", unset = "^", recur = "R", callout = "C")
RegularExpressions.PROPERTIES
— ConstantPROPERTIES
Access with property
.
julia> using RegularExpressions
julia> show(PROPERTIES)
(other = ("C", (control = "c", format = "f", unassigned = "n", private = "o", surrogate = "s")), letter = ("L", (lowercase = "l", modifier = "m", other = "o", titlecase = "t", uppercase = "u", cased = "&")), mark = ("M", (spacing = "c", enclosing = "e", non_spacing = "n")), number = ("N", (decimal = "d", letter = "l", other = "o")), punctuation = ("P", (connector = "c", dash = "d", close = "e", final = "f", initial = "i", other = "o", open = "s")), symbol = ("S", (currency = "c", modifier = "k", math = "m", other = "o")), seperator = ("Z", (line = "l", paragraph = "p", space = "s")), special = ("X", (letter_or_digit = "an", space = "sp", exists = "uc", word = "wd")))
RegularExpressions.QUANTITIES
— ConstantQUANTITIES
Access with of
.
julia> using RegularExpressions
julia> show(QUANTITIES)
(maybe = "?", none_or_some = "*", some = "+")
RegularExpressions.SHORTS
— ConstantSHORTS
Access with short
.
RegularExpressions.not
— Constant
RegularExpressions.between
— Methodbetween(low, high, them...; greed = :greedy)
Between low and high of them, with a certain greed. Access GREEDS.
julia> using RegularExpressions
julia> p = pattern(between(1, 3, "a"))
r"a{1,3}"
julia> occursin(p, "aa")
true
julia> p = pattern(between(2, Inf, "a"))
r"a{2,}"
julia> occursin(p, "aaa")
true
RegularExpressions.capture
— Methodcapture(them...; name = nothing)
Capture them
with optional name
. See examples in captured
.
RegularExpressions.captured
— Methodcaptured(it)
Refer to a captured group. See relative.
julia> using RegularExpressions
julia> p = pattern(capture("a"), captured(1))
r"(a)\g<1>"
julia> occursin(p, "aa")
true
RegularExpressions.class
— Methodclass([::Not], it)
Character classes. Access CLASSES. You can negate all classes with not.
julia> using RegularExpressions
julia> p = pattern(one_of(class(:space)))
r"[[:space:]]"
julia> occursin(p, " ")
true
julia> p = pattern(one_of(class(not, :space)))
r"[[:^space:]]"
julia> occursin(p, "a")
true
RegularExpressions.exists
— Methodexists(it)
Check whether a capture group exists. For use with whether.
julia> using RegularExpressions
julia> p = pattern(
CONSTANTS.start,
of(:maybe, capture("a", name = "first")),
whether(exists("first"), "b", "c")
)
r"^(?:(?<first>a))?(?(<first>)b|c)"
julia> occursin(p, "ab")
true
RegularExpressions.extra
— Methodextra(it)
extra(it, name)
extra(it, value::Number)
extra
command. Access EXTRAS
.
julia> using RegularExpressions
julia> p = pattern(extra(:standard_newline), "a")
r"(*ANYCRLF)a"
julia> occursin(p, "a\r")
true
julia> extra(:limit_match, 0)
"(*LIMIT_MATCH=0)"
julia> extra(:mark, "name")
"(*MARK:name)"
RegularExpressions.kind
— Methodkind([::Not], a_kind, them...)
A variety of syntaxes: a_kind
of them
. Access KINDS
. Use repr
to pass strings to callouts. You can negate look-aheads and look-behinds with not.
julia> using RegularExpressions
julia> p = pattern(kind(:before, "a"), "b")
r"(?<=a)b"
julia> occursin(p, "ab")
true
julia> negated = pattern(kind(not, :before, "a"), "b")
r"(?<!a)b"
julia> occursin(negated, "ab")
false
RegularExpressions.of
— Methodof(quantity::Symbol, them...; greed = :greedy)
of(quantity::Number, them...)
A quantity of them, with a certain greed. Access QUANTITIES and GREEDS.
julia> using RegularExpressions
julia> p = pattern(of(:some, "a"))
r"a+"
julia> occursin(p, "aa")
true
julia> p = pattern(of(2, "a"))
r"a{2}"
julia> occursin(p, "aa")
true
RegularExpressions.one_of
— Methodone_of([::Not], them...)
Create a character class. You can negate all classes with not
.
julia> using RegularExpressions
julia> p = pattern(one_of('a', 'b'))
r"[ab]"
julia> occursin(p, "b")
true
julia> p = pattern(one_of(not, 'a', 'b'))
r"[^ab]"
julia> occursin(p, "c")
true
RegularExpressions.option
— Methodoption([::Not]; options...)
option
. Access OPTIONS
.
julia> using RegularExpressions
julia> p = pattern(option(caseless = true, ignore_space = true), "a ")
r"(?ix)a "
julia> occursin(p, "A")
true
julia> p = pattern(option(caseless = true), option(not, caseless = true), "a")
r"(?i)(?-i)a"
julia> occursin(p, "A")
false
RegularExpressions.or
— Methodor(them...)
One of them
.
julia> using RegularExpressions
julia> p = pattern(or("a", "b"))
r"a|b"
julia> occursin(p, "b")
true
RegularExpressions.pattern
— Methodpattern(them..., options...)
Splat of Regex. Options can be in OPTIONS.
julia> using RegularExpressions
julia> p = pattern("a", "b")
r"ab"
julia> occursin(p, "ab")
true
julia> p = pattern("A", caseless = true)
r"A"i
julia> occursin(p, "a")
true
RegularExpressions.property
— Methodproperty([::Not], general, [specific])
A character property. Access PROPERTIES
. You can negate all properties with not
.
julia> using RegularExpressions
julia> p = pattern(property(:seperator))
r"\p{Z}"
julia> occursin(p, " ")
true
julia> p = pattern(property(not, :seperator))
r"\P{Z}"
julia> occursin(p, "a")
true
julia> p = pattern(property(:seperator, :space))
r"\p{Zs}"
julia> occursin(p, " ")
true
julia> p = pattern(property(not, :seperator, :space))
r"\P{Zs}"
julia> occursin(p, "a")
true
RegularExpressions.raw
— Methodraw(it)
Escape punctuation.
julia> using RegularExpressions
julia> p = pattern(raw("1.0"))
r"1\.0"
julia> occursin(p, "v1.0")
true
RegularExpressions.recurred
— Methodrecurred(it::Number)
recurred(it)
Check for recursion. Use with whether
.
julia> using RegularExpressions
julia> recurred(1)
"R1"
julia> recurred("name")
"R&name"
RegularExpressions.relative
— Methodrelative(it)
Mark a reference as relative. For use with captured
or whether
.
julia> using RegularExpressions
julia> p = pattern(captured(relative(1)), capture("a"))
r"\g<+1>(a)"
julia> occursin(p, "aa")
true
julia> p = pattern(capture("a"), captured(relative(-1)))
r"(a)\g<-1>"
julia> occursin(p, "aa")
true
RegularExpressions.script
— Methodscript([::Not], it)
A character from a script. You can negate all scripts with not.
julia> using RegularExpressions
julia> p = pattern(script(:Han))
r"\p{Han}"
julia> occursin(p, "中")
true
julia> p = pattern(script(not, :Han))
r"\P{Han}"
julia> occursin(p, "a")
true
RegularExpressions.short
— Methodshort([::Not], it)
A short command. Access SHORTS. You can negate some short commands with not.
julia> using RegularExpressions
julia> p = pattern(short(:space))
r"\s"
julia> occursin(p, " ")
true
julia> p = pattern(short(not, :space))
r"\S"
julia> occursin(p, "a")
true
RegularExpressions.template
— Methodtemplate(them...)
Splat of SubstitutionString
.
julia> using RegularExpressions
julia> p = pattern(capture("a"))
r"(a)"
julia> t = template(captured(1), "b")
s"\\g<1>b"
julia> replace("a", p => t)
"ab"
RegularExpressions.through
— Methodthrough(start, stop)
A range of characters.
julia> using RegularExpressions
julia> p = pattern(one_of(through('a', 'c')))
r"[a-c]"
julia> occursin(p, "b")
true
RegularExpressions.version
— Methodversion(it; at_least = false)
Check whether the version of PCRE2 is it (or, with at_least, at least it). For use with whether.
julia> using RegularExpressions
julia> p = pattern(whether(version(1), "new", "old"))
r"(?(VERSION=1)new|old)"
julia> occursin(p, "new")
false
julia> p = pattern(whether(version(1, at_least = true), "new", "old"))
r"(?(VERSION>=1)new|old)"
julia> occursin(p, "new")
true
RegularExpressions.whether
— Functionwhether(condition, yes, no = "")
Test for a condition. See relative
, exists
, recurred
, and version
.
julia> using RegularExpressions
julia> p = pattern(
CONSTANTS.start,
of(:maybe, capture("a")),
whether(1, "b", "c")
)
r"^(?:(a))?(?(1)b|c)"
julia> occursin(p, "ab")
true