Python pyparsing 模块,Regex() 实例源码
我们从 Python 开源项目中,提取了以下 10 个代码示例,用于说明如何使用 pyparsing.Regex()。
def lexical_analysis(self, src):
    """Parse semicolon-separated ``CREATE TABLE`` statements.

    Parameters
    ----------
    src : str
        SQL source text containing one or more statements separated by ';'.

    Returns
    -------
    list
        One ``pyparsing.ParseResults`` per non-empty statement.
    """
    # Normalize all whitespace (including newlines) to single spaces, then
    # split the text into individual statements.
    delimited = re.sub(r'\s+', ' ', ' '.join(src.strip().split('\n'))).split(';')

    # The grammar is statement-independent, so build it once instead of
    # rebuilding every element on each loop iteration as the original did.
    string = pp.Regex('[a-zA-Z0-9=_]+')
    nums = pp.Regex('[0-9]+')
    ws = pp.OneOrMore(pp.White()).suppress()
    lp = pp.Regex('[(]').suppress()
    rp = pp.Regex('[)]').suppress()
    c = pp.Regex('[,]').suppress()
    q = pp.Regex("[']").suppress()
    table_name = string.setResultsName('table_name')
    create_table = (pp.Keyword('CREATE', caseless=True) + ws + pp.Keyword('TABLE', caseless=True) + ws + pp.Optional(pp.Keyword('IF', caseless=True) + ws + pp.Keyword('NOT', caseless=True) + ws + pp.Keyword('EXISTS', caseless=True))).suppress() + table_name + lp
    column_name = string.setResultsName('column_name')
    data_type = string.setResultsName('data_type')
    length = lp + nums.setResultsName('length') + rp
    nullable = (pp.Optional(pp.Keyword('NOT', caseless=True) + ws) + pp.Keyword('NULL', caseless=True)).setResultsName('nullable')
    default_value = pp.Keyword('DEFAULT', caseless=True).suppress() + ws + string.setResultsName('default_value')
    auto_increment = pp.Keyword('AUTO_INCREMENT', caseless=True).setResultsName('auto_increment')
    # A column definition allows length/nullable/default/auto_increment in
    # several orders; the cascaded Optionals accept each at most once.
    column = pp.Optional(ws) + column_name + ws + data_type + pp.Optional(pp.MatchFirst([length, ws + nullable, ws + default_value, ws + auto_increment])) + pp.Optional(pp.MatchFirst([ws + nullable, ws + auto_increment])) + pp.Optional(pp.MatchFirst([ws + default_value, ws + auto_increment])) + pp.Optional(ws + auto_increment) + pp.Optional(ws) + c
    primary_key = pp.Keyword('PRIMARY KEY', caseless=True).suppress() + lp + pp.OneOrMore(q + string.setResultsName('primary_key') + q + pp.Optional(c)) + rp + pp.Optional(c)
    key = pp.Keyword('KEY', caseless=True).suppress() + lp + q + string.setResultsName('key') + q + pp.Optional(c) + rp + pp.Optional(c)
    parser = create_table + pp.OneOrMore(pp.Group(column)) + pp.Optional(primary_key) + pp.Optional(key) + rp + pp.OneOrMore(ws + string).suppress()

    result = []
    for stmt in delimited:
        if stmt == '':
            # split(';') yields an empty fragment after a trailing ';'.
            # The original returned here, which silently dropped any
            # statements that followed an empty fragment; skip it instead.
            continue
        result.append(parser.parseString(stmt, parseAll=True))
    return result
def _build_parser():
    """Create the pyparsing grammar for a single rule line.

    A rule is a comma-separated list of parts; each part is either a
    match (``ident <op> literal``) or an assignment (``ident = literal``),
    terminated by end of line.
    """
    # --- literal forms -------------------------------------------------
    # (setParseAction mutates and returns the same element, so splitting
    # the chained calls into statements is equivalent to the fluent form)
    date_literal = pp.Regex(r'(?P<year>\d{4})/(?P<month>\d{2})/(?P<day>\d{2})')
    date_literal.setParseAction(lambda s, l, t: schema.Date(t.year, t.month, t.day))

    dollars_literal = pp.Regex(r'\$\d+(\.\d{2})')
    dollars_literal.setParseAction(lambda loc, toks: schema.Dollars(toks[0]))

    double_quoted = pp.QuotedString('"', escChar='\\')
    single_quoted = pp.QuotedString("'", escChar='\\')
    string_literal = (double_quoted | single_quoted).setParseAction(
        lambda loc, toks: schema.String(toks[0]))

    literal = date_literal | dollars_literal | string_literal

    # --- rule structure ------------------------------------------------
    ident = pp.Word(pp.alphas)
    match = ident + pp.oneOf(operator_map.keys()) + literal
    assign = ident + pp.Literal('=') + literal
    part = (match | assign).setParseAction(lambda loc, toks: [toks])
    return pp.delimitedList(part) + pp.LineEnd()
def _parse_atat_lattice(lattice_in):
    """Parse an ATAT-style ``lat.in`` string.

    The parsed result holds three groups — (coordinate system), (lattice),
    (atoms) — where the atom group is split into subgroups, each holding a
    position followed by one or more atom names.
    """
    number = Regex(r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?').setParseAction(
        lambda t: [float(t[0])])
    triple = Group(number + number + number)
    eol = Suppress(LineEnd())
    # Coordinate system: either three full vectors (one per line), or a
    # vector of lengths followed by a vector of angles on the next line.
    triple_line = triple + eol
    coord_sys = Group((triple_line + triple_line + triple_line) | (triple + triple + eol))
    lattice = Group(triple + triple + triple)
    atom = Group(triple + Group(OneOrMore(Word(alphas + '_'))))
    grammar = coord_sys + lattice + Group(OneOrMore(atom))
    # Parse the input and hand back the raw ParseResults.
    return grammar.parseString(lattice_in)
def _create_simple_statements():
    """Build the grammar for the simple single-line statement forms.

    Populates the module-level ``simple_statement`` and
    ``generic_statement`` parsers; no-op if they were already built.
    Relies on primitives (``ident``, ``number``, ``rvalue``, ``semi``,
    ``comp``, ``slot_id``, ``stream``, ``selector``, ``binary``) being
    initialized elsewhere — presumably by ``_create_primitives``; confirm
    the required call order.
    """
    global binary, ident, rvalue, simple_statement, semi, comp, number, slot_id, callrpc_stmt, generic_statement, streamer_stmt, stream, selector

    # Grammar construction only needs to run once per process.
    if simple_statement is not None:
        return

    # 'Meta <ident> = <rvalue>;'
    Meta_stmt = Group(Literal('Meta').suppress() + ident + Literal('=').suppress() + rvalue + semi).setResultsName('Meta_statement')
    # 'require <ident> <comparison> <rvalue>;'
    require_stmt = Group(Literal('require').suppress() + ident + comp + rvalue + semi).setResultsName('require_statement')
    # NOTE(review): config_type is read from a module global — verify it is
    # set before this runs. The '-' operator commits pyparsing to this
    # alternative so errors are reported at the failing token.
    set_stmt = Group(Literal('set').suppress() - (ident | number) - Literal("to").suppress() - (rvalue | binary) - Optional(Literal('as').suppress() + config_type) + semi).setResultsName('set_statement')
    # 'call <rpc> on <slot> [=> stream];'
    callrpc_stmt = Group(Literal("call").suppress() + (ident | number) + Literal("on").suppress() + slot_id + Optional(Literal("=>").suppress() + stream('explicit_stream')) + semi).setResultsName('call_statement')
    # Streamer declaration with optional manual/security/realtime modifiers.
    streamer_stmt = Group(Optional(Literal("manual")('manual')) + Optional(oneOf(u'encrypted signed')('security')) + Optional(Literal(u'realtime')('realtime')) + Literal('streamer').suppress() -
                          Literal('on').suppress() - selector('selector') - Optional(Literal('to').suppress() - slot_id('explicit_tile')) - Optional(Literal('with').suppress() - Literal('streamer').suppress() - number('with_other')) - semi).setResultsName('streamer_statement')
    # 'copy [all|count|average] [input] => <output stream>;'
    copy_stmt = Group(Literal("copy").suppress() - Optional(oneOf("all count average")('modifier')) - Optional(stream('explicit_input') | number('constant_input')) - Literal("=>") - stream("output") - semi).setResultsName('copy_statement')
    trigger_stmt = Group(Literal("trigger") - Literal("streamer") - number('index') - semi).setResultsName('trigger_statement')

    simple_statement = Meta_stmt | require_stmt | set_stmt | callrpc_stmt | streamer_stmt | trigger_stmt | copy_stmt

    # In generic statements, keep track of the location where the match
    # started, for error handling.
    locator = Empty().setParseAction(lambda s, l, t: l)('location')
    generic_statement = Group(locator + Group(ZeroOrMore(Regex(u"[^{};]+")) + Literal(u';'))('match')).setResultsName('unparsed_statement')
def lexical_analysis(self, src):
    """Parse a state-machine DSL: graphs, views, processes, transitions.

    Parameters
    ----------
    src : str
        DSL source text.

    Returns
    -------
    pyparsing.ParseResults
        The matched tokens.
    """
    # NOTE(review): the "?-?" runs in this character class look like
    # mojibake — presumably non-ASCII (e.g. Japanese) character ranges
    # mangled by an encoding round-trip. Kept byte-identical here; confirm
    # against the original source before altering the pattern.
    string = pp.Regex('[a-zA-Z0-9_{}"=+\-*/\.:;&%@$#<>? ?-??-??-???-???????-?]+')
    # Fix: pyparsing's line-anchor class is LineStart; the original called
    # pp.Linestart(), which raises AttributeError at runtime.
    blank = pp.LineStart() + pp.LineEnd()
    start = '['
    end = ']' + pp.LineEnd()
    # Each element kind is introduced by a sigil at line start: @ graph,
    # # view, $ server process, % client process.
    graph_tag = pp.LineStart() + '@'
    graph = graph_tag + start + string + end
    view_tag = pp.LineStart() + '#'
    view = view_tag + start + string + end
    server_process_tag = pp.LineStart() + '$'
    server_process = server_process_tag + start + string + end
    client_process_tag = pp.LineStart() + '%'
    client_process = client_process_tag + start + string + end
    # '-->' marks a view transition, '==>' a process transition.
    view_transition_identifier = pp.LineStart() + '-->'
    view_transition = view_transition_identifier + string
    process_transition_identifier = pp.LineStart() + '==>'
    process_transition = process_transition_identifier + string
    state_machine = pp.OneOrMore(graph | view | server_process | client_process | view_transition | process_transition | string | blank)
    return state_machine.parseString(src)
def make_grammar():
    """Creates the grammar to be used by a spec matcher."""
    # This is apparently how pyparsing recommends being used, as
    # http://pyparsing.wikispaces.com/share/view/644825 states that it is
    # not thread-safe to share a parser across threads.
    #
    # Longer tokens must precede their prefixes ('==' before '=', '<='
    # before '<', 's>=' before 's>', ...) because MatchFirst takes the
    # first alternative that matches.
    op_tokens = (
        "==", "=",
        "!=", "<in>",
        ">=", "<=",
        ">", "<",
        "s==", "s!=",
        "s<=", "s<",
        "s>=", "s>",
    )
    unary_ops = Literal(op_tokens[0])
    for tok in op_tokens[1:]:
        unary_ops = unary_ops | Literal(tok)
    or_ = Literal("<or>")
    # An atom is any non-whitespace run that is not itself a keyword.
    atom = ~(unary_ops | or_) + Regex(r"\S+")
    unary = unary_ops + atom
    disjunction = OneOrMore(or_ + atom)
    # Tokens at even positions are the '<or>' separators, so drop them.
    disjunction.setParseAction(lambda _s, _l, toks: ["<or>"] + toks[1::2])
    return disjunction | unary | atom
def __init__(self):
    """
    Create a parser that parses arithmetic expressions. They can
    contain variable identifiers or raw numbers. The meaning
    of the identifiers is left to the caller.
    """
    # Number: integer or decimal, with an optional exponent part.
    number = p.Regex(r'\d+(\.\d*)?([eE]\d+)?')
    identifier = p.Word(p.alphas)
    terminal = identifier | number
    # Each infixNotation level is (operator, arity, associativity).
    # Fix: the original '+ -' level omitted the arity, which makes
    # pyparsing reject the malformed operator tuple at construction time.
    self._expr = p.infixNotation(terminal, [
        (p.oneOf('* /'), 2, p.opAssoc.LEFT),
        (p.oneOf('+ -'), 2, p.opAssoc.LEFT)
    ]) + p.stringEnd()
def _create_primitives():
    """Build the shared low-level grammar primitives (numbers, streams, selectors).

    Populates module-level parser globals; no-op if already initialized.

    NOTE(review): several names assigned here (semi, ident, number, rvalue,
    comp, time_unit, slot_id, stream_modifier, stream, all_selector,
    one_selector, trigger_comp) are missing from the ``global`` declaration
    below, which would make them function locals — and the ``ident`` guard
    would then raise UnboundLocalError. Presumably the upstream source
    declares them global; confirm before relying on this listing.
    """
    global binary, quoted_string, time_interval, config_type, comment, stream_trigger, selector

    # Already initialized — construction only needs to run once.
    if ident is not None:
        return

    semi = Literal(u';').suppress()
    # Identifier: letter/underscore first, then letters, digits, underscores.
    ident = Word(alphas+u"_", alphas + nums + u"_")
    # Integer literal: hex (0x...) or signed decimal; int(x, 0) honors the prefix.
    number = Regex(u'((0x[a-fA-F0-9]+)|[+-]?[0-9]+)').setParseAction(lambda s, t: [int(t[0], 0)])
    # Binary blob literal: 'hex:' followed by an even run of hex digits,
    # decoded to bytes (unhexlify presumably imported from binascii elsewhere).
    binary = Regex(u'hex:([a-fA-F0-9][a-fA-F0-9])+').setParseAction(lambda s, t: [unhexlify(t[0][4:])])
    quoted_string = dblQuotedString
    comment = Literal('#') + restOfLine
    rvalue = number | quoted_string

    # Convert all time intervals into an integer number of seconds
    time_unit_multipliers = {
        u'second': 1,
        u'seconds': 1,
        u'minute': 60,
        u'minutes': 60,
        u'hour': 60*60,
        u'hours': 60*60,
        u'day': 60*60*24,
        u'days': 60*60*24,
        u'month': 60*60*24*30,
        u'months': 60*60*24*30,
        u'year': 60*60*24*365,
        u'years': 60*60*24*365,
    }

    config_type = oneOf('uint8_t uint16_t uint32_t int8_t int16_t int32_t uint8_t[] uint16_t[] uint32_t[] int8_t[] int16_t[] int32_t[] string binary')
    comp = oneOf('> < >= <= == ~=')
    # NOTE(review): 'week'/'weeks' are accepted here but have no entry in
    # time_unit_multipliers, so a week interval would raise KeyError in the
    # parse action below — confirm whether that is intentional.
    time_unit = oneOf(u"second seconds minute minutes hour hours day days week weeks month months year years")
    time_interval = (number + time_unit).setParseAction(lambda s, t: [t[0]*time_unit_multipliers[t[1]]])
    # Slot: the controller itself or a numbered peripheral slot; converted
    # to a SlotIdentifier (project type defined elsewhere).
    slot_id = Literal(u"controller") | (Literal(u'slot') + number)
    slot_id.setParseAction(lambda s,t: [SlotIdentifier.FromString(u' '.join([str(x) for x in t]))])
    stream_modifier = Literal("system") | Literal("user") | Literal("combined")
    # Single concrete stream, converted to a DataStream (project type).
    stream = Optional(Literal("system")) + oneOf("buffered unbuffered input output counter constant") + number + Optional(Literal("node"))
    stream.setParseAction(lambda s,t: [DataStream.FromString(u' '.join([str(x) for x in t]))])
    # Wildcard selector over a class of streams (plural keywords).
    all_selector = Optional(Literal("all")) + Optional(stream_modifier) + oneOf("buffered unbuffered inputs outputs counters constants") + Optional(Literal("nodes"))
    all_selector.setParseAction(lambda s,t: [DataStreamSelector.FromString(u' '.join([str(x) for x in t]))])
    # Selector naming exactly one stream (singular keywords + number).
    one_selector = Optional(Literal("system")) + oneOf("buffered unbuffered input output counter constant") + number + Optional(Literal("node"))
    one_selector.setParseAction(lambda s,t: [DataStreamSelector.FromString(u' '.join([str(x) for x in t]))])
    selector = one_selector | all_selector
    trigger_comp = oneOf('> < >= <= ==')
    # 'count(stream) <comp> number' or 'value(stream) <comp> number'.
    stream_trigger = Group((Literal(u'count') | Literal(u'value')) + Literal(u'(').suppress() - stream - Literal(u')').suppress() - trigger_comp - number).setResultsName('stream_trigger')