aa66d9b83d
Add attempts at solving coding problems to Briefcase.
121 lines
3.1 KiB
Python
121 lines
3.1 KiB
Python
from parser import Parser
|
|
|
|
# As an exercise to stress-test my understanding of recursive descent parsers,
|
|
# I'm attempting to write a JSON parser without referencing any existing BNF
|
|
# descriptions of JSON or existing JSON parser implementations.
|
|
#
|
|
# I'm only parsing a subset of JSON: enough to parse `sample`. Here is the BNF
|
|
# that I wrote to describe my expected input:
|
|
#
|
|
# expression -> object
|
|
# object -> '{' ( STRING ':' expression ) ( ',' STRING ':' expression )* '}'
|
|
# | array
|
|
# array -> '[' expression ( ',' expression )* ']'
|
|
# | literal
|
|
# literal -> STRING | INT
|
|
|
|
def tokenize(xs):
|
|
"""
|
|
Return a list of tokens from the string input, `xs`.
|
|
"""
|
|
result = []
|
|
i = 0
|
|
while i < len(xs):
|
|
# single characters
|
|
if xs[i] in ",{}:[]":
|
|
result.append(xs[i])
|
|
i += 1
|
|
# strings
|
|
elif xs[i] == "\"":
|
|
curr = xs[i]
|
|
i += 1
|
|
while xs[i] != "\"":
|
|
curr += xs[i]
|
|
i += 1
|
|
curr += xs[i]
|
|
result.append(curr)
|
|
i += 1
|
|
# integers
|
|
elif xs[i] in "0123456789":
|
|
curr = xs[i]
|
|
i += 1
|
|
while xs[i] in "0123456789":
|
|
curr += xs[i]
|
|
i += 1
|
|
result.append(int(curr))
|
|
# whitespace
|
|
elif xs[i] in {" ", "\n"}:
|
|
i += 1
|
|
return result
|
|
|
|
def parse_json(x):
|
|
"""
|
|
Attempt to parse the string, `x`, into JSON.
|
|
"""
|
|
tokens = tokenize(x)
|
|
return parse_object(Parser(tokens))
|
|
|
|
def parse_object(parser):
|
|
if parser.match(['{']):
|
|
key = parse_string(parser)
|
|
parser.expect([':'])
|
|
value = parse_object(parser)
|
|
result = [(key, value)]
|
|
while parser.match([',']):
|
|
key = parse_string(parser)
|
|
parser.match([':'])
|
|
value = parse_object(parser)
|
|
result.append((key, value))
|
|
return result
|
|
return parse_array(parser)
|
|
|
|
def parse_array(parser):
|
|
if parser.match(['[']):
|
|
if parser.match([']']):
|
|
return []
|
|
result = [parse_object(parser)]
|
|
while parser.match([',']):
|
|
result.append(parse_object(parser))
|
|
parser.expect([']'])
|
|
return result
|
|
else:
|
|
return parse_literal(parser)
|
|
|
|
def parse_string(parser):
|
|
if parser.curr().startswith("\""):
|
|
return parser.consume()
|
|
else:
|
|
raise Exception("Unexpected token: {}".format(parser.curr()))
|
|
|
|
def parse_literal(parser):
|
|
return parser.consume()
|
|
|
|
sample = """
|
|
{
|
|
"glossary": {
|
|
"title": "example glossary",
|
|
"GlossDiv": {
|
|
"title": "S",
|
|
"GlossList": {
|
|
"GlossEntry": {
|
|
"ID": "SGML",
|
|
"SortAs": "SGML",
|
|
"GlossTerm": "Standard Generalized Markup Language",
|
|
"Acronym": "SGML",
|
|
"Abbrev": "ISO 8879:1986",
|
|
"GlossDef": {
|
|
"para": "A meta-markup language, used to create markup languages such as DocBook.",
|
|
"GlossSeeAlso": [
|
|
"GML",
|
|
"XML"
|
|
]
|
|
},
|
|
"GlossSee": "markup"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
"""
|
|
|
|
print(parse_json(sample))
|