#
# json.icn - JSON support library
#
# Authors: Gigi Young, Clinton Jeffery
#
# This library is intended to support conversion of Unicon values
# to and from JSON format, suitable for storage or transmission.
#
#
# Error handling object
#
class ErrorHandler(filename,
lineno,
error,
errout,
tag,
strict # Flag to enforce strict JSON if not null
)
method init(filename)
self.filename := filename
self.lineno := 1
self.error := &null
return
end
method incr_line()
lineno +:= 1
return
end
method constr_msg(s)
if \filename then
return tag || " " || filename || ":" || lineno || ": " || s
else
return tag || " " || s
end
#
# Sets (error) using constr_msg()
#
method set_err(s)
if /(error) then {
error := constr_msg(s)
}
end
#
# Writes error (s) to file descriptor specified by errout.
#
method write_err(s)
write(errout,tag||" "||s)
end
#
# Write out (error), if it exists, to (errout)
#
method get_err()
if \error then {
write(errout, error)
}
end
initially(errout,strict)
self.errout := \errout | &errout
self.strict := strict | &null
self.lineno := 1
self.tag := "[json]"
end
#
# For standalone testing
#
$ifdef MAIN
procedure main(argv)
local input
if *argv=0 then {
write("Enter the name of a JSON file:")
x := jtou(input := read())
write(utoj(x))
}
else every i := 1 to *argv do {
write(argv[i])
x := jtou(argv[i])
write(utoj(x))
}
end
$endif
#
# For scanner speed testing
#
$ifdef SCAN
procedure main(argv)
local input
if *argv=0 then {
write("Enter the name of a JSON file:")
file := open(input := read())
text := ""
while text ||:= read(file) || "\n"
scanner := create json_scanner(text)
while write(@scanner)
}
else every i := 1 to *argv do {
file := open(argv[i])
text := ""
while text ||:= read(file) || "\n"
scanner := create json_scanner(text)
while write(@scanner)
}
end
$endif
#
# Takes a string s that will be interpreted as JSON data.
# If the conversion fails, jtou() attempts to use s as a
# filename. If it fails, then the function fails.
#
# This procedure is a generator. It will return the
# corresponding unicon objects, one-by-one, until the
# conversion is complete.
#
# <[param s - JSON-encoded string or JSON filename]>
# <[param strict - Flag for specifing strict JSON syntax (non-null)]>
# <[param mode - Specifies explicit string, "s", or file, "f", handling]>
# <[param error - file for error messages (defaults to <tt>&errout</tt>)]>
#
procedure jtou(s,strict,mode,error)
local file, j, u, tok_gen, jerror, errout
j := ""
jerror := ErrorHandler(error,strict)
# default semantics
if /mode then {
# run as string
tok_gen := create json_scanner(s,jerror)
if u := json_parser(tok_gen,jerror) then {
suspend u
while u := json_parser(tok_gen,jerror) do suspend u
fail
}
# string conversion failed, run as file
else {
if not (file := open(s)) then {
return jerror.write_err("Unable to open file "||image(s))
}
while j ||:= read(file) || "\n"
close(file)
jerror.init(s)
}
}
# mode specified
else {
case mode of {
# file mode
"f" : {
if not (file := open(s)) then {
return jerror.write_err("Unable to open file "||image(s))
}
while j ||:= read(file) || "\n"
close(file)
jerror.init(s)
}
# string mode
"s" : j := s
default: return jerror.write_err("Invalid mode "||image(mode))
}
}
# parse
tok_gen := create json_scanner(j,jerror)
while u := json_parser(tok_gen,jerror) do {
suspend u
}
jerror.get_err()
end
#<p>
# Simple interface to convert a JSON string into a unicon entity.
# <i>Note: Unlike jtou(), this assumes the JSON string contains a single
# unicon entity.</i>
# <[param s - JSON-encoded string]>
# <[param strict - Flag for specifing strict JSON syntax (non-null)]>
# <[param error - file for error messages (defaults to <tt>&errout</tt>)]>
# <[return unicon entity encoded in JSON string]>
#</p>
procedure jtous(s,strict,error)
local tok_gen, u, jerror
#jerror := ErrorHandler(error,strict) # defaults error to &errout
jerror := ErrorHandler(error) # defaults error to &errout
tok_gen := create json_scanner(s, jerror)
while u := json_parser(tok_gen, jerror) do suspend u
jerror.get_err()
end
#<p>
# Simple interface to convert a JSON file into a unicon entity.
# <[param s - JSON filename]>
# <[param strict - Flag for specifing strict JSON syntax (non-null)]>
# <[param error - file for error messages (defaults to <tt>&errout</tt>)]>
# <[return unicon entity encoded in JSON string]>
#</p>
procedure jtouf(s,strict,error)
local tok_gen, u, jerror
jerror := ErrorHandler(error,strict) # defaults error to &errout
if not (file := open(s)) then {
return jerror.write_err("Unable to open file "||image(s))
}
while j ||:= read(file) || "\n"
close(file)
jerror.init(s)
tok_gen := create json_scanner(j, jerror)
while u := json_parser(tok_gen, jerror) do suspend u
jerror.get_err()
end
#
# It is insane to propose a single function as the primary API for the
# entire library, so here it is.
#
# Given a Unicon structure, produce a JSON equivalent if possible.
#
# An optional second parameter, error, as a file handle will specify
# where to print an error message in the event of a error
#
procedure utoj(x,strict,error)
local j;
jerror := ErrorHandler(error,strict)
if j := _utoj(x,jerror) then
return j
jerror.get_err()
end
#
# Given a Unicon structure, produce a JSON equivalent if possible.
#
# TODO: Check for fails that need error messages
#
procedure _utoj(u,jerror)
local s, tmp
case type(u) of {
"null": return type(u)
"string": {
return if u == "__true__" then "true"
else if u == "__false__" then "false"
else jsonify_string(u,jerror)
}
"integer" | "real": return image(u)
"list": {
s := "["
if *u > 0 then s ||:= _utoj(u[1],jerror)
every i := 2 to *u do {
s ||:= ("," || _utoj(u[i],jerror)) | fail
}
s ||:= "]"
return s
}
"set": { # {"__uniset__":[...]}
s := "{\"__uniset__\":["; i := 1
every x := !u do {
if i>1 then s ||:= ","
s ||:= _utoj(x,jerror) | fail
i +:= 1
}
s ||:= "]}"
return s
}
"cset": { # {"__unicset__":"..."}
return "{\"__unicset__\":"||jsonify_string(string(u),jerror)||"}"
}
"table": { # {"__unitable__":1, ...}
s := "{\"__unitable__\":1"
every k := key(u) do {
if s[-1] ~== "{" then s ||:= ","
if type(k) == "string" then
s ||:= jsonify_string(k,jerror)||":"|| _utoj(u[k],jerror) | fail
else {
if \(jerror.strict) then
s ||:= "\""||image(k)||"\":"||_utoj(u[k],jerror) | fail
else
s ||:= _utoj(k,jerror)||":"||_utoj(u[k],jerror) | fail
}
}
s ||:= "}"
return s
}
default: {
# Class - {"__uniclass__":"ClassName", ...}
if match("object ", tmp := image(u)) then {
s := "{\"__uniclass__\":\""||tmp[8:upto("_",tmp)\1|0]||"\""
every k := key(u) do {
if k == ("__s" | "__m") then next
if type(k) ~== "string" then fail
s ||:=","||jsonify_string(k,jerror)||":"||_utoj(u[k],jerror)|fail
}
s ||:= "}"
return s
}
# Record - {"__unirecord__":"RecordName", ...}
else if match("record ", tmp := image(u)) then {
s := "{\"__unirecord__\":\""||tmp[8:upto("_",tmp)\1|0]||"\""
every k := key(u) do {
if type(k) ~== "string" then fail
s ||:=","||jsonify_string(k,jerror)||":"||_utoj(u[k],jerror)|fail
}
s ||:= "}"
return s
}
else {
return jerror.set_err(type(u) || " has no JSON equivalent")
}
}
}
end
#
# Procedure for converting Unicon escapes and escape sequences to valid
# JSON escape sequences.
#
procedure jsonify_string(s,jerror)
static hex, oct, T_Esc
local ns := "", tmp, i
initial {
hex := '0123456789abcdefABCDEF'
oct := '01234567'
T_Esc := table()
every i := 0 to 31 do
T_Esc[char(i)] := "\\u"||right(_dectohex(i),4,"0")
T_Esc[char(8)] := "\\b"
T_Esc[char(9)] := "\\t"
T_Esc[char(10)] := "\\n"
T_Esc[char(12)] := "\\f"
T_Esc[char(13)] := "\\r"
T_Esc[char(127)] := "\\u007F" # \d
}
s ? {
while c := move(1) do {
if c == '\"' then return jerror.set_err("Unicon string \""||s||
"\" contains an illegal double quote for JSON strings")
# Handle escape sequences
if c == "\\" then {
case (c := move(1)) |
return jerror.set_err("Incomplete escape sequence") of {
# JSON escapes
"\\": ns ||:= "\\\\"
"/": ns ||:= "\\/"
"\"": ns ||:= "\\\""
"u": { # restrict to 16-bit unicode
tmp := tab(many(hex)) | ""
if (i := *tmp) > 4 then {
tmp := tmp[1:5]
move(*tmp - i)
}
ns ||:= "\\u"||right(tmp,4,"0")
}
# Unicon escapes
"b": ns ||:= "\\b"
"d": ns ||:= "\\u007F"
"e": ns ||:= "\\u001B"
"f": ns ||:= "\\f"
"l"|"n": ns ||:= "\\n"
"r": ns ||:= "\\r"
"t": ns ||:= "\\t"
"v": ns ||:= "\\u000B"
"x": { # hexadecimal, \xXX
# Taken from hexcode() in escape.icn
tmp := tab(many(hex)) | ""
if (i := *tmp) > 2 then {
tmp := tmp[1:3]
move(*tmp - i)
}
ns ||:= "\\u00"||tmp
}
!oct: { # octal
# Taken from octcode() in escape.icn
tmp := c || tab(many(oct)) | ""
if (i := *tmp) > 3 then { # back off if too large
tmp := tmp[1:4]
move(*tmp - i)
}
ns ||:= "\\u"||right(_octtohex(tmp),4,"0")
}
"^": { # control sequence
c := move(1)
if c == "?" then ns ||:= T_Esc["\d"]
else ns ||:= \(T_Esc[tmp := char(ord(c)%32)]) | tmp
}
default: { # Use unicon conventions for escape sequences
ns ||:= c
}
} # end case
} # end if
# Handle escaped characters
else ns ||:= \(T_Esc[c]) | c
} # end while
}
return "\""||ns||"\""
end
#
# Given an octal string, returns a hexadecimal string. Uses Unicon octal
# specifications
#
procedure _octtohex(s)
local val := 0, len := *s, i := len
while i > 0 do {
val +:= integer(s[i]) * 8^(len-i)
i -:= 1
}
return _dectohex(val % 256)
end
#
# Given an integer value, return a hexidecimal string
#
procedure _dectohex(val)
local ns := "", rem
while val > 0 do {
rem := val % 16
val := integer(val/16)
if rem < 10 then ns := string(rem) || ns
else case rem of {
10: ns := "A" || ns
11: ns := "B" || ns
12: ns := "C" || ns
13: ns := "D" || ns
14: ns := "E" || ns
15: ns := "F" || ns
}
}
return ns
end
#####################
# SCANNER FUNCTIONS #
#####################
#
# A string-scanning generator - takes a JSON-formatted string
# and returns single JSON tokens until scanning is complete
#
procedure json_scanner(s,jerror)
local token
local ws, operator, number
local end_pos := *s + 1
ws := ' \t'
operator := '{}[]:,'
number := '-0123456789'
s ? {
repeat {
# skip whitespace
tab(many(ws))
c := move(1) | fail
# operators
if any(operator, c) then token := c
# numbers
else if any(number, c) then token := scan_number(c,jerror) | fail
# strings
else if c == "\"" then token := scan_string(jerror) | fail
# true
else if c == "t" then token := scan_true(jerror) | fail
# false
else if c == "f" then token := scan_false(jerror) | fail
# null
else if c == "n" then token := scan_null(jerror) | fail
# line counter
else if c == "\n" then { jerror.incr_line(); next }
else {
return jerror.set_err("Unrecognized JSON token: " ||
c || tab(upto(ws++operator)\1))
}
suspend token
}
}
end
#
# String scanning helper function to retrieve JSON value 'true'
#
procedure scan_true(jerror)
if move(3) == "rue" then return "true"
else
return jerror.set_err("Expected JSON true: " ||
"t"||tab(upto(' \t\n{}[]:,')\1))
end
#
# String scanning helper function to retrieve JSON value 'false'
#
procedure scan_false(jerror)
if move(4) == "alse" then return "false"
else
return jerror.set_err("Expected JSON false: " ||
"f"||tab(upto(' \t\n{}[]:,')\1))
end
#
# String scanning helper function to retrieve JSON value 'null'
#
procedure scan_null(jerror)
if move(3) == "ull" then return "null"
else
return jerror.set_err("Expected JSON null: " ||
"n"||tab(upto(' \t\n{}[]:,')\1))
end
#
# String scanning helper function that finds a valid JSON escape sequence
# and returns a valid Unicon escape or escape sequence if possible.
#
procedure scan_ctrl_char(jerror)
static hex := '0123456789abcdefABCDEF'
local i, ns
#
# This code is modified from escape() from IPL file escape.icn
#
case (c := move(1)) |
return jerror.set_err("Incomplete JSON escape sequence") of {
# JSON escapes
"b": return "\b"
"f": return "\f"
"n": return "\n"
"r": return "\r"
"t": return "\t"
"\\": return "\\"
"/": return "/"
"\"": return "\""
"u": {
#
# Code taken from hexcode() in escape.icn and modified to handle
# unicode escape sequence conversion. JSON supports unicode from
# 0020-10FFFF, but this code restricts unicode to 16-bits to remove
# ambiguity from Unicon and JSON escape conversion.
#
# If the unicode is extended ASCII, then return a Unicon hex escape.
# Otherwise try to return a unicode escape
#
ns := tab(many(hex)) | ""
if (i := *ns) > 4 then {
ns := ns[1:5]
move(*ns - i)
}
ns := ns[many('0',ns):0]
if *ns > 2 then return "\\u"||right(ns,4,"0")
else return char("16r"||ns)
}
default: {
return jerror.set_err("JSON string has invalid escape sequence: " ||
c || tab(upto(' \t\n\"{}[]:,')\1))
}
}
end
#
# String scanning helper function that identifies a JSON string and
# returns a Unicon string
#
procedure scan_string(jerror)
local str, ctrl, c, is_cset
str := ""
while any(~'\"', c := move(1)) do {
if c ~== "\\" then str ||:= c
else {
if ctrl := scan_ctrl_char(jerror) then str ||:= ctrl
else fail
}
}
if move(1) == "\"" then return "\""||str||"\""
else return jerror.set_err(
"JSON string missing terminating double-quotes: " || str)
end
#
# String scanning helper function that returns a JSON number as a string
#
procedure scan_number(c,jerror)
local num_str
num_str := ""
# if negative
if c == "-" then {
num_str ||:= c
c := move(1)
}
# integer
if any(&digits,c) then {
# append
num_str ||:= c
# c is 0
if c == "0" then {
# number starting with 0 is either 0, frac, or exp
if any(&digits,move(1)) then
return jerror.set_err("JSON int cannot have leading zero: " ||
num_str)
}
# c is 1-9, get all sequential digits
else num_str ||:= tab(many(&digits))
}
# fraction
if (c := move(1)) == "." then {
# append .
num_str ||:= c
# get any/all sequential digits
if not (num_str ||:= tab(many(&digits))) then
return jerror.set_err("Expected digits after '.' in JSON frac: " ||
num_str)
}
# exponent
if any('eE',c := move(1)) then {
# append e|E
num_str ||:= c
# check for -|+
if (c := move(1)) == "-" then num_str ||:= c
else if (c := move(1)) == "+" then {}
# check for sequential digits
if not (num_str ||:= tab(many(&digits))) then
return jerror.set_err("Expected digits after 'e' in JSON exp: " ||
num_str)
}
return num_str
end
#########################
# END SCANNER FUNCTIONS #
#########################
####################
# PARSER FUNCTIONS #
####################
#
# Takes a co-expression to generate JSON tokens.
# Returns a Unicon equivalent JSON structure.
#
procedure json_parser(token_gen,jerror)
local unicon_struct, struct, token
static parse_funcs
initial {
parse_funcs := table()
parse_funcs["{"] := parse_object
parse_funcs["["] := parse_array
parse_funcs["\""] := parse_string
parse_funcs["-"] := parse_number
parse_funcs["0"] := parse_number
parse_funcs["1"] := parse_number
parse_funcs["2"] := parse_number
parse_funcs["3"] := parse_number
parse_funcs["4"] := parse_number
parse_funcs["5"] := parse_number
parse_funcs["6"] := parse_number
parse_funcs["7"] := parse_number
parse_funcs["8"] := parse_number
parse_funcs["9"] := parse_number
parse_funcs["t"] := parse_true
parse_funcs["f"] := parse_false
parse_funcs["n"] := parse_null
}
while token := @token_gen do {
if \(func := parse_funcs[token[1]]) then {
if struct := func(token, token_gen, parse_funcs,jerror) then
suspend struct
else fail
}
else fail
}
end
#
# Helper parsing functions that return the equivalent Unicon data structure
#
#
# Returns a string to represent a boolean true
#
procedure parse_true(token, token_gen, parse_funcs, jerror)
return "__true__"
end
#
# Returns a string to represent a boolean false
#
procedure parse_false(token, token_gen, parse_funcs, jerror)
return "__false__"
end
#
# Returns the null value
#
procedure parse_null(token, token_gen, parse_funcs, jerror)
return &null
end
#
# Returns the numeric() of the token
#
procedure parse_number(token, token_gen, parse_funcs, jerror)
return numeric(token)
end
#
# Removes the delimiting double-quotes from the token. Converts to cset if
# there are delimiting single-quotes within the double-quotes.
#
procedure parse_string(token, token_gen, parse_funcs, jerror)
if token[2] == "\'" & token[-2] == "\'" then
return cset(token[3:-2])
else
return token[2:-1]
end
#
# Helper parsing function that recognizes the production rules for a
# JSON object. Returns a Unicon object (table, class, record, set, or cset)
# if successfully parsed.
#
procedure parse_object(token, token_gen, parse_funcs, jerror)
local json_object, json_key, json_value, prev_token, tok, func, first := 1
local constr, attribs, fields, uni_object, i
prev_token := token
# {json_object} is a table which holds the traditional JSON dictionary.
# {uni_object} is a unicon object which uses unique JSON object encoding,
# "__uniset__", "__unicset__", "__unitable__", "__uniclass__", and
# "__unirecord__". {json_object} is always built in case the unique Unicon
# JSON object encoding syntax is not satisfied.
while tok := @token_gen do {
# end of object, allow for trailing comma, e.g. {... ,}
if tok == "}" then {
if \constr then
return constr ! attribs # class/record
else if \uni_object then
return uni_object # set/cset
else
return json_object # dictionary/table
}
# Commas are valid only if the preceeding token is a value (not { or ,)
else if tok == "," then {
if not (prev_token == ("{"|",")) then prev_token := tok
else return jerror.set_err("Unexpected comma in JSON object after: "||
tok)
}
# JSON pair
else if prev_token == ("{"|",") then {
# Dictionary - JSON string as key only
if \(jerror.strict) then {
if tok[1] ~== "\"" then
return jerror.set_err("Expecting JSON string for object key, "||
"got "||tok)
json_key := tok[2:-1]
}
# For Unicon tables where the key can be any JSON value
else {
if /(func := parse_funcs[tok[1]]) then
return jerror.set_err("Invalid JSON object key value: "||tok)
prev_token := tok
json_key := func(tok, token_gen, parse_funcs, jerror)
}
# Colon
if (tok := @token_gen) == ":" then prev_token := tok
else return jerror.set_err("Expected colon in JSON pair before: "||tok)
# JSON value
if tok := @token_gen then {
if \(func := parse_funcs[tok[1]]) then {
json_value := func(tok, token_gen, parse_funcs, jerror)
prev_token := "value"
# Special case for handling Unicon data types
if \first then {
first := &null
case json_key of {
# {"__unicset__":"..."}
"__unicset__": {
if type(json_value) == "string" then
uni_object := cset(json_value)
json_object := table()
json_object[json_key] := json_value
next
}
# {"__uniset__":[...]}
"__uniset__": {
if type(json_value) == "list" then
uni_object := set(json_value)
json_object := table()
json_object[json_key] := json_value
next
}
# {"__unirecord__":"RecordName", ...}
#
# Try to create a class/record with its constructor.
# If the constructor doesn't exist, return a table instead
"__unirecord__": {
if constr := proc(json_value) then {
tmp := constr()
fields := [: fieldnames(tmp) :] | []
#fields := []
#every put(fields, fieldnames(tmp))
attribs := list(*fields)
next
}
json_object := table()
}
# {"__uniclass__":"ClassName", ...}
#
# Try to create a class with its constructor.
# If the constructor doesn't exist, return a table instead
"__uniclass__": {
if constr := proc(json_value) then {
fields := membernames(json_value)
attribs := list(*fields)
next
}
json_object := table()
}
# {"__unitable__":1, ...}
"__unitable__": {
json_object := table()
# Don't encode "__unitable__":1 into the table if
# correct syntax
if json_value === 1 then next
}
# Returning a dictionary (table)
default: {
json_object := table()
}
}
}
if \json_object then {
# Build a table
if type(\uni_object) ~== "table" then
uni_object := &null
json_object[json_key] := json_value
}
else {
# Build an attribute list for the class/record constructor.
# Check field names in case the JSON was user-constructed
# instead of from utoj*()
i := 1
every field := !fields do {
if field == json_key then {
attribs[i] := json_value
break
}
i +:= 1
}
}
}
else return jerror.set_err("Expected JSON value in JSON pair" ||
", got: " || tok)
}
else return jerror.set_err("JSON pair missing JSON value")
}
else return jerror.set_err("Token violated JSON object syntax: " || tok)
}
return jerror.set_err("Expected terminating } for JSON object")
end
#
# Helper parsing function recognizes the production rules for a JSON array.
# Returns a Unicon list if the syntax is proper (success).
#
procedure parse_array(token, token_gen, parse_funcs, jerror)
local json_array, json_value, prev_token, tok
prev_token := token
json_array := []
while tok := @token_gen do {
# end of array, return
if tok == "]" then {
# allow for a trailing comma, e.g. [... ,]
return json_array
}
# comma can only come after a value (not [ or ,)
else if tok == "," then {
if not any('[,', prev_token) then prev_token := tok
else return jerror.set_err("Unexpected comma in JSON array")
}
# value
else if \(func := parse_funcs[tok[1]]) then {
if prev_token == ("["|",") then {
json_value := func(tok, token_gen, parse_funcs, jerror)
prev_token := "value"
put(json_array, json_value)
}
else return jerror.set_err("Expected comma in JSON array before: " ||
tok)
}
# invalid array syntax
else {
if prev_token == ("["|",") then
return jerror.set_err("Expected JSON value, got: " || tok)
else
return jerror.set_err("Token violated JSON array syntax: " ||
tok)
}
}
return jerror.set_err("Expected terminating ] for JSON array")
end
########################
# END PARSER FUNCTIONS #
########################
This page produced by UniDoc on 2021/04/15 @ 23:59:44.