############################################################################
#
# File: binary.icn
#
# Subject: Procedures to pack and unpack values
#
# Author: Robert J. Alexander
#
# Date: August 14, 1996
#
############################################################################
#
# This file is in the public domain.
#
############################################################################
#
# This is a collection of procedures that support conversion of Icon
# data elements to and from binary data formats. The purpose is to
# facilitate dealing with binary data files.
#
# The procedures can be used individually or via the "control"
# procedures pack() and unpack().
#
############################################################################
#
# The individual conversion functions are prefixed by either "pack_" or
# "unpack_" and are identified in comments by their format character(s).
# The "pack_" procedures convert from Icon to binary and take a single
# argument: the value to be converted. The "unpack_" procedures
# convert from binary to Icon and usually take no parameters -- they are
# executed within a string-scanning context and scan the necessary
# amount from the &subject string. Some of the "unpack_" functions take
# a parameter that specifies the length of the output string. The
# individual conversion procedures are minimally commented, but their
# action is apparent from their procedure names and the documentation
# of the pack() and unpack() procedures.
#
# The control procedures pack() and unpack() take a format string that
# controls conversions of several values (similar to the "printf" C
# library function). pack() and unpack() are patterned after the Perl
# (programming language) functions of the same names, and are documented
# below.
#
#
# pack(template,value1,...) : packed_binary_string
# ------------------------------------------------
#
# This procedure packs the "values" into a binary structure, returning
# the string containing the structure. The elements of any lists in the
# "value" parameters are processed individually as if they were
# "spliced" into the "value" parameter list. The "template" is a
# sequence of characters that give the order and type of values, as
# follows" (using C language terminology):
#
# a An ascii string, will be null padded (unstripped for unpack()).
# A An ascii string, will be space padded (trailing nulls and
# spaces will be stripped for unpack()).
# b A bit string, low-to-high order.
# B A bit string, high-to-low order.
# h A hexadecimal string, low-nybble-first.
# H A hexadecimal string, high-nybble-first.
# c A signed char value.
# C An unsigned char value.
# s A signed short value.
# S An unsigned short value.
# i A signed int value.
# I An unsigned int value.
# l A signed long value.
# L An unsigned long value.
# n A short in "network" order (big-endian).
# N A long in "network" order (big-endian).
# v A short in "vax" order (little-endian).
# V A long in "vax" order (little-endian).
# f A single-precision float in IEEE Motorola format.
# d A double-precision float in IEEE Motorola format.
# e An extended-precision float in IEEE Motorola format 80-bit.
# E An extended-precision float in IEEE Motorola format 96-bit.
# x Skip forward a byte (null-fill for pack()).
# X Back up a byte.
# @ Go to absolute position (null-fill if necessary for pack()).
# u A uu-encoded/decoded string.
#
# Each letter may optionally be followed by a number which gives a
# count. Together the letter and the count make a field specifier.
# Letters and numbers can be separated by white space which will be
# ignored. Types A, a, B, b, H, and h consume one value from the
# "value" list and produce a string of the length given as the
# field-specifier-count. The other types consume
# "field-specifier-count" values from the "value" list and append the
# appropriate data to the packed string.
#
#
# unpack(template,string) : value_list
# ------------------------------------
#
# This procedure does the reverse of pack(): it takes a string
# representing a structure and expands it out into a list of values.
# The template has mostly the same format as for pack() -- see pack(),
# above.
#
#
# Endianicity of integers
# -----------------------
#
# Integer values can be packed and unpacked in either big-endian
# (Motorola) or little-endian (Intel) order. The default is big-endian.
# Procedures pack_little_endian() and pack_big_endian() set the
# mode for future packs and unpacks.
#
#
# Size of ints
# ------------
#
# The "i" (signed int) and "I" (unsigned int) types can pack and unpack
# either 16-bit or 32-bit values. 32-bit is the default. Procedures
# pack_int_as_short() and pack_int_as_long() change the mode for
# future packs and unpacks.
#
############################################################################
#
# To Do List
#
# - implement other-endian versions of floats (only big-endian supported
# now).
#
#
# The implementation
#
global pack_short,pack_long,
unpack_short,unpack_unsigned_short,
unpack_long,unpack_unsigned_long,
pack_int_proc,unpack_int_proc,unpack_unsigned_int_proc
procedure pack(template,values[]) #: pack values into a string
local result,t,n,c,v,spliced_values
initial if /pack_short then pack_big_endian()
spliced_values := []
every v := !values do {
if type(v) == "list" then spliced_values |||:= v
else put(spliced_values,v)
}
values := spliced_values
result := ""
every t := pack_parse_template(template) do {
n := t.count
c := t.conversion
case c of {
!"aAbBhH": {
#
# Handle string.
#
v := string(get(values)) | break
if n == "*" then n := *v
result ||:= (case c of {
!"aA": if integer(n) then left(v,n,if c == "A" then " "
else "\0") else v
default: (case c of {
"b": pack_bits_low_to_high
"B": pack_bits_high_to_low
"h": pack_hex_low_to_high
"H": pack_hex_high_to_low
})(v[1:n + 1 | 0])
}) | break
}
"@": result := left(result,n + 1,"\0")
"x": result := left(result,*result + n,"\0")
"X": result := left(result,*result - n)
default: {
#
# Handle item that consumes argument(s).
#
every if n === "*" then &null else 1 to n do {
v := get(values) | break
result ||:= (case c of {
!"cC": pack_char
!"sS": pack_short
!"iI": pack_int
!"lL": pack_long
"n": pack_nshort
"N": pack_nlong
"v": pack_vshort
"V": pack_vlong
"f": pack_single_float
"d": pack_double_float
"e": pack_extended_float
"E": pack_extended96_float
"u": pack_uuencoded_string
})(v) | break
}
}
}
}
return result
end
procedure unpack(template,binaryString) #: unpack values from string
local result,t,n,c,v
initial if /unpack_short then pack_big_endian()
result := []
binaryString ? {
every t := pack_parse_template(template) do {
n := t.count
c := t.conversion
case c of {
"X": move(-integer(n)) | tab(1)
"x": move(integer(n)) | tab(0)
"@": tab(if n === "*" then 0 else n)
!"aA": {
v := move(integer(n)) | tab(0)
if c == "A" then v := trim(v,' \t\0')
put(result,v)
}
!"bBhH": {
put(result,(case c of {
"b": unpack_bits_low_to_high
"B": unpack_bits_high_to_low
"h": unpack_hex_low_to_high
"H": unpack_hex_high_to_low
})(n))
}
default: {
every if n === "*" then &null else 1 to n do {
if pos(0) then break
put(result,(case c of {
"c": unpack_char
"C": unpack_unsigned_char
"s": unpack_short
"S": unpack_unsigned_short
"i": unpack_int
"I": unpack_unsigned_int
"l": unpack_long
"L": unpack_unsigned_long
"n": unpack_nshort
"N": unpack_nlong
"v": unpack_vshort
"V": unpack_vlong
"f": unpack_single_float
"d": unpack_double_float
"e": unpack_extended_float
"E": unpack_extended96_float
"u": unpack_uuencoded_string
})()) | break
}
}
}
}
}
return result
end
record pack_template_rec(conversion,count)
procedure pack_parse_template(template)
local c,n
template ? {
pack_parse_space()
while c := tab(any('aAbBhHcCsSiIlLnNvVfdeExX@u')) do {
pack_parse_space()
n := ="*" | integer(tab(many(&digits))) | 1
suspend pack_template_rec(c,n)
pack_parse_space()
}
}
end
procedure pack_parse_space()
suspend tab(many(' \t'))
end
procedure pack_big_endian()
pack_short := pack_nshort
pack_long := pack_nlong
unpack_short := unpack_nshort
unpack_unsigned_short := unpack_unsigned_nshort
unpack_long := unpack_nlong
unpack_unsigned_long := unpack_unsigned_nlong
case pack_int_proc of {
pack_vshort: pack_int_as_short()
pack_vlong: pack_int_as_long()
}
return
end
procedure pack_little_endian()
pack_short := pack_vshort
pack_long := pack_vlong
unpack_short := unpack_vshort
unpack_unsigned_short := unpack_unsigned_vshort
unpack_long := unpack_vlong
unpack_unsigned_long := unpack_unsigned_vlong
case pack_int_proc of {
pack_nshort: pack_int_as_short()
pack_nlong: pack_int_as_long()
}
return
end
procedure pack_int_as_long()
pack_int_proc := pack_long
unpack_int_proc := unpack_long
unpack_unsigned_int_proc := unpack_unsigned_long
return
end
procedure pack_int_as_short()
pack_int_proc := pack_short
unpack_int_proc := unpack_short
unpack_unsigned_int_proc := unpack_unsigned_short
return
end
#
# "b"
#
procedure pack_bits_low_to_high(v)
local result,n,b,buf
result := ""
n := buf := 0
every b := !v do {
buf := ior(ishift(buf,-1),ishift(b % 2,7))
n +:= 1
if n = 8 then {
result ||:= char(buf)
n := buf := 0
}
}
if n > 0 then {
result ||:= char(ishift(buf,-(8 - n)))
}
return result
end
#
# "B"
#
procedure pack_bits_high_to_low(v)
local result,n,b,buf
result := ""
n := buf := 0
every b := !v do {
buf := ior(ishift(buf,1),b % 2)
n +:= 1
if n = 8 then {
result ||:= char(buf)
n := buf := 0
}
}
if n > 0 then {
result ||:= char(ishift(buf,8 - n))
}
return result
end
#
# "h"
#
procedure pack_hex_low_to_high(v)
local result,pair
result := ""
v ? {
while pair := move(2) do {
result ||:= char(ior(pack_hex_digit(pair[1]),
ishift(pack_hex_digit(pair[2]),4)))
}
result ||:= char(pack_hex_digit(move(1)))
}
return result
end
#
# "H"
#
procedure pack_hex_high_to_low(v)
local result,pair
result := ""
v ? {
while pair := move(2) do {
result ||:= char(ior(pack_hex_digit(pair[2]),
ishift(pack_hex_digit(pair[1]),4)))
}
result ||:= char(ishift(pack_hex_digit(move(1)),4))
}
return result
end
procedure pack_hex_digit(s)
return (case map(s) of {
"0": 2r0000
"1": 2r0001
"2": 2r0010
"3": 2r0011
"4": 2r0100
"5": 2r0101
"6": 2r0110
"7": 2r0111
"8": 2r1000
"9": 2r1001
"a": 2r1010
"b": 2r1011
"c": 2r1100
"d": 2r1101
"e": 2r1110
"f": 2r1111
}) | stop("bad hex digit: ",image(s))
end
#
# "c" and "C"
#
procedure pack_char(v)
if v < 0 then v +:= 256
return char(v)
end
#
# "s" and "S" (big-endian)
#
procedure pack_nshort(v)
if v < 0 then v +:= 65536
return char(v / 256) || char(v % 256)
end
#
# "s" and "S" (little-endian)
#
procedure pack_vshort(v)
if v < 0 then v +:= 65536
return char(v % 256) || char(v / 256)
end
#
# "i" and "I"
#
procedure pack_int(v)
initial /pack_int_proc := pack_long
return pack_int_proc(v)
end
#
# "l" and "L" (big-endian)
#
procedure pack_nlong(v)
local result
if v < 0 then v +:= 4294967296
result := ""
every 1 to 4 do {
result ||:= char(v % 256)
v /:= 256
}
return reverse(result)
end
#
# "l" and "L" (little-endian)
#
procedure pack_vlong(v)
local result
if v < 0 then v +:= 4294967296
result := ""
every 1 to 4 do {
result ||:= char(v % 256)
v /:= 256
}
return result
end
#
# "u"
#
procedure pack_uuencoded_string(v)
return UUEncodeString(v)
end
#
# "b"
#
procedure unpack_bits_low_to_high(n)
local result,c,r
result := ""
while *result < n do {
c := ord(move(1)) | fail
r := ""
every 1 to 8 do {
r ||:= iand(c,1)
c := ishift(c,-1)
}
result ||:= r
}
return result[1+:n] | result
end
#
# "B"
#
procedure unpack_bits_high_to_low(n)
local result,c,r
result := ""
while *result < n do {
c := ord(move(1)) | fail
r := ""
every 1 to 8 do {
r := iand(c,1) || r
c := ishift(c,-1)
}
result ||:= r
}
return result[1+:n] | result
end
#
# "h"
#
procedure unpack_hex_low_to_high(n)
local result,c
result := ""
while *result < n do {
c := ord(move(1)) | fail
result ||:= unpack_hex_digit(iand(c,16rf)) ||
unpack_hex_digit(ishift(c,-4))
}
return result[1+:n] | result
end
#
# "H"
#
procedure unpack_hex_high_to_low(n)
local result,c
result := ""
while *result < n do {
c := ord(move(1)) | fail
result ||:= unpack_hex_digit(ishift(c,-4)) ||
unpack_hex_digit(iand(c,16rf))
}
return result[1+:n] | result
end
procedure unpack_hex_digit(i)
return "0123456789abcdef"[i + 1]
end
#
# "c"
#
procedure unpack_char()
local v
v := ord(move(1)) | fail
if v >= 128 then v -:= 256
return v
end
#
# "C"
#
procedure unpack_unsigned_char()
return ord(move(1))
end
#
# "n" and "s" (big-endian)
#
procedure unpack_nshort()
local v
v := unpack_unsigned_nshort() | fail
if v >= 32768 then v -:= 65536
return v
end
#
# "v" and "s" (little-endian)
#
procedure unpack_vshort()
local v
v := unpack_unsigned_vshort() | fail
if v >= 32768 then v -:= 65536
return v
end
#
# "S" (big-endian)
#
procedure unpack_unsigned_nshort()
return 256 * ord(move(1)) + ord(move(1))
end
#
# "S" (little-endian)
#
procedure unpack_unsigned_vshort()
return ord(move(1)) + 256 * ord(move(1))
end
#
# "i"
#
procedure unpack_int()
initial /unpack_int_proc := unpack_long
return unpack_int_proc()
end
#
# "I" (aye)
#
procedure unpack_unsigned_int()
initial /unpack_unsigned_int_proc := unpack_unsigned_long
return unpack_unsigned_int_proc()
end
#
# "N" and "l" (ell) (big-endian)
#
procedure unpack_nlong()
local v
v := 0
every 1 to 4 do {
v := 256 * v + ord(move(1)) | fail
}
if v >= 2147483648 then v -:= 4294967296
return v
end
#
# "V" and "l" (ell) (little-endian)
#
procedure unpack_vlong()
local v,m
v := 0
m := 1
every 1 to 4 do {
v := v + m * ord(move(1)) | fail
m *:= 256
}
if v >= 2147483648 then v -:= 4294967296
return v
end
#
# "L" (big-endian)
#
procedure unpack_unsigned_nlong()
local v
v := 0
every 1 to 4 do {
v := v * 256 + ord(move(1)) | fail
}
return v
end
#
# "L" (little-endian)
#
procedure unpack_unsigned_vlong()
local v,m
v := 0
m := 1
every 1 to 4 do {
v := v + m * ord(move(1)) | fail
m *:= 256
}
return v
end
#
# "u"
#
procedure unpack_uuencoded_string()
return UUDecodeString(tab(0))
end
#
# Procedures for converting real values from input streams. These
# procedures accept standard IEEE floating point values as strings,
# usually as read from a file, and return their numeric equivalent as a
# "real". The degree of accuracy is likely to vary with different
# implementations of Icon.
#
# Requires large integers.
#
# Parameter Float Double Extended Extended96
# =================================================================
# Size (bytes:bits) 4:32 8:64 10:80 12:96
#
# Range of binary exponents
# Minimum -126 -1022 -16383 -16383
# Maximum +127 +1023 +16383 +16383
# Exponent width in bits 8 11 15 15
# Exponent bias +127 +1023 +16383 +16383
#
# Significand precision
# Bits 24 53 64 64
# Decimal digits 7-8 15-16 18-19 18-19
#
# Decimal range approximate
# Maximum positive 3.4E+38 1.7E+308 1.1E+4932
# Minimum positive norm 1.2E-38 2.3E-308 1.7E-4932
# Minimum positive denorm 1.5E-45 5.0E-324 1.9E-4951
# Maximum negative denorm -1.5E-45 -5.0E-324 -1.9E-4951
# Maximum negative norm -1.2E-38 -2.3E-308 -1.7E-4932
# Minimum negative -3.4E+38 -1.7E+308 -1.1E+4932
#
#
# "d"
#
procedure pack_double_float(v)
local exp,mant,result,av
static dvsr
initial dvsr := 2.0 ^ 52
v := real(v)
if v = 0.0 then return "\0\0\0\0\0\0\0\0"
else {
av := abs(v)
exp := integer(log(av,2))
if exp <= -1023 then return "\0\0\0\0\0\0\0\0"
if exp > 1023 then return if v < 0.0 then "\xff\xf0\0\0\0\0\0\0"
else "\x7f\xf0\0\0\0\0\0\0"
mant := integer(av / 2.0 ^ real(exp) * dvsr + 0.5)
exp +:= 1023
result := ""
every 3 to 8 do {
result := char(mant % 256) || result
mant /:= 256
}
result := char(ior(if v < 0.0 then 16r80 else 0,ishift(exp,-4))) ||
char(ior(iand(mant % 256,16rf),iand(ishift(exp,4),16rf0))) ||
result
return result
}
end
#
# "f"
#
procedure pack_single_float(v)
local exp,mant,result,av
static dvsr
initial dvsr := 2.0 ^ 23
v := real(v)
if v = 0.0 then return "\0\0\0\0"
else {
av := abs(v)
exp := integer(log(av,2))
if exp <= -127 then return "\0\0\0\0"
if exp > 127 then return if v < 0.0 then "\xff\x80\0\0"
else "\x7f\x80\0\0"
mant := integer(av / 2.0 ^ real(exp) * dvsr + 0.5)
exp +:= 127
result := ""
every 3 to 4 do {
result := char(mant % 256) || result
mant /:= 256
}
result := char(ior(if v < 0.0 then 16r80 else 0,ishift(exp,-1))) ||
char(ior(iand(mant % 256,16r7f),iand(ishift(exp,7),16r80))) ||
result
return result
}
end
#
# "e"
#
procedure pack_extended_float(v)
local exp,mant,result,av
static dvsr
initial dvsr := 2.0 ^ 63
v := real(v)
if v = 0.0 then return "\0\0\0\0\0\0\0\0\0\0"
else {
av := abs(v)
exp := integer(log(av,2))
if exp <= -16383 then return "\0\0\0\0\0\0\0\0\0\0"
if exp > 16383 then return if v < 0.0 then "\xff\xff\0\0\0\0\0\0\0\0"
else "\x7f\xff\0\0\0\0\0\0\0\0"
mant := integer(av / 2.0 ^ real(exp) * dvsr + 0.5)
exp +:= 16383
result := ""
every 3 to 10 do {
result := char(mant % 256) || result
mant /:= 256
}
result := char(ior(if v < 0.0 then 16r80 else 0,ishift(exp,-8))) ||
char(iand(exp,16rff)) ||
result
return result
}
end
#
# "E"
#
procedure pack_extended96_float(v)
return pack_x80tox96(pack_extended_float(v))
end
#
# "d"
#
procedure unpack_double_float()
local exp,mant,v,i,s
static dvsr
initial dvsr := 2.0 ^ 52
(s := move(8)) | fail
exp := ior(ishift(iand(ord(s[1]),16r7f),4),ishift(ord(s[2]),-4)) - 1023
v := if exp = -1023 then 0.0
else {
mant := ior(16r10,iand(ord(s[2]),16r0f))
every i := 3 to 8 do
mant := mant * 256 + ord(s[i])
mant / dvsr * 2.0 ^ real(exp)
}
return if s[1] >>= "\x80" then -v else v
end
#
# "f"
#
procedure unpack_single_float()
local exp,mant,v,i,s
static dvsr
initial dvsr := 2.0 ^ 23
(s := move(4)) | fail
exp := ior(ishift(iand(ord(s[1]),16r7f),1),ishift(ord(s[2]),-7)) - 127
v := if exp = -127 then 0.0
else {
mant := ior(16r80,iand(ord(s[2]),16r7f))
every i := 3 to 4 do
mant := mant * 256 + ord(s[i])
mant / dvsr * 2.0 ^ real(exp)
}
return if s[1] >>= "\x80" then -v else v
end
#
# "e"
#
procedure unpack_extended_float(s)
local exp,mant,v,i
static dvsr
initial dvsr := 2.0 ^ 63
if /s then
(s := move(10)) | fail
exp := ior(ishift(iand(ord(s[1]),16r7f),8),ord(s[2])) - 16383
v := if exp = -16383 then 0.0
else {
mant := ord(s[3])
every i := 4 to 10 do
mant := mant * 256 + ord(s[i])
mant / dvsr * 2.0 ^ real(exp)
}
return if s[1] >>= "\x80" then -v else v
end
#
# "E"
#
procedure unpack_extended96_float()
return unpack_extended_float(pack_x96tox80(move(12)))
end
procedure pack_x80tox96(s)
return s[1:3] || "\0\0" || s[3:0]
end
procedure pack_x96tox80(s)
return s[1:3] || s[5:0]
end
#
# Procedures for working with UNIX "uuencode" format.
#
global UUErrorText
#
# Decode a uu-encoded string.
#
procedure UUDecodeString(s)
local len
s ? {
len := UUDecodeChar(move(1))
s := ""
while s ||:= UUDecodeQuad(move(4))
if not pos(0) then {
UUErrorText := "not multiple of 4 encoded characters"
fail
}
if not (0 <= *s - len < 3) then {
UUErrorText := "actual length, " || *s ||
" doesn't jive with length character, " || len
fail
}
}
return s[1+:len] | s
end
#
# Get a binary value from a uu-encoded character.
#
procedure UUDecodeChar(s)
static spaceVal
initial spaceVal := ord(" ")
return ord(s) - spaceVal
end
#
# Decode 4-byte encoded string to 3-bytes of binary data.
#
procedure UUDecodeQuad(s)
local v1,v2,v3,v4
*s = 4 | {
write(&errout,"Input string not of length 4")
runerr(500,s)
}
v1 := UUDecodeChar(s[1])
v2 := UUDecodeChar(s[2])
v3 := UUDecodeChar(s[3])
v4 := UUDecodeChar(s[4])
return (
char(ior(ishift(v1,2),ishift(v2,-4))) ||
char(ior(ishift(iand(v2,16rf),4),ishift(v3,-2))) ||
char(ior(ishift(iand(v3,16r3),6),v4))
)
end
#
# Convert "s" to uu-encoded format.
#
procedure UUEncodeString(s)
local outLine
s ? {
outLine := ""
until pos(0) do
outLine ||:= UUEncodeTriple(move(3) | tab(0))
}
return UUEncodeChar(*s) || outLine
end
#
# Get the ascii character for uu-encoding "i".
#
procedure UUEncodeChar(i)
static spaceVal
initial spaceVal := ord(" ")
return char(i + spaceVal)
end
#
# Encode to 3-bytes of binary data into 4-byte uu-encoded string.
#
procedure UUEncodeTriple(s)
local v1,v2,v3
v1 := ord(s[1])
v2 := ord(s[2]) | 0
v3 := ord(s[3]) | 0
return (
UUEncodeChar(ishift(v1,-2)) ||
UUEncodeChar(ior(ishift(iand(v1,16r3),4),ishift(v2,-4))) ||
UUEncodeChar(ior(ishift(iand(v2,16rf),2),ishift(v3,-6))) ||
UUEncodeChar(iand(v3,16r3f))
)
end
This page produced by UniDoc on 2021/04/15 @ 23:59:44.