Tuesday, March 1, 2011

More on Significant Whitespace (in F#, Python, Lua, and Ruby)

Since I want to add it to my coding “Room of Requirement,” I decided to keep working on the indentation processor a bit and make it more general. So here is a version that uses an interface to do dependency injection. (And as always, the code and information here are presented "as-is" and without warranty or implied fitness of any kind; use at your own risk.)

There are a couple of things to note:

1) The indentation comparison is done through a function on the interface. This allows the interface to encapsulate *all* the information about the indentation. This has a number of benefits. As shown in the example below, it allows some types of data structures to act as their own indentation stack. Also, in more sophisticated versions, it would allow for custom processing of indentation based on context (e.g. multi-line argument lists, embedded multi-line strings, etc.).

2) I left out runtime validity checks in a few spots, leaving it up to the framework to throw an exception. In a real life implementation, some sort of custom exceptions might be better, so I’ve marked the spots with comments.

Since I am currently learning Python, Lua, and Ruby, I’m also including transliterations of the F# version into these languages. Since I am a relative newcomer to them, I apologize for any spots where I may have violated particular language idioms or best practices.

Of the three, the Python version looks the closest to the F# version, in large part because both languages use a block structure based on significant whitespace. The code has been tested in Python 3.2 and IronPython 2.6, but may work in earlier versions of either or IronPython 2.7.

For the Lua version, I chose to use the Lua object model described by Roberto Ierusalimschy in Programming in Lua. Lua does not have a native object type, and so objects must be implemented using associative tables. The way this is done is fascinating, and well worth reading up on even if you’re not planning on using Lua in the immediate future. It will make you a better programmer. The code was tested in Lua 5.1.4.

For the Ruby version, I did manage to use a couple of interesting Ruby-specific string routines. It was tested in Ruby 1.9.2 and IronRuby 0.9.

Last, I apologize for any problems in the formatting and/or syntax highlighting of the Lua, Python, and Ruby; I’m still getting tools for blogging in those languages.

F#:

open System
 
 
/// Callback handler for IndentOMatic.
type IIndentHandler =
  /// Adds a sibling.
  abstract Add:string->int->IIndentHandler
  /// Compares to the current indent level.
  abstract Cmp:string->int->int
  /// Pops to the parent.
  abstract Pop:unit->IIndentHandler
  /// Pushes a new child.
  abstract Push:string->int->IIndentHandler
 
 
/// Rudimentary indentation processor.
module IndentOMatic =
 
  /// Converts a string to a string, indent pair.
  let private stringToTag (s:string) =
    let rec leading l i (s:string) =
      match i>=l with
      | true -> i
      | _ ->
        match s.Chars(i) with
        | ' ' -> leading l (i+1) s
        | _ -> i
    let i = leading s.Length 0 s
    s.Substring(i),i
 
  /// Process a string and indent. 
  let rec private proc0 (ih:IIndentHandler) s i = 
    /// Pop the stack, looking for a parent
    /// or sibling.
    let rec scanUp (ih:IIndentHandler) = 
      match ih.Cmp s i with
      | n when n<0 -> scanUp (ih.Pop())
      | 0 -> ih
      | _ -> 
        // Custom exception here, if required.
        failwith "No matching unindent."
    match ih.Cmp s i with
    | n when n<0 -> proc0 (scanUp ih) s i
    | 0 -> ih.Add s i 
    | _ -> ih.Push s i
 
  /// Call to process a string. 
  let proc ih s =
    let t,i = stringToTag s
    proc0 ih t i
 
 
// Test.
 
/// Tree node that implements IIndentHandler.
type IHTestNode (parent:IHTestNode option,
                 tag:string,
                 indent:int) =
 
  /// Child branches.
  let mutable children:IHTestNode list = []
 
  /// Add a child.
  member private this.AddChild s i =
    let child = IHTestNode(Some(this),s,i)
    children <- child::children
    child :>IIndentHandler
 
  /// This does any remaining processing
  /// for an ongoing node stack,
  /// and returns the root.
  member this.Eol () =
    children <- List.rev children
    match parent with
    | None -> this
    | Some(p) -> p.Eol()
 
  /// Print the tree.
  member this.Print () =
    if indent>=0 then
      printfn "%s%s" (String(' ',indent)) tag
    children |> List.iter (fun (n:IHTestNode)->n.Print())
 
  /// IIndentHandler members.
  /// (See IIndentHandler documentation.)
  interface IIndentHandler with
    member this.Add s i =
      children <- List.rev children
      // Note: your parent=null test/exception here.
      parent.Value.AddChild s i 
    member this.Cmp _ i =
      i - indent 
    member this.Pop () =
      children <- List.rev children
      // Note: your parent=null test/exception here.
      parent.Value:>IIndentHandler 
    member this.Push s i =
      this.AddChild s i
 
 
// Generate some test data.
let test = 
  seq {
    for i in [0..1] do
      yield String.Format("{0}",i)
      for j in [0..1] do
        yield String.Format(" {0}{1}",i,j)
        for k in [0..3] do
          yield String.Format("   {0}{1}{2}",i,j,k)
  }
 
 
//Generate and print a tree.
((Seq.fold 
    IndentOMatic.proc 
    (IHTestNode(None,"root",-1):>IIndentHandler) 
    test):?>IHTestNode).Eol().Print()
 
 
printfn ""
printfn "Your breakpoint here."


Python:

# Rudimentary indentation processor.
class IndentOMatic():

# Converts a string to a string, indent pair.
@staticmethod
def __string_to_tag(s):
i = 0
l = len(s)
while i<l:
if s[i]!=' ': break
i += 1
return (s[i:],i)

# Process a string and indent.
@staticmethod
def __proc0(ih,s,i):
# Pop the stack, looking for a parent
# or sibling.
def __scan_up(ih):
cmp = ih.cmp(i)
if cmp<0:
return __scan_up(ih.pop())
elif cmp==0:
return ih
else:
# Custom exception here, if required.
raise Exception("No matching unindent.")
cmp = ih.cmp(i)
if cmp<0:
return IndentOMatic.__proc0(__scan_up(ih),s,i)
elif cmp==0:
return ih.add(s,i)
else:
return ih.push(s,i)

# Call to process a string.
@staticmethod
def proc(ih,s):
(t,i) = IndentOMatic.__string_to_tag(s)
return IndentOMatic.__proc0(ih,t,i)


# Tree node that implements the proper functions.
class IHTestNode ():

def __init__(self,parent,tag,indent):
self.parent = parent
self.tag = tag
self.indent = indent
self.children = []

# Add a child.
def __add_child(self,s,i):
child = IHTestNode(self,s,i)
self.children.append(child)
return child

# This does any remaining processing
# for an ongoing node stack,
# and returns the root.
def eol (self):
if self.parent==None:
return self
else:
return self.parent.eol()

# Standard overload.
def __str__(self):
return self.tag.rjust(self.indent+len(self.tag))

# Recursive print.
def print_me(self):
print(self)
for child in self.children:
child.print_me()

# Adds a sibling.
def add(self,s,i):
# Note: your parent=None test/exception here.
return self.parent.__add_child(s,i)

# Compares to the current indent level.
def cmp(self,i):
return i-self.indent

# Pops to the parent.
def pop(self):
# Note: your parent=None test/exception here.
return self.parent

# Pushes a new child.
def push(self,s,i):
return self.__add_child(s,i)


# Test.

def test():
for i in range(0,2):
yield "{0}".format(i)
for j in range(0,2):
yield " {0}{1}".format(i,j)
for k in range(0,4):
yield " {0}{1}{2}".format(i,j,k)

print("Begin")

ihtn = IHTestNode(None,"Test",-1)

for x in test():
ihtn = IndentOMatic.proc(ihtn,x)

ihtn = ihtn.eol()

print("End")

ihtn.print_me()



Lua:

-- Note:
-- I'm a newcomer to Lua;
-- I apologize for any bad practices.

-- Note:
-- The way OOP works in Lua is elegant,
-- but can be a little confusing at first.
-- For an explanation, see the chapter
-- on OOP in "Programming in Lua" by
-- Roberto Ierusalimschy.

-- IndentOMatic class.
IndentOMatic = {

string_to_tag = function (s)
local i = 1 -- Lua indexes are one-based.
local l = s:len()
while i<=l do
if s:sub(i,i)~=" " then break end
i = i+1
end
return { tag=s:sub(i,-1), indent=i-1 }
end, -- string_to_tag

proc_0 = function (ih,s,i)
function scan_up (ih)
local cmp = ih:cmp(i)
if cmp<0 then
return scan_up(ih:pop())
elseif cmp==0 then
return ih
else
-- Custom exception here, if required.
error("No matching unindent.")
end
end -- scan_up
local cmp = ih:cmp(i)
if cmp<0 then
return IndentOMatic.proc_0(scan_up(ih),s,i)
elseif cmp==0 then
return ih:add(s,i)
else
return ih:push(s,i)
end
end, -- proc_0

proc = function (ih,s)
local ti = IndentOMatic.string_to_tag(s)
return IndentOMatic.proc_0(ih,ti.tag,ti.indent)
end

} -- IndentOMatic


-- IndentHandler class.
IndentHandler = {

-- "Instance" constructor.
new = function (self,p,t,i)
local o = {}
o.parent = p
o.tag = t
o.indent = i
o.children = {}
setmetatable(o,self)
self.__index = self
return o
end, -- new

add_child = function (self,s,i)
local child = IndentHandler:new(self,s,i)
self.children[#self.children+1] = child
return child
end, -- add_child

eol = function (self)
if self.parent then
return self.parent:eol()
else
return self
end
end, -- eol

print_me = function (self)
function spaces (i)
local s = ""
while i>0 do
s = s.." "
i = i-1
end
return s
end
print(spaces(self.indent)..self.tag)
for i=1, #self.children do
self.children[i]:print_me()
end
end, -- print_me

add = function (self,s,i)
-- Custom parent null check here, if required.
return self.parent:add_child(s,i)
end, -- add

cmp = function (self,i)
return i-self.indent
end, -- cmp

pop = function (self)
-- Custom parent null check here, if required.
return self.parent
end, -- pop

push = function (self,s,i)
return self:add_child(s,i)
end -- push

} -- IndentHandler


e = IndentHandler:new(nil,"root",-1)

for i = 0,1 do
e = IndentOMatic.proc(
e,string.format("%d",i))
for j = 0,1 do
e = IndentOMatic.proc(
e,string.format(" %d%d",i,j))
for k = 0,3 do
e = IndentOMatic.proc(
e,string.format(" %d%d%d",i,j,k))
end
end
end

e = e:eol()
e:print_me()



Ruby:

class IndentOMatic

def IndentOMatic.string_to_tag (s)
i = 0
s.each_char {|x| if x!=' ' then break else i+=1 end}
[s[i,s.length],i]
end

def IndentOMatic.scan_up (ih,i)
cmp = ih.cmp(i)
if cmp<0 then
return IndentOMatic.scan_up(ih.pop(),i)
elsif cmp==0
return ih
else
raise "No matching unindent."
end
end

def IndentOMatic.proc0 (ih, s, i)
cmp = ih.cmp(i)
if cmp<0 then
return IndentOMatic.proc0(
IndentOMatic.scan_up(ih,i),s,i)
elif cmp==0
return ih.add(s,i)
else
return ih.push(s,i)
end
end

def IndentOMatic.proc (ih, s)
t,i = IndentOMatic.string_to_tag(s)
proc0(ih,t,i)
end

end # IndentOMatic


class IndentProcessor

def initialize (parent,tag,indent)
@parent = parent
@tag = tag
@indent = indent
@children = []
end

def add_child (s, i)
child = IndentProcessor.new(self,s,i)
@children << child
child
end

def eol ()
if @parent==nil then
return self
else
return @parent.eol()
end
end

def print_me ()
if @indent>=0 then
puts (" "*@indent)<<@tag
end
@children.each {|c| c.print_me() }
end

def add (s, i)
# Note: your parent=None test/exception here.
@parent.add_child(s,i)
end

def cmp (i)
i-@indent
end

def pop ()
# Note: your parent=None test/exception here.
@parent
end

def push (s, i)
add_child(s,i)
end

end # IndentProcessor


ih = IndentProcessor.new(nil,"root",-1)

ih = IndentOMatic.proc(ih,"a")
ih = IndentOMatic.proc(ih," b")
ih = IndentOMatic.proc(ih," c")
ih = IndentOMatic.proc(ih," d")
ih = IndentOMatic.proc(ih," e")

ih = ih.eol()

ih.print_me()