Wednesday, December 14, 2011

An approximate type-based solution to the strings problem

As ever, neat ideas coded in Haskell can be almost but not quite ported to F#, foiled at the last moment by the fact that the .net type system cannot represent constructs that are expressed as Haskell type classes : there's always a run-time type coercion and some leakage of abstractions that have to be coded around.

One such is this idea of type-safe strings for representing structured data like XML -- which would require a little more mutually recursive and self-encapsulating type definitions than I think F# can manage. It gets close, though, and could probably be embellished a little further:

#light

open System

module TypeStrings =

    let rec foldr f z l =
        match l with
        | [] -> z
        | x::xs -> f x (foldr f z xs)

    [<AbstractClass>]
    type SafeStringBase(content : String) =
        member this.AsString with get() = content
        override this.ToString() = content
        
    // Now here #SafeStringBase really means SafeString of the corresponding 
    // concrete language type but there isn't a way to express that
    type ILanguage =
        abstract member LiteralFragment         : String -> #SafeStringBase // String is a literal language fragment
        abstract member LiteralText             : String -> #SafeStringBase // String is literal text
        abstract member NativeRepresentation    : #SafeStringBase -> String   // Gets the native-language representation
        abstract member Language                : unit -> String             // Gets the name of the language  
        abstract member Empty                   : unit -> #SafeStringBase     // creates an empty SafeString in the Language
        abstract member Add                     : #SafeStringBase -> #SafeStringBase -> #SafeStringBase
        
    type SafeString<'TLanguage when 'TLanguage :> ILanguage and 'TLanguage : (new : unit -> 'TLanguage) > (content : String) =
        inherit SafeStringBase(content)
        static let language : 'TLanguage = new 'TLanguage()
        
        // this is ugly
        static member private ToType (x:#SafeStringBase) =
            x :> SafeStringBase :?> SafeString<'TLanguage>
        
        static member Empty : SafeString<'TLanguage> =
            language.Empty() |> SafeString<'TLanguage>.ToType
        
        // takes a string that you certify as representing a fragment in the Language 
        // and returns a corresponding SafeString
        static member Fragment fragment =
            language.LiteralFragment fragment |> SafeString<'TLanguage>.ToType
        
        // takes a string that you certify as representing text and returns a 
        // corresponding SafeString in the Language
        static member Text text =
            language.LiteralText text |> SafeString<'TLanguage>.ToType
            
        static member Join (strings : seq<SafeString<'TLanguage>>) =
          let l = Seq.toList strings
          (foldr language.Add (language.Empty()) l) |> SafeString<'TLanguage>.ToType
        
        static member (+) ((self: SafeString<'TLanguage>), (other: SafeString<'TLanguage>)) : SafeString<'TLanguage> =
            (language.Add self other)  |> SafeString<'TLanguage>.ToType
            
          
    type Xml () =
      // this too is ugly
      static member private ToType (x:#SafeStringBase) =
            x :> SafeStringBase :?> SafeString<Xml>
      static member private ToHashType (x:#SafeStringBase) =
            x :> SafeStringBase :?> #SafeStringBase
            
      interface ILanguage with
        member self.LiteralFragment s = Xml.ToHashType (new SafeString<Xml>(s)) 
        member self.LiteralText s = Xml.ToHashType (new SafeString<Xml>(System.Security.SecurityElement.Escape(s)))
        member self.NativeRepresentation s =
            s.AsString.Replace("&apos;", "'").
              Replace("&quot;", "\"").Replace("&gt;", ">").
              Replace("&lt;", "<").Replace("&amp;", "&")
        member self.Language () = "XML"
        member self.Empty () = Xml.ToHashType (new SafeString<Xml>(String.Empty))
        
        // Don't think we can force this at compile time
        // The best we can do is encapsulate this operation
        member self.Add x y = 
            let left = Xml.ToType x
            let right = Xml.ToType y
            Xml.ToHashType (new SafeString<Xml>(x.AsString + y.AsString)) 
      
    [<EntryPoint>]
    let main a =
        let frag = SafeString<Xml>.Fragment "<em>wow!</em>"
        printfn "%s" frag.AsString
        let text = SafeString<Xml>.Text "ham & eggs"
        printfn "%s" text.AsString
        
        // We're safe from doing
        //
        // frag + "ham & eggs"
        //
        // Error FS0001: Type constraint mismatch. The type      string     is not compatible 
        // with type     SafeString<Xml>     
        // The type 'string' is not compatible with the type 'SafeString<Xml>'
        
        let sum = frag + text
        printfn "%s" sum.AsString
        0    

1 comments:

Dave said...

I got to this from PlanetF# and decided to play with it a little.

I was able to get rid of the SafeStringBase and the # types by making the ILanguage interface generic, adding some type constraints to SafeString.

Instead, the Xml type has to inherit from SafeString and has to define a constructor explicitly. However, I think this solution is a little close to Haskell type classes.

Sorry I don't know how to paste it as code so will put it here just as plain text.

#light

open System

module TypeStrings =

let rec foldr f z l =
match l with
| [] -> z
| x::xs -> f x (foldr f z xs)

type ILanguage<'T when 'T :> ILanguage<'T>> =
abstract member LiteralFragment : String -> 'T // String is a literal language fragment
abstract member LiteralText : String -> 'T // String is literal text
abstract member NativeRepresentation : 'T -> String // Gets the native-language representation
abstract member Language : unit -> String // Gets the name of the language
abstract member Empty : unit -> 'T // creates an empty SafeString in the Language
abstract member Add : 'T -> 'T -> 'T

type SafeString<'TLanguage when 'TLanguage :> ILanguage<'TLanguage> and 'TLanguage :> SafeString<'TLanguage> and 'TLanguage : (new : unit -> 'TLanguage) > (content: String) =
static let language : 'TLanguage = new 'TLanguage()

static member Empty : SafeString<'TLanguage> =
language.Empty() :> SafeString<'TLanguage>

static member Fragment fragment =
language.LiteralFragment fragment :> SafeString<'TLanguage>

static member Text text =
language.LiteralText text :> SafeString<'TLanguage>

static member Join (strings : seq<'TLanguage>) =
let l = Seq.toList strings
(foldr language.Add (language.Empty()) l) :> SafeString<'TLanguage>

static member Lang with get() = language

static member (+) ((self: SafeString<'TLanguage>), (other: SafeString<'TLanguage>)) : SafeString<'TLanguage> =
// don't think you can get rid of the down cast here
(language.Add (self :?> 'TLanguage) (other :?> 'TLanguage)) :> SafeString<'TLanguage>

member this.AsString with get() = content


type Xml (content: String) =
inherit SafeString(content)

// this is a required hack
new () = Xml("")

interface ILanguage with
member self.LiteralFragment s = new Xml(s)
member self.LiteralText s = new Xml(System.Security.SecurityElement.Escape(s))
member self.NativeRepresentation s =
s.AsString.Replace("&apos;", "'").
Replace(""", "\"").Replace(">", ">").
Replace("<", "<").Replace("&", "&")
member self.Language () = "XML"
member self.Empty () = new Xml(String.Empty)

member self.Add x y = (new Xml(x.AsString + y.AsString))

[]
let main a =
let frag = SafeString.Fragment "wow!"
printfn "%s" frag.AsString
let text = SafeString.Text "ham & eggs"
printfn "%s" text.AsString

let sum = frag + text
printfn "%s" sum.AsString
0