Friday, April 08, 2011

Stream to IEnumerable; IEnumerable to chunked IEnumerable

I may have missed ready-made equivalents of these in the F# core library, and they don't seem at all obvious in the C# standard libraries either. So here are a couple of handy functions/extension methods, with simple self-tests written using the Should.Fluent assertion library.

In F#:

namespace Tinesware.Enumerables
open System
open System.Collections.Generic
open System.IO
open System.Linq
open Should.Fluent
module Chunk =
    /// <summary>
    /// Turns a stream into a byte sequence
    /// </summary>
    /// <remarks>
    /// The sequence is lazy and stateful: each enumeration starts reading at the
    /// stream's current position, so enumerating the result a second time
    /// continues from wherever the previous enumeration stopped.
    /// </remarks>
    /// <param name="stream">The stream to wrap</param>
    /// <returns>The stream as an enumeration</returns>
    let ToEnumerable (stream : Stream) =
        seq {
            // ReadByte returns the next byte widened to an int, or -1 at end of stream.
            let result = ref (stream.ReadByte())
            while result.Value >= 0 do
                yield byte result.Value
                result.Value <- stream.ReadByte()
        }

    /// <summary>
    /// Split a 'a seq into an 'a[] seq
    /// </summary>
    /// <remarks>
    /// The source is walked exactly once through a single enumerator, so this works
    /// for replayable sequences (arrays, lists, ranges) as well as for stateful ones
    /// such as the result of ToEnumerable. Every array holds <c>chunk</c> elements
    /// except possibly the last, which holds whatever remains. A non-positive
    /// <c>chunk</c> yields an empty sequence without touching the source.
    /// </remarks>
    /// <typeparam name="'a">Element type of the array</typeparam>
    /// <param name="chunk">The window size</param>
    /// <param name="source">The IEnumerable to operate on</param>
    /// <returns>The enumeration</returns>
    let Window (chunk : int) (source : 'a seq) =
        if obj.ReferenceEquals(source, null) then nullArg "source"
        seq {
            if chunk > 0 then
                // One enumerator for the whole pass: re-running Seq.truncate on the
                // source would restart replayable sequences from their first element
                // and so repeat the first window forever.
                use items = source.GetEnumerator()
                let buffer = ResizeArray<'a>()
                while items.MoveNext() do
                    buffer.Add items.Current
                    if buffer.Count = chunk then
                        yield buffer.ToArray()
                        buffer.Clear()
                // Trailing partial window, if the length was not a multiple of chunk.
                if buffer.Count > 0 then
                    yield buffer.ToArray()
        }

    /// <summary>
    /// A Self-test program
    /// </summary>
    [<EntryPoint>]
    let main a =
        let input = seq { 0 .. 41 }
                    |> Seq.map byte
                    |> Seq.toArray
        use inStream = new MemoryStream(input)

        // Each truncate re-enumerates the wrapper, which resumes at the stream position.
        let channel = inStream |> ToEnumerable
        let chunk = channel |> Seq.truncate 16 |> Seq.toArray
        chunk.Length.Should().Equal(16) |> ignore
        chunk.[0].Should().Equal(0uy) |> ignore
        let chunk = channel |> Seq.truncate 16 |> Seq.toArray
        chunk.Length.Should().Equal(16) |> ignore
        chunk.[0].Should().Equal(16uy) |> ignore
        let chunk = channel |> Seq.truncate 16 |> Seq.toArray
        chunk.Length.Should().Equal(10) |> ignore
        chunk.[0].Should().Equal(32uy) |> ignore

        // Windowing a stateful, stream-backed sequence.
        inStream.Position <- 0L
        let chunks = inStream |> ToEnumerable |> Window 16 |> Seq.toArray
        chunks.Length.Should().Equal(3) |> ignore
        chunks.[0].Length.Should().Equal(16) |> ignore
        chunks.[1].Length.Should().Equal(16) |> ignore
        chunks.[2].Length.Should().Equal(10) |> ignore

        // Windowing a replayable sequence (a plain array) must advance as well.
        let replayed = input |> Window 16 |> Seq.toArray
        replayed.Length.Should().Equal(3) |> ignore
        replayed.[1].[0].Should().Equal(16uy) |> ignore
        replayed.[2].Length.Should().Equal(10) |> ignore
        0
view raw gistfile1.fs hosted with ❤ by GitHub

In C#:

namespace Tinesware.Enumerables
{
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Should.Fluent;
/// <summary>
/// More extension methods for enumerables
/// </summary>
public static class Chunk
{
    /// <summary>
    /// Split an IEnumerable&lt;T&gt; into an IEnumerable&lt;T[]&gt;
    /// </summary>
    /// <remarks>
    /// The source is walked exactly once, so this works for replayable sequences
    /// (arrays, lists, LINQ queries) as well as for stateful ones such as the
    /// result of <see cref="ToEnumerable"/>. Every array holds <paramref name="chunk"/>
    /// elements except possibly the last, which holds whatever remains. A
    /// non-positive <paramref name="chunk"/> yields an empty sequence.
    /// </remarks>
    /// <typeparam name="T">Element type of the array</typeparam>
    /// <param name="source">The IEnumerable to operate on</param>
    /// <param name="chunk">The window size</param>
    /// <returns>The enumeration</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null</exception>
    public static IEnumerable<T[]> Window<T>(this IEnumerable<T> source, int chunk)
    {
        // Validate here rather than in the iterator so the failure surfaces at the call site.
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }

        return WindowIterator(source, chunk);
    }

    /// <summary>
    /// Lazily buffers the source into arrays of at most <paramref name="chunk"/> elements
    /// </summary>
    private static IEnumerable<T[]> WindowIterator<T>(IEnumerable<T> source, int chunk)
    {
        if (chunk <= 0)
        {
            yield break;
        }

        // A single foreach keeps one enumerator alive for the whole pass; calling
        // source.Take(chunk) repeatedly would restart replayable sources from their
        // first element and so repeat the first window forever.
        var buffer = new List<T>();
        foreach (var item in source)
        {
            buffer.Add(item);
            if (buffer.Count == chunk)
            {
                yield return buffer.ToArray();
                buffer.Clear();
            }
        }

        // Trailing partial window, if the length was not a multiple of chunk.
        if (buffer.Count > 0)
        {
            yield return buffer.ToArray();
        }
    }

    /// <summary>
    /// Turns a stream into an IEnumerable&lt;byte&gt;
    /// </summary>
    /// <remarks>
    /// The sequence is lazy and stateful: each enumeration starts reading at the
    /// stream's current position, so enumerating the result a second time
    /// continues from wherever the previous enumeration stopped.
    /// </remarks>
    /// <param name="stream">The stream to wrap</param>
    /// <returns>The stream as an enumeration</returns>
    public static IEnumerable<byte> ToEnumerable(this Stream stream)
    {
        // ReadByte returns the next byte widened to an int, or -1 at end of stream.
        int next;
        while ((next = stream.ReadByte()) >= 0)
        {
            yield return (byte)next;
        }
    }

    /// <summary>
    /// A Self-test program
    /// </summary>
    private static void Main()
    {
        var input = Enumerable.Range(0, 42).Select(x => (byte)x).ToArray();
        using (var inStream = new MemoryStream(input))
        {
            // Each Take re-enumerates the wrapper, which resumes at the stream position.
            var channel = inStream.ToEnumerable();
            var chunk = channel.Take(16).ToArray();
            chunk.Length.Should().Equal(16);
            chunk[0].Should().Equal((byte)0);
            chunk = channel.Take(16).ToArray();
            chunk.Length.Should().Equal(16);
            chunk[0].Should().Equal((byte)16);
            chunk = channel.Take(16).ToArray();
            chunk.Length.Should().Equal(10);
            chunk[0].Should().Equal((byte)32);

            // Windowing a stateful, stream-backed sequence.
            inStream.Position = 0;
            var chunks = inStream.ToEnumerable().Window(16).ToArray();
            chunks.Length.Should().Equal(3);
            chunks[0].Length.Should().Equal(16);
            chunks[1].Length.Should().Equal(16);
            chunks[2].Length.Should().Equal(10);

            // Windowing a replayable sequence (a plain array) must advance as well.
            var replayed = input.Window(16).ToArray();
            replayed.Length.Should().Equal(3);
            replayed[1][0].Should().Equal((byte)16);
            replayed[2].Length.Should().Equal(10);
        }
    }
}
}
view raw gistfile1.cs hosted with ❤ by GitHub

2 comments :

Anonymous said...

If chunk is immutable won't window return the same array forever?

Steve Gilham said...

If you're wanting to use Window() in a more generalized form than just reading constant sized slabs off stateful enumerations like streams, the C# version you need is in the follow-up post.