Port of schuchert.wikispaces.com


PowerShell5-Tokenize_Expression-Convert_Tokenizer_To_An_Enumerator

PowerShell5-Tokenize_Expression-Convert_Tokenizer_To_An_Enumerator

Up

The Tokenizer converts a whole expression into an array of tokens. Now we’ll convert it to an Enumerator.

Convert Tokenizer to Enumerator

We are going to convert this in place while maintaining the tests.

Add Required Interfaces

# Tokenizer now declares both enumeration interfaces. The four members below
# are the minimum each interface requires; at this step they are placeholders
# that only make the class compile.
class Tokenizer : IEnumerable, IEnumerator {
    # ... (the rest of the existing class is not shown here)

    # IEnumerable: the tokenizer acts as its own enumerator.
    [IEnumerator]GetEnumerator() {
        return $this
    }

    # IEnumerator: placeholder - reports that there is nothing to enumerate.
    [bool]MoveNext() {
        return $false
    }

    # IEnumerator: getter behind the Current property; placeholder returns nothing.
    [Object]get_Current() {
        return $null
    }

    # IEnumerator: placeholder - no state to rewind yet.
    [void]Reset() {
    }
}
    It "Should enummerate <expression> into <expected>" -TestCase @(
        @{expression = '42'; expected = @('42')}
    ) {
        param($expression, $expected)

        # Step the tokenizer once per expected token and compare as we go.
        $tokenizer = [Tokenizer]::new($expression)
        foreach ($expectedToken in $expected) {
            $tokenizer.MoveNext()
            $tokenizer.Current | Should be $expectedToken
        }

        # After the last expected token there must be nothing left to read.
        $tokenizer.MoveNext() | Should be $false
    }
    # The text handed to the constructor; in this version it is never consumed.
    [String]$currentExpression
    # Stores the expression to tokenize.
    Tokenizer($expression) {
        $this.currentExpression = $expression
    }

    # IEnumerable: the tokenizer acts as its own enumerator.
    [IEnumerator]GetEnumerator() {
        return $this
    }

    # Always reports "no more tokens" in this version.
    [bool]MoveNext() {
        return $false
    }

    # Getter behind the Current property: returns the whole stored expression,
    # not an individual token.
    [Object]get_Current() {
        return $this.currentExpression
    }

    # Required by IEnumerator; does nothing in this version.
    [void]Reset() {
    }

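There are a few things to note in this first version: MoveNext always returns $false, Current simply returns the whole expression, and Reset does nothing. That is just enough to pass the single-token test.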

    It "Should enummerate <expression> into <expected>" -TestCase @(
        @{expression = '42'; expected = @('42')}
        @{expression = '123+'; expected = @('123', '+')}
    ) {
    [-] Should enummerate 123+ into 123 + 92ms
      Expected string length 3 but was 4. Strings differ at index 3.
      Expected: {123}
      But was:  {123+}
      --------------^
      37:             $tokenizer.Current | Should be $expected[$i]
      at Invoke-LegacyAssertion, C:\Program Files\WindowsPowerShell\Modules\Pester\4.0.8\Functions\Assertions\Should.ps1: line 190
      at <ScriptBlock>, C:\Users\Brett\src\shunting_yard_powershell_3\Tokenizer.Tests.ps1: line 37
    [String]$currentExpression
    [String]$currentToken

    # Advances to the next token.
    # Tries each pattern in [Tokenizer]::REGEX in order; the first one that
    # matches the front of the remaining expression supplies the token, which
    # is then cut off the front of the expression.
    # Returns $true when a token was read (it is then available via Current),
    # and $false when nothing matched, i.e. there is no further token.
    [bool]MoveNext() {
        $this.currentToken = $null

        foreach ($r in [Tokenizer]::REGEX) {
            if ($this.currentExpression -match $r) {
                $this.currentToken = $Matches[1]
                $this.currentExpression = $this.currentExpression.Substring($this.currentToken.Length)
                # Report success for this token, even when it was the last one;
                # testing the leftover length here would drop the final token
                # from any foreach / while-MoveNext loop.
                return $true
            }
        }
        return $false
    }

    # Getter behind the Current property: the token stored by the most recent
    # MoveNext() call.
    [Object]get_Current() {
        return $this.currentToken
    }
        @{expression = '99*34'; expected = @('99', '*', '34')}
    It "Should enummerate <expression> into <expected>" -TestCase @(
        @{expression = '42'; expected = @('42')}
        @{expression = '123+'; expected = @('123', '+')}
        @{expression = '99*34'; expected = @('99', '*', '34')}
        @{expression = '1+2+3+4'; expected = @('1', '+', '2', '+', '3', '+', '4')}
        @{expression = 'a'; expected = @('a')}
        @{expression = 'foo+bar'; expected = @('foo', '+', 'bar')}
        @{expression = '++foo'; expected = @('++', 'foo')}
        @{expression = '   foo  + -bar  = baz   '; expected = @('foo', '+', '-', 'bar', '=', 'baz')}
        @{expression = '(a)'; expected = @('(', 'a', ')')}
        @{expression = '(())'; expected = @('(', '(', ')', ')')}
        @{expression = 'f(g(3))'; expected = @('f(', 'g(', '3', ')', ')')}
    ) {
        param($expression, $expected)

        # Step the tokenizer once per expected token and compare as we go.
        $tokenizer = [Tokenizer]::new($expression)
        foreach ($expectedToken in $expected) {
            $tokenizer.MoveNext()
            $tokenizer.Current | Should be $expectedToken
        }

        # After the last expected token there must be nothing left to read.
        $tokenizer.MoveNext() | Should be $false
    }
    [+] Should enummerate ++foo into ++ foo 15ms
    [-] Should enummerate    foo  + -bar  = baz    into foo + - bar = baz 84ms
      Expected string length 3 but was 0. Strings differ at index 0.
      Expected: {foo}
      But was:  {}
      -----------^
      46:             $tokenizer.Current | Should be $expected[$i]
      at Invoke-LegacyAssertion, C:\Program Files\WindowsPowerShell\Modules\Pester\4.0.8\Functions\Assertions\Should.ps1: line 190
      at <ScriptBlock>, C:\Users\Brett\src\shunting_yard_powershell_3\Tokenizer.Tests.ps1: line 46
    [+] Should enummerate (a) into ( a ) 69ms
        $this.currentExpression = $this.currentExpression -replace ('^\s+', '')
    using module '.\Tokenizer.psm1'
    
    Describe "Tokenizing an in-fix expression" {
        # One generated test per row: the expression text and the tokens it
        # must yield, in order.
        It "Should enummerate <expression> into <expected>" -TestCase @(
            @{expression = '42'; expected = @('42')}
            @{expression = '123+'; expected = @('123', '+')}
            @{expression = '99*34'; expected = @('99', '*', '34')}
            @{expression = '1+2+3+4'; expected = @('1', '+', '2', '+', '3', '+', '4')}
            @{expression = 'a'; expected = @('a')}
            @{expression = 'foo+bar'; expected = @('foo', '+', 'bar')}
            @{expression = '++foo'; expected = @('++', 'foo')}
            @{expression = '   foo  + -bar  = baz   '; expected = @('foo', '+', '-', 'bar', '=', 'baz')}
            @{expression = '(a)'; expected = @('(', 'a', ')')}
            @{expression = '(())'; expected = @('(', '(', ')', ')')}
            @{expression = 'f(g(3))'; expected = @('f(', 'g(', '3', ')', ')')}
        ) {
            param($expression, $expected)

            # Step the tokenizer once per expected token and compare as we go.
            $tokenizer = [Tokenizer]::new($expression)
            foreach ($expectedToken in $expected) {
                $tokenizer.MoveNext()
                $tokenizer.Current | Should be $expectedToken
            }

            # After the last expected token there must be nothing left to read.
            $tokenizer.MoveNext() | Should be $false
        }
    }
    using namespace System.Collections
    
    # Splits an in-fix expression into tokens, handing out one token per
    # MoveNext() call. The object is both the enumerable and its own enumerator.
    class Tokenizer : IEnumerable, IEnumerator {
        # Token patterns, each anchored at the start of the remaining text with
        # the token in capture group 1.
        static $PARENTHESIS = '^([()])' 
        static $NUMBERS_WORDS_FUNCTIONS = '^([\d\w]+\({0,1})'
        static $OPERATORS = '^([^\d\w\s]+)'
        # Patterns in the order they are tried; the first match wins.
        static [Array]$REGEX = @( [Tokenizer]::PARENTHESIS, [Tokenizer]::NUMBERS_WORDS_FUNCTIONS, [Tokenizer]::OPERATORS )

        # Text that has not been tokenized yet; shrinks as tokens are read.
        [String]$currentExpression
        # Token produced by the most recent MoveNext() call.
        [String]$currentToken

        # Stores the expression to tokenize.
        Tokenizer($expression) {
            $this.currentExpression = $expression
        }

        # IEnumerable: the tokenizer acts as its own enumerator.
        [IEnumerator]GetEnumerator() {
            return $this
        }

        # Advances to the next token.
        # Leading whitespace is discarded, then each pattern in REGEX is tried
        # in order; the first match supplies the token, which is cut off the
        # front of the remaining expression.
        # Returns $true when a token was read (available via Current), and
        # $false when nothing matched, i.e. the expression is exhausted.
        [bool]MoveNext() {
            $this.currentToken = $null

            $this.currentExpression = $this.currentExpression -replace ('^\s+', '')
            foreach ($r in [Tokenizer]::REGEX) {
                if ($this.currentExpression -match $r) {
                    $this.currentToken = $Matches[1]
                    $this.currentExpression = $this.currentExpression.Substring($this.currentToken.Length)
                    # Report success for this token, even when it was the last
                    # one; testing the leftover length here would drop the
                    # final token from any foreach / while-MoveNext loop.
                    return $true
                }
            }
            return $false
        }

        # Getter behind the Current property: the token stored by the most
        # recent MoveNext() call.
        [Object]get_Current() {
            return $this.currentToken
        }

        # Required by IEnumerator; a no-op in this version of the class.
        [void]Reset() {
        }
    }

Notice that we have no tests for Reset? It is required to get the code to run but we don’t use it in a test. Time to add a missing test and write its implementation.

    # Reads the single token, rewinds with Reset(), and checks that the same
    # token can be read a second time.
    It "Should be possible to go through the results after a reset" {
        $tokenizer = [Tokenizer]::new("42")
        # First pass: consume the only token.
        $tokenizer.MoveNext()
        $tokenizer.Current | Should be "42"
        # Rewind, then the same token must come out again.
        $tokenizer.Reset()
        $tokenizer.MoveNext()
        $tokenizer.Current | Should be "42"
    }
    [-] Should be possible to go through the results after a reset 81ms
      Expected string length 2 but was 0. Strings differ at index 0.
      Expected: {42}
      But was:  {}
      -----------^
      33:         $tokenizer.Current | Should be "42"
      at Invoke-LegacyAssertion, C:\Program Files\WindowsPowerShell\Modules\Pester\4.0.8\Functions\Assertions\Should.ps1: line 190
      at <ScriptBlock>, C:\Users\Brett\src\shunting_yard_powershell_3\Tokenizer.Tests.ps1: line 33
    # Text that has not been tokenized yet.
    [String]$currentExpression
    # Token produced by the most recent MoveNext() call.
    [String]$currentToken
    # Untouched copy of the constructor argument, kept so Reset() can rewind.
    [String]$originalExpression

    # Remembers the expression and uses Reset() to set up the working copy,
    # so construction and rewinding share one code path.
    Tokenizer($expression) {
        $this.originalExpression = $expression
        $this.Reset()
    }
# ...
    # Rewinds the tokenizer so the next MoveNext() starts again from the
    # beginning of the original expression.
    [void]Reset() {
        $this.currentExpression = $this.originalExpression
        # Also forget the token from the previous pass, so Current does not
        # expose a stale value between Reset() and the next MoveNext().
        $this.currentToken = $null
    }

Up


Comments

" Creative Commons License
This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.