! Fortran 90 による簡略正規表現ライブラリ

! 正規表現と称しているが、有限オートマトンを構築する手間をはぶくため、
! 和演算は単独文字間に限っている。つまり、[...] はあるが | はない。

module regex

    use dc_string
    implicit none

    integer, parameter:: CHAR_MAX = 255
    integer, parameter:: SYMBOL_EPSILON = CHAR_MAX + 1
    integer, parameter:: SYMBOL_EOL = CHAR_MAX + 2

    integer, parameter:: INT_WIDTH = bit_size(0)
    integer, parameter:: SYMTABLEN = (SYMBOL_EOL - 1) / INT_WIDTH + 1

    type regbranch
        integer:: symtab(SYMTABLEN)
        type(regstat), pointer:: next
        type(regbranch), pointer:: another
        logical:: cyclic
    end type

    type regstat
        logical:: living
        logical:: next_life
        type(regbranch), pointer:: choice
    end type

    type(regstat), pointer, save:: automata
    logical, save:: automata_built = .FALSE.
    logical, save:: automata_headed = .FALSE.

    interface match
        module procedure  match_cc
    end interface

contains

    recursive subroutine pat_dispose(stat)
        type(regstat), pointer:: stat
        type(regbranch), pointer:: branch, branch_backup
        branch => stat%choice
        do, while(associated(branch))
            if (.not. branch%cyclic) call pat_dispose(branch%next)
            branch_backup => branch
            branch => branch%another
            deallocate(branch_backup)
        enddo
        deallocate(stat)
    end subroutine

    logical function accept(branch, code) result(result)
        type(REGBRANCH), intent(in):: branch
        integer, intent(in):: code
        integer:: idx, ishift
        idx = code / INT_WIDTH
        ishift = mod(code, INT_WIDTH)
        result = btest(branch%symtab(idx), ishift)
    end function

    subroutine branch_clear(branch)
        type(REGBRANCH), intent(out):: branch
        nullify(branch%another, branch%next)
        branch%cyclic = .FALSE.
        branch%symtab(:) = 0
    end subroutine

    subroutine branch_setchar(branch, c)
        character, intent(in):: c
        type(REGBRANCH), intent(inout):: branch
        integer:: idx, ishift
        idx = ichar(c) / INT_WIDTH
        ishift = mod(ichar(c), INT_WIDTH)
        branch%symtab(idx) = ibset(branch%symtab(idx), ishift)
    end subroutine

    subroutine branch_negate(branch)
        type(REGBRANCH), intent(inout):: branch
        branch%symtab(:) = not(branch%symtab(:))
    end subroutine

    integer function element_add(cursor, pat) result(result)
        character(len = *), intent(in):: pat
        type(regstat), pointer:: cursor
        integer:: i
        allocate(cursor%choice)
        call branch_clear(cursor%choice) 
        if (pat(1:1) == '.') then
            call branch_negate(cursor%choice)
        else if (pat(1:1) == '[' .and. index(pat, ']') > 2) then
            result = index(pat(3: ), ']') + 2
            do, i = 2, result - 1
                call branch_stetchar(cursor%choice, pat(i:i))
            enddo
        else
            call branch_setchar(cursor%choice, pat(1:1))
        endif
        allocate(cursor%choice%next)
        cursor => cursor%choice%next
        nullify(cursor%choice)
        result = 1
    end function

    subroutine pat_compile(pat)
        character(len = *), intent(in):: pat
        type(regstat), pointer:: cursor
        integer:: i
        if (.not. automata_built) then
            nullify(automata)
            automata_built = .TRUE.
        endif
        if (associated(automata)) call pat_dispose(automata)
        if (pat(1:1) == '^') then
            automata_headed = .TRUE.
            i = 2
        else
            automata_headed = .FALSE.
            i = 1
        endif
        allocate(automata)
        cursor => automata
        nullify(cursor%choice)
        do, while (i < len(pat))
            i = i + element_add(cursor, pat(i: ))
        enddo
   end subroutine


    logical function match_cc(test, pat) result(result)
        character(len = *), intent(in):: test
        character(len = *), intent(in):: pat
        result = .TRUE.
    end function

end module