| Class | regex |
| In: |
regex.f90
|
サブルーチン match により正規表現を用いた文字列マッチを行うことが 可能です。
| Subroutine : | |
| pattern : | character(len = *), intent(in) |
| text : | character(len = *), intent(in) |
| start : | integer, intent(out) |
| length : | integer, intent(out) |
pattern には正規表現を与えます。 text には正規表現によって探査したい文字列を与えます。
pattern が text にマッチした場合、 start には文字列の何文字目からマッチしたのかを示す数値 (正の整数) が返ります。 length には何文字分マッチしたのかを示す数値 (正の整数) が返ります。
マッチしない場合、 length == -1, start == 0 となります。
program regex_test
use regex, only: match
use dc_types, only: TOKEN
implicit none
integer:: start, length
character(TOKEN) :: pattern, text
continue
pattern = "->"
text = "time->0.0,x->hoge"
call match(trim(pattern), trim(text), start, length)
call formatted_print
pattern = "^##+"
text = "####### hoge"
call match(trim(pattern), trim(text), start, length)
call formatted_print
pattern = "@+$"
text = "# hoge @@@"
call match(trim(pattern), trim(text), start, length)
call formatted_print
contains
subroutine formatted_print
use dc_string, only: Printf
call Printf(fmt='pattern= %c : text= %c : start= %d : length= %d', &
& c1=trim(pattern), c2=trim(text), i=(/start, length/))
end subroutine formatted_print
end program regex_test
このプログラムを実行することで以下の出力が得られるはずです。
pattern= -> : text= time->0.0,x->hoge : start= 5 : length= 2
pattern= ^##+ : text= ####### hoge : start= 1 : length= 7
pattern= @+$ : text= # hoge @@@ : start= 8 : length= 3
subroutine match(pattern, text, start, length)
!
! _pattern_ には正規表現を与えます。
! _text_ には正規表現によって探査したい文字列を与えます。
!
! _pattern_ が _text_ にマッチした場合、
! _start_ には文字列の何文字目からマッチしたのかを示す数値 (正の整数)
! が返ります。
! _length_ には何文字分マッチしたのかを示す数値 (正の整数)
! が返ります。
!
! マッチしない場合、 length == -1, start == 0 となります。
!
!
!=== 例
!
! program regex_test
! use regex, only: match
! use dc_types, only: TOKEN
! implicit none
!
! integer:: start, length
! character(TOKEN) :: pattern, text
! continue
! pattern = "->"
! text = "time->0.0,x->hoge"
! call match(trim(pattern), trim(text), start, length)
! call formatted_print
!
! pattern = "^##+"
! text = "####### hoge"
! call match(trim(pattern), trim(text), start, length)
! call formatted_print
!
! pattern = "@+$"
! text = "# hoge @@@"
! call match(trim(pattern), trim(text), start, length)
! call formatted_print
!
! contains
! subroutine formatted_print
! use dc_string, only: Printf
! call Printf(fmt='pattern= %c : text= %c : start= %d : length= %d', &
! & c1=trim(pattern), c2=trim(text), i=(/start, length/))
! end subroutine formatted_print
!
! end program regex_test
!
! このプログラムを実行することで以下の出力が得られるはずです。
!
! pattern= -> : text= time->0.0,x->hoge : start= 5 : length= 2
! pattern= ^##+ : text= ####### hoge : start= 1 : length= 7
! pattern= @+$ : text= # hoge @@@ : start= 8 : length= 3
!
implicit none
character(len = *), intent(in):: pattern, text
integer, intent(out):: start, length
integer, allocatable:: ipattern(:)
integer:: text_length
continue
! 空 pattern は空文字列に適合
if (len(pattern) <= 0) then
length = 0
start = 1
return
endif
! メタキャラクタの認識
allocate(ipattern(len(pattern) + 2))
call preprocess_pattern(pattern, ipattern)
! 頭寄せ指定のある場合
if (ipattern(1) == SYM_HEADFIX) then
start = 1
call match_here(ipattern(2: ), text, length)
if (length < 0) goto 995
goto 999
endif
! 最左原理
text_length = len(text)
do, start = 1, text_length + 1
call match_here(ipattern, text(start:text_length), length)
if (length >= 0) goto 999
end do
! みつからない場合
995 continue
start = 0
length = -1
999 continue
deallocate(ipattern)
end subroutine match
| Function : | |
| result : | logical |
| ipat(:) : | integer, intent(in) |
| c : | character(len=*), intent(in) |
logical function hit(ipat, c) result(result)
integer, intent(in):: ipat(:)
character(len=*), intent(in):: c
character(len=*), parameter:: DIGIT = "0123456789", XDIGIT = "ABCDEFabcdef", ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
integer:: i
continue
do, i = 1, size(ipat)
select case(ipat(i))
case(SYM_ANYCHAR)
result = .TRUE.
case(SYM_ISALPHA)
result = (index(ALPHA, c) > 0)
case(SYM_ISDIGIT)
result = (index(DIGIT, c) > 0)
case(SYM_ISWORD)
result = (index(DIGIT, c) > 0 .or. index(ALPHA, c) > 0 .or. c == '_')
case(SYM_ISXDIGIT)
result = (index(DIGIT, c) > 0 .or. index(XDIGIT, c) > 0)
case(SYM_ISSPACE)
result = (c == ' ' .or. (iachar(c) >= 8 .and. iachar(c) <= 13))
case default
result = (ipat(i) == ichar(c))
end select
if (result) return
enddo
result = .FALSE.
end function hit
| Subroutine : | recursive |
| ipat(:) : | integer, intent(in) |
| text : | character(len = *), intent(in) |
| length : | integer, intent(out) |
recursive subroutine match_here(ipat, text, length)
integer, intent(in):: ipat(:)
character(len = *), intent(in):: text
integer, intent(out):: length
integer:: s1, s2, remain, i, hitmax, hitcount, hit_at_least
logical:: normal_hit
continue
! パターンの終わり。空パターンには何でもマッチ
if (size(ipat) == 0 .or. ipat(1) == SYM_EOL) then
length = 0
return
endif
! パターンの文末固定指示
if (ipat(1) == SYM_TAILFIX) then
if (text == "") then
length = 0
else
length = -1
endif
return
endif
if (len(text) == 0) then
length = -1
return
endif
! 1字指定(範囲または1字リテラル)の抽出 ... ipat(s1:s2)
if (ipat(1) == SYM_NORMAL_SET) then
s1 = 3
s2 = 2 + ipat(2) - SYM_COUNT_BASE
normal_hit = .TRUE.
else if (ipat(1) == SYM_REVERSED_SET) then
s1 = 3
s2 = 2 + ipat(2) - SYM_COUNT_BASE
normal_hit = .FALSE.
else
s1 = 1
s2 = 1
normal_hit = .TRUE.
endif
! その次の記号 ipat(s2+1) は量化子か次の1字指定である
remain = s2 + 2
select case (ipat(s2 + 1))
case(SYM_STAR)
hitmax = len(text)
hit_at_least = 0
case(SYM_PLUS)
hitmax = len(text)
hit_at_least = 1
case(SYM_QUESTION)
hitmax = 1
hit_at_least = 0
case default
hitmax = 1
hit_at_least = 1
remain = s2 + 1
end select
! 現位置以降の1字指定のヒット数を数える
hitcount = 0
do, i = 1, hitmax
if (hit(ipat(s1:s2), text(i:i)) .neqv. normal_hit) then
exit
endif
hitcount = i
enddo
! 現位置で無ヒットの場合、ヒットを要するならマッチ失敗
if (hitcount < hit_at_least) then
length = -1
return
endif
! 最長原理: なるべく長くヒットしたものから、残りのマッチする
! ものを探す。いわゆる最左最長探索の最長である。
do, i = 1 + hitcount, 1 + hit_at_least, -1
call match_here(ipat(remain: ), text(i: ), length)
if (length >= 0) then
length = length + i - 1
return
endif
enddo
length = -1
end subroutine match_here
| Subroutine : | |
| pattern : | character(len = *), intent(in) |
| symbols(:) : | integer, intent(out) |
メタキャラクタと普通の文字を分離
subroutine preprocess_pattern(pattern, symbols)
!
! メタキャラクタと普通の文字を分離
!
character(len = *), intent(in):: pattern
integer, intent(out):: symbols(:)
integer:: i, j, code, imax, j_last_set
integer:: status, stat_return
integer, parameter:: STAT_INIT = 1, STAT_ESCAPE = 2, STAT_OPEN_SET = 3, STAT_IN_SET = 4, STAT_HEXADECIMAL = 5
character:: c
continue
status = STAT_INIT
stat_return = STAT_INIT
symbols(:) = SYM_EOL
j_last_set = 0
imax = len_trim(pattern)
j = 1
do, i = 1, imax
c = pattern(i:i)
select case(status)
case(STAT_INIT)
if (c == C_ESCAPE) then
status = STAT_ESCAPE
cycle
else if (c == "[") then
symbols(j) = SYM_NORMAL_SET
status = STAT_OPEN_SET
else if (c == ".") then
symbols(j) = SYM_ANYCHAR
else if (c == "?") then
symbols(j) = SYM_QUESTION
else if (c == "+") then
symbols(j) = SYM_PLUS
else if (c == "*") then
symbols(j) = SYM_STAR
else if (c == "^" .and. i == 1) then
symbols(j) = SYM_HEADFIX
else if (c == "$" .and. i == imax) then
symbols(j) = SYM_TAILFIX
else
symbols(j) = ichar(c)
endif
case(STAT_ESCAPE)
if (c == 'd' .or. c == 'D') then
symbols(j) = SYM_ISDIGIT
else if (c == 'a' .or. c == 'A') then
symbols(j) = SYM_ISALPHA
else if (c == 'w' .or. c == 'W') then
symbols(j) = SYM_ISWORD
else if (c == 's' .or. c == 'S') then
symbols(j) = SYM_ISSPACE
else if (c == 'z' .or. c == 'Z') then
symbols(j) = SYM_ISXDIGIT
else if (c == 'x' .or. c == 'X') then
symbols(j) = -1
status = STAT_HEXADECIMAL
cycle
else
symbols(j) = ichar(c)
end if
status = stat_return
case(STAT_HEXADECIMAL)
code = index("123456789ABCDEFabcdef", c)
if (code >= 16) code = code - 6
if (symbols(j) == -1) then
symbols(j) = code
cycle
else
symbols(j) = symbols(j) * 16 + code
status = stat_return
endif
case(STAT_OPEN_SET)
symbols(j) = SYM_COUNT_BASE
j_last_set = j
stat_return = STAT_IN_SET
if (c == '^') then
symbols(j - 1) = SYM_REVERSED_SET
status = STAT_IN_SET
else if (c == C_ESCAPE) then
status = STAT_ESCAPE
else
j = j + 1
symbols(j) = ichar(c)
status = STAT_IN_SET
endif
case(STAT_IN_SET)
if (c == ']') then
symbols(j_last_set) = SYM_COUNT_BASE + j - j_last_set - 1
stat_return = STAT_INIT
status = STAT_INIT
cycle
else if (c == C_ESCAPE) then
status = STAT_ESCAPE
cycle
else
symbols(j) = ichar(c)
endif
end select
j = j + 1
enddo
select case(status)
case(STAT_ESCAPE)
symbols(j) = ichar(' ')
case(STAT_OPEN_SET)
symbols(j) = SYM_COUNT_BASE
case(STAT_IN_SET)
symbols(j_last_set) = SYM_COUNT_BASE + j - j_last_set - 1
end select
end subroutine preprocess_pattern