dc_regex Module Reference

Detailed Description

シンプルな正規表現関数 'match' を提供します.

Author
Youhei SASAKI

Functions/Subroutines

subroutine preprocess_pattern (pattern, symbols)
 
recursive subroutine match_here (ipat, text, length)
 
logical function hit (ipat, c)
 
subroutine, public match (pattern, text, start, length)
 

Variables

character, save c_escape = '#'
 
integer, parameter sym_eol = -128
 
integer, parameter sym_anychar = 500
 
integer, parameter sym_question = 501
 
integer, parameter sym_plus = 502
 
integer, parameter sym_star = 503
 
integer, parameter sym_normal_set = 520
 
integer, parameter sym_reversed_set = 521
 
integer, parameter sym_headfix = 540
 
integer, parameter sym_tailfix = 541
 
integer, parameter sym_isdigit = 560
 
integer, parameter sym_isalpha = 561
 
integer, parameter sym_isword = 562
 
integer, parameter sym_isspace = 563
 
integer, parameter sym_isxdigit = 564
 
integer, parameter sym_count_base = 1000
 

Function/Subroutine Documentation

◆ hit()

logical function dc_regex::hit ( integer, dimension(:), intent(in)  ipat,
character(len=*), intent(in)  c 
)
private

Definition at line 235 of file dc_regex.f90.

References sym_anychar, sym_isalpha, sym_isdigit, sym_isspace, sym_isword, and sym_isxdigit.

Referenced by match_here().

235  integer, intent(in):: ipat(:)
236  character(len=*), intent(in):: c
237  character(len=*), parameter:: &
238  & DIGIT = "0123456789", &
239  & XDIGIT = "ABCDEFabcdef", &
240  & ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
241  integer:: i
242  continue
243  do, i = 1, size(ipat)
244  select case(ipat(i))
245  case(sym_anychar)
246  result = .true.
247  case(sym_isalpha)
248  result = (index(alpha, c) > 0)
249  case(sym_isdigit)
250  result = (index(digit, c) > 0)
251  case(sym_isword)
252  result = (index(digit, c) > 0 .or. index(alpha, c) > 0 .or. &
253  & c == '_')
254  case(sym_isxdigit)
255  result = (index(digit, c) > 0 .or. index(xdigit, c) > 0)
256  case(sym_isspace)
257  result = (c == ' ' .or. (iachar(c) >= 8 .and. iachar(c) <= 13))
258  case default
259  result = (ipat(i) == ichar(c))
260  end select
261  if (result) return
262  enddo
263  result = .false.
Here is the caller graph for this function:

◆ match()

subroutine, public dc_regex::match ( character(len = *), intent(in)  pattern,
character(len = *), intent(in)  text,
integer, intent(out)  start,
integer, intent(out)  length 
)

Definition at line 267 of file dc_regex.f90.

References match_here(), preprocess_pattern(), and sym_headfix.

Referenced by actual_iorange_dump(), dccaldatecreate1(), dccaldateparsestr1(), dcunits_com::dcunitsgettoken(), historygetdouble0(), historygetdouble1(), historygetdouble2(), historygetdouble3(), historygetdouble4(), historygetdouble5(), historygetdouble6(), historygetdouble7(), historygetint0(), historygetint1(), historygetint2(), historygetint3(), historygetint4(), historygetint5(), historygetint6(), historygetint7(), historygetreal0(), historygetreal1(), historygetreal2(), historygetreal3(), historygetreal4(), historygetreal5(), historygetreal6(), historygetreal7(), and lookup_growable_url().

267  !
268  ! _pattern_ には正規表現を与えます。
269  ! _text_ には正規表現によって探査したい文字列を与えます。
270  !
271  ! _pattern_ が _text_ にマッチした場合、
272  ! _start_ には文字列の何文字目からマッチしたのかを示す数値 (正の整数)
273  ! が返ります。
274  ! _length_ には何文字分マッチしたのかを示す数値 (正の整数)
275  ! が返ります。
276  !
277  ! マッチしない場合、 length == -1, start == 0 となります。
278  !
279  !
280  !=== 例
281  !
282  ! program regex_test
283  ! use dc_regex, only: match
284  ! use dc_types, only: TOKEN
285  ! implicit none
286  !
287  ! integer:: start, length
288  ! character(TOKEN) :: pattern, text
289  ! continue
290  ! pattern = "->"
291  ! text = "time->0.0,x->hoge"
292  ! call match(trim(pattern), trim(text), start, length)
293  ! call formatted_print
294  !
295  ! pattern = "^##+"
296  ! text = "####### hoge"
297  ! call match(trim(pattern), trim(text), start, length)
298  ! call formatted_print
299  !
300  ! pattern = "@+$"
301  ! text = "# hoge @@@"
302  ! call match(trim(pattern), trim(text), start, length)
303  ! call formatted_print
304  !
305  ! contains
306  ! subroutine formatted_print
307  ! use dc_string, only: Printf
308  ! call Printf(fmt='pattern= %c : text= %c : start= %d : length= %d', &
309  ! & c1=trim(pattern), c2=trim(text), i=(/start, length/))
310  ! end subroutine formatted_print
311  !
312  ! end program regex_test
313  !
314  ! このプログラムを実行することで以下の出力が得られるはずです。
315  !
316  ! pattern= -> : text= time->0.0,x->hoge : start= 5 : length= 2
317  ! pattern= ^##+ : text= ####### hoge : start= 1 : length= 7
318  ! pattern= @+$ : text= # hoge @@@ : start= 8 : length= 3
319  !
320  implicit none
321  character(len = *), intent(in):: pattern, text
322  integer, intent(out):: start, length
323  integer, allocatable:: ipattern(:)
324  integer:: text_length
325  continue
326  ! 空 pattern は空文字列に適合
327  if (len(pattern) <= 0) then
328  length = 0
329  start = 1
330  return
331  endif
332  ! メタキャラクタの認識
333  allocate(ipattern(len(pattern) + 2))
334  call preprocess_pattern(pattern, ipattern)
335  ! 頭寄せ指定のある場合
336  if (ipattern(1) == sym_headfix) then
337  start = 1
338  call match_here(ipattern(2: ), text, length)
339  if (length < 0) goto 995
340  goto 999
341  endif
342  ! 最左原理
343  text_length = len(text)
344  do, start = 1, text_length + 1
345  call match_here(ipattern, text(start:text_length), length)
346  if (length >= 0) goto 999
347  end do
348  ! みつからない場合
349 995 continue
350  start = 0
351  length = -1
352 999 continue
353  deallocate(ipattern)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ match_here()

recursive subroutine dc_regex::match_here ( integer, dimension(:), intent(in)  ipat,
character(len = *), intent(in)  text,
integer, intent(out)  length 
)
private

Definition at line 154 of file dc_regex.f90.

References hit(), sym_count_base, sym_eol, sym_normal_set, sym_plus, sym_question, sym_reversed_set, sym_star, and sym_tailfix.

Referenced by match().

154  integer, intent(in):: ipat(:)
155  character(len = *), intent(in):: text
156  integer, intent(out):: length
157  integer:: s1, s2, remain, i, hitmax, hitcount, hit_at_least
158  logical:: normal_hit
159  continue
160  ! パターンの終わり。空パターンには何でもマッチ
161  if (size(ipat) == 0 .or. ipat(1) == sym_eol) then
162  length = 0
163  return
164  endif
165  ! パターンの文末固定指示
166  if (ipat(1) == sym_tailfix) then
167  if (text == "") then
168  length = 0
169  else
170  length = -1
171  endif
172  return
173  endif
174  if (len(text) == 0) then
175  length = -1
176  return
177  endif
178  ! 1字指定(範囲または1字リテラル)の抽出 ... ipat(s1:s2)
179  if (ipat(1) == sym_normal_set) then
180  s1 = 3
181  s2 = 2 + ipat(2) - sym_count_base
182  normal_hit = .true.
183  else if (ipat(1) == sym_reversed_set) then
184  s1 = 3
185  s2 = 2 + ipat(2) - sym_count_base
186  normal_hit = .false.
187  else
188  s1 = 1
189  s2 = 1
190  normal_hit = .true.
191  endif
192  ! その次の記号 ipat(s2+1) は量化子か次の1字指定である
193  remain = s2 + 2
194  select case (ipat(s2 + 1))
195  case(sym_star)
196  hitmax = len(text)
197  hit_at_least = 0
198  case(sym_plus)
199  hitmax = len(text)
200  hit_at_least = 1
201  case(sym_question)
202  hitmax = 1
203  hit_at_least = 0
204  case default
205  hitmax = 1
206  hit_at_least = 1
207  remain = s2 + 1
208  end select
209  ! 現位置以降の1字指定のヒット数を数える
210  hitcount = 0
211  do, i = 1, hitmax
212  if (hit(ipat(s1:s2), text(i:i)) .neqv. normal_hit) then
213  exit
214  endif
215  hitcount = i
216  enddo
217  ! 現位置で無ヒットの場合、ヒットを要するならマッチ失敗
218  if (hitcount < hit_at_least) then
219  length = -1
220  return
221  endif
222  ! 最長原理: なるべく長くヒットしたものから、残りのマッチする
223  ! ものを探す。いわゆる最左最長探索の最長である。
224  do, i = 1 + hitcount, 1 + hit_at_least, -1
225  call match_here(ipat(remain: ), text(i: ), length)
226  if (length >= 0) then
227  length = length + i - 1
228  return
229  endif
230  enddo
231  length = -1
Here is the call graph for this function:
Here is the caller graph for this function:

◆ preprocess_pattern()

subroutine dc_regex::preprocess_pattern ( character(len = *), intent(in)  pattern,
integer, dimension(:), intent(out)  symbols 
)
private

Definition at line 41 of file dc_regex.f90.

References c_escape, sym_anychar, sym_count_base, sym_eol, sym_headfix, sym_isalpha, sym_isdigit, sym_isspace, sym_isword, sym_isxdigit, sym_normal_set, sym_plus, sym_question, sym_reversed_set, sym_star, and sym_tailfix.

Referenced by match().

41  !
42  ! メタキャラクタと普通の文字を分離
43  !
44  character(len = *), intent(in):: pattern
45  integer, intent(out):: symbols(:)
46  integer:: i, j, code, imax, j_last_set
47  integer:: status, stat_return
48  integer, parameter:: stat_init = 1, stat_escape = 2, &
49  stat_open_set = 3, stat_in_set = 4, stat_hexadecimal = 5
50  character:: c
51  continue
52  status = stat_init
53  stat_return = stat_init
54  symbols(:) = sym_eol
55  j_last_set = 0
56  imax = len_trim(pattern)
57  j = 1
58  do, i = 1, imax
59  c = pattern(i:i)
60  select case(status)
61  case(stat_init)
62  if (c == c_escape) then
63  status = stat_escape
64  cycle
65  else if (c == "[") then
66  symbols(j) = sym_normal_set
67  status = stat_open_set
68  else if (c == ".") then
69  symbols(j) = sym_anychar
70  else if (c == "?") then
71  symbols(j) = sym_question
72  else if (c == "+") then
73  symbols(j) = sym_plus
74  else if (c == "*") then
75  symbols(j) = sym_star
76  else if (c == "^" .and. i == 1) then
77  symbols(j) = sym_headfix
78  else if (c == "$" .and. i == imax) then
79  symbols(j) = sym_tailfix
80  else
81  symbols(j) = ichar(c)
82  endif
83  case(stat_escape)
84  if (c == 'd' .or. c == 'D') then
85  symbols(j) = sym_isdigit
86  else if (c == 'a' .or. c == 'A') then
87  symbols(j) = sym_isalpha
88  else if (c == 'w' .or. c == 'W') then
89  symbols(j) = sym_isword
90  else if (c == 's' .or. c == 'S') then
91  symbols(j) = sym_isspace
92  else if (c == 'z' .or. c == 'Z') then
93  symbols(j) = sym_isxdigit
94  else if (c == 'x' .or. c == 'X') then
95  symbols(j) = -1
96  status = stat_hexadecimal
97  cycle
98  else
99  symbols(j) = ichar(c)
100  end if
101  status = stat_return
102  case(stat_hexadecimal)
103  code = index("123456789ABCDEFabcdef", c)
104  if (code >= 16) code = code - 6
105  if (symbols(j) == -1) then
106  symbols(j) = code
107  cycle
108  else
109  symbols(j) = symbols(j) * 16 + code
110  status = stat_return
111  endif
112  case(stat_open_set)
113  symbols(j) = sym_count_base
114  j_last_set = j
115  stat_return = stat_in_set
116  if (c == '^') then
117  symbols(j - 1) = sym_reversed_set
118  status = stat_in_set
119  else if (c == c_escape) then
120  status = stat_escape
121  else
122  j = j + 1
123  symbols(j) = ichar(c)
124  status = stat_in_set
125  endif
126  case(stat_in_set)
127  if (c == ']') then
128  symbols(j_last_set) = sym_count_base + j - j_last_set - 1
129  stat_return = stat_init
130  status = stat_init
131  cycle
132  else if (c == c_escape) then
133  status = stat_escape
134  cycle
135  else
136  symbols(j) = ichar(c)
137  endif
138  end select
139  j = j + 1
140  enddo
141  select case(status)
142  case(stat_escape)
143  symbols(j) = ichar(' ')
144  case(stat_open_set)
145  symbols(j) = sym_count_base
146  case(stat_in_set)
147  symbols(j_last_set) = sym_count_base + j - j_last_set - 1
148  end select
Here is the caller graph for this function:

Variable Documentation

◆ c_escape

character, save dc_regex::c_escape = '#'
private

Definition at line 21 of file dc_regex.f90.

Referenced by preprocess_pattern().

21  character, save :: c_escape = '#'

◆ sym_anychar

integer, parameter dc_regex::sym_anychar = 500
private

Definition at line 23 of file dc_regex.f90.

Referenced by hit(), and preprocess_pattern().

23  integer, parameter :: sym_anychar = 500

◆ sym_count_base

integer, parameter dc_regex::sym_count_base = 1000
private

Definition at line 36 of file dc_regex.f90.

Referenced by match_here(), and preprocess_pattern().

36  integer, parameter :: sym_count_base = 1000

◆ sym_eol

integer, parameter dc_regex::sym_eol = -128
private

Definition at line 22 of file dc_regex.f90.

Referenced by match_here(), and preprocess_pattern().

22  integer, parameter :: sym_eol = -128

◆ sym_headfix

integer, parameter dc_regex::sym_headfix = 540
private

Definition at line 29 of file dc_regex.f90.

Referenced by match(), and preprocess_pattern().

29  integer, parameter :: sym_headfix = 540

◆ sym_isalpha

integer, parameter dc_regex::sym_isalpha = 561
private

Definition at line 32 of file dc_regex.f90.

Referenced by hit(), and preprocess_pattern().

32  integer, parameter :: sym_isalpha = 561

◆ sym_isdigit

integer, parameter dc_regex::sym_isdigit = 560
private

Definition at line 31 of file dc_regex.f90.

Referenced by hit(), and preprocess_pattern().

31  integer, parameter :: sym_isdigit = 560

◆ sym_isspace

integer, parameter dc_regex::sym_isspace = 563
private

Definition at line 34 of file dc_regex.f90.

Referenced by hit(), and preprocess_pattern().

34  integer, parameter :: sym_isspace = 563

◆ sym_isword

integer, parameter dc_regex::sym_isword = 562
private

Definition at line 33 of file dc_regex.f90.

Referenced by hit(), and preprocess_pattern().

33  integer, parameter :: sym_isword = 562

◆ sym_isxdigit

integer, parameter dc_regex::sym_isxdigit = 564
private

Definition at line 35 of file dc_regex.f90.

Referenced by hit(), and preprocess_pattern().

35  integer, parameter :: sym_isxdigit = 564

◆ sym_normal_set

integer, parameter dc_regex::sym_normal_set = 520
private

Definition at line 27 of file dc_regex.f90.

Referenced by match_here(), and preprocess_pattern().

27  integer, parameter :: sym_normal_set = 520

◆ sym_plus

integer, parameter dc_regex::sym_plus = 502
private

Definition at line 25 of file dc_regex.f90.

Referenced by match_here(), and preprocess_pattern().

25  integer, parameter :: sym_plus = 502

◆ sym_question

integer, parameter dc_regex::sym_question = 501
private

Definition at line 24 of file dc_regex.f90.

Referenced by match_here(), and preprocess_pattern().

24  integer, parameter :: sym_question = 501

◆ sym_reversed_set

integer, parameter dc_regex::sym_reversed_set = 521
private

Definition at line 28 of file dc_regex.f90.

Referenced by match_here(), and preprocess_pattern().

28  integer, parameter :: sym_reversed_set = 521

◆ sym_star

integer, parameter dc_regex::sym_star = 503
private

Definition at line 26 of file dc_regex.f90.

Referenced by match_here(), and preprocess_pattern().

26  integer, parameter :: sym_star = 503

◆ sym_tailfix

integer, parameter dc_regex::sym_tailfix = 541
private

Definition at line 30 of file dc_regex.f90.

Referenced by match_here(), and preprocess_pattern().

30  integer, parameter :: sym_tailfix = 541