summaryrefslogtreecommitdiff
path: root/fpcsrc/tests/bench/shootout/src/regexdna.pp
blob: e439b3691f4317026a23ed4def02f7db4d25dd16 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
{ The Computer Language Benchmarks Game
  http://shootout.alioth.debian.org

  contributed by Steve Fisher
  modified by Peter Vreman

  compile with
  fpc -O3 regex-dna.pp
}

uses regexpr,strutils;

function replace_matches( const target: pchar;  const repl: ansistring;
                const str: ansistring;  var dest: ansistring ): longint;
var
  engine : tRegexprEngine;
  count, index, size : longint;
  pstart : pchar;
  starti : longint;
begin
  if not GenerateRegExprEngine( target, [], engine) then
  begin
    writeln( 'Failed to generate regex. engine for "',target,'".' );
    halt(1)
  end;
  count := 0;
  dest := '';
  starti := 1;
  pstart := pchar(str);
  while starti <= length(str) do
  begin
    if RegExprPos(engine, pstart, index, size ) then
    begin
      inc(count);
      dest := dest + Copy( str, starti, index) + repl;
      inc(pstart,index+size);
      inc(starti,index+size);
    end
    else
      break
  end;
  DestroyRegExprEngine( engine );
  dest:=dest+Copy( str, starti, length(str)-starti+1);
  exit(count);
end;


function count_matches( target: pchar; const str: ansistring ): longint;
var
  engine : tRegexprEngine;
  pstart : pchar;
  starti,
  count, index, size : longint;
begin
  if not GenerateRegExprEngine( target, [ref_caseinsensitive], engine) then
  begin
    writeln( 'Failed to generate regex. engine for "',target,'".' );
    halt(1)
  end;
  count := 0;
  pstart := pchar(str);
  starti := 1;
  while starti <= length(str) do
  begin
    if RegExprPos(engine, pstart, index, size ) then
    begin
      inc(count);
      inc(pstart,index+size);
      inc(starti,index+size);
    end
    else
      break
  end;
  DestroyRegExprEngine( engine );
  exit(count)
end;

const
  patterns : array[1..9] of pchar =
    (
      '(agggtaaa)|(tttaccct)',
      '([cgt]gggtaaa)|(tttaccc[acg])',
      '(a[act]ggtaaa)|(tttacc[agt]t)',
      '(ag[act]gtaaa)|(tttac[agt]ct)',
      '(agg[act]taaa)|(ttta[agt]cct)',
      '(aggg[acg]aaa)|(ttt[cgt]ccct)',
      '(agggt[cgt]aa)|(tt[acg]accct)',
      '(agggta[cgt]a)|(t[acg]taccct)',
      '(agggtaa[cgt])|([acg]ttaccct)'
    );
  replacements : array[1..11,1..2] of pchar =
  (
    ('B', '(c|g|t)'), ('D', '(a|g|t)'), ('H', '(a|c|t)'), ('K', '(g|t)'),
    ('M', '(a|c)'), ('N', '(a|c|g|t)'), ('R', '(a|g)'), ('S', '(c|t)'),
    ('V', '(a|c|g)'), ('W', '(a|t)'), ('Y', '(c|t)')
  );


var
  pattern : pchar;
  sequence, new_seq : ansiString;
  line, tmp: string[255];
  letter, repl : pchar;
  i, count, init_length, clean_length, reps : longint;
  inbuf : array[0..64*1024] of char;
begin
  settextbuf(input,inbuf);
  sequence := '';
  init_length := 0;
  while not eof do
  begin
    readln( line );
    init_length += length( line ) + 1;
    if line[1] <> '>' then
      sequence := sequence + line;
  end;
  clean_length := length(sequence);

  for i := low(patterns) to high(patterns) do
  begin
    pattern := patterns[i];
    count := count_matches( pattern, sequence );
    tmp := delChars( delChars(pattern,'('), ')' );
    writeln( tmp, ' ', count);
  end;


  //  Replace.
  for i := low(replacements) to high(replacements) do
  begin
    letter := replacements[i][1];  repl := replacements[i][2];
    reps := replace_matches(letter,repl,sequence,new_seq);
    sequence := new_seq;
  end;


  writeln;
  writeln( init_length );
  writeln( clean_length );
  writeln( length(sequence) );
end.