1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
|
{ The Computer Language Benchmarks Game
http://shootout.alioth.debian.org
contributed by Steve Fisher
modified by Peter Vreman
compile with
fpc -O3 regex-dna.pp
}
uses regexpr,strutils;
{ Replaces every non-overlapping match of the regular expression target
  found in str with repl and stores the resulting text in dest.

  target : regular expression, compiled case-sensitively (no flags).
  repl   : text inserted in place of each match.
  str    : text to scan; it is not modified.
  dest   : receives the rewritten text. It is cleared before str is
           scanned, so pass a variable distinct from str.

  Returns the number of matches that were replaced.
  Writes a message and halts with exit code 1 if the expression cannot
  be compiled. }
function replace_matches( const target: pchar; const repl: ansistring;
                          const str: ansistring; var dest: ansistring ): longint;
var
  engine : tRegexprEngine;
  count, index, size : longint;
  pstart : pchar;
  starti : longint;
begin
  if not GenerateRegExprEngine( target, [], engine) then
  begin
    writeln( 'Failed to generate regex. engine for "',target,'".' );
    halt(1)
  end;
  count := 0;
  dest := '';
  { starti (1-based index) and pstart (pointer) track the same position
    in str and are always advanced together. }
  starti := 1;
  pstart := pchar(str);
  while starti <= length(str) do
  begin
    if not RegExprPos(engine, pstart, index, size ) then
      break;
    inc(count);
    { index is the number of unmatched characters in front of the match;
      they are copied through unchanged, followed by the replacement. }
    dest := dest + Copy( str, starti, index) + repl;
    if size = 0 then
    begin
      { An empty match consumes nothing. Carry the next character through
        and step over it, otherwise the scan would never advance. }
      dest := dest + Copy( str, starti + index, 1 );
      size := 1;
    end;
    inc(pstart,index+size);
    inc(starti,index+size);
  end;
  DestroyRegExprEngine( engine );
  { Append whatever follows the last match. }
  dest := dest + Copy( str, starti, length(str)-starti+1);
  exit(count);
end;
{ Counts the non-overlapping matches of the regular expression target
  in str. The expression is compiled with ref_caseinsensitive.

  target : regular expression to search for.
  str    : text to scan.

  Returns the number of matches found.
  Writes a message and halts with exit code 1 if the expression cannot
  be compiled. }
function count_matches( target: pchar; const str: ansistring ): longint;
var
  engine : tRegexprEngine;
  pstart : pchar;
  starti,
  count, index, size : longint;
begin
  if not GenerateRegExprEngine( target, [ref_caseinsensitive], engine) then
  begin
    writeln( 'Failed to generate regex. engine for "',target,'".' );
    halt(1)
  end;
  count := 0;
  { starti (1-based index) and pstart (pointer) track the same position
    in str and are always advanced together. }
  pstart := pchar(str);
  starti := 1;
  while starti <= length(str) do
  begin
    if not RegExprPos(engine, pstart, index, size ) then
      break;
    inc(count);
    { An empty match consumes nothing; step one character forward so the
      scan always makes progress. }
    if size = 0 then
      size := 1;
    { Resume right after the match that was just counted. }
    inc(pstart,index+size);
    inc(starti,index+size);
  end;
  DestroyRegExprEngine( engine );
  exit(count)
end;
const
  { Variants of the 8-mer agggtaaa and of its reverse complement tttaccct
    that are counted in the input sequence. }
  patterns : array[1..9] of pchar =
  (
    '(agggtaaa)|(tttaccct)',
    '([cgt]gggtaaa)|(tttaccc[acg])',
    '(a[act]ggtaaa)|(tttacc[agt]t)',
    '(ag[act]gtaaa)|(tttac[agt]ct)',
    '(agg[act]taaa)|(ttta[agt]cct)',
    '(aggg[acg]aaa)|(ttt[cgt]ccct)',
    '(agggt[cgt]aa)|(tt[acg]accct)',
    '(agggta[cgt]a)|(t[acg]taccct)',
    '(agggtaa[cgt])|([acg]ttaccct)'
  );
  { IUB ambiguity code and the alternation that replaces it.
    Column 1 is the code letter, column 2 the replacement text.
    S stands for c or g; it previously duplicated the entry for Y. }
  replacements : array[1..11,1..2] of pchar =
  (
    ('B', '(c|g|t)'), ('D', '(a|g|t)'), ('H', '(a|c|t)'), ('K', '(g|t)'),
    ('M', '(a|c)'), ('N', '(a|c|g|t)'), ('R', '(a|g)'), ('S', '(c|g)'),
    ('V', '(a|c|g)'), ('W', '(a|t)'), ('Y', '(c|t)')
  );
var
  pattern : pchar;
  sequence, new_seq : ansiString;
  { ansistring so that input lines longer than 255 characters are read
    in full instead of being cut off. }
  line : ansiString;
  tmp : string[255];
  letter, repl : pchar;
  i, count, init_length, clean_length : longint;
  inbuf : array[0..64*1024] of char;
begin
  { Give standard input a large buffer. }
  settextbuf(input,inbuf);

  { Read all of standard input. init_length counts every line plus one
    for its line break; lines starting with '>' are left out of
    sequence, all other lines are joined without line breaks. }
  sequence := '';
  init_length := 0;
  while not eof do
  begin
    readln( line );
    inc( init_length, length( line ) + 1 );
    { Empty lines add nothing; the length test also keeps line[1] from
      being read on an empty string. }
    if (length( line ) > 0) and (line[1] <> '>') then
      sequence := sequence + line;
  end;
  clean_length := length(sequence);

  { Count each pattern and print it without its parentheses. }
  for i := low(patterns) to high(patterns) do
  begin
    pattern := patterns[i];
    count := count_matches( pattern, sequence );
    tmp := delChars( delChars(pattern,'('), ')' );
    writeln( tmp, ' ', count);
  end;

  { Apply every replacement in turn; each pass works on the output of
    the previous one. The match count is not needed here. }
  for i := low(replacements) to high(replacements) do
  begin
    letter := replacements[i][1];
    repl := replacements[i][2];
    replace_matches(letter,repl,sequence,new_seq);
    sequence := new_seq;
  end;

  { Report the raw input size, the size of the joined sequence, and the
    size after all replacements. }
  writeln;
  writeln( init_length );
  writeln( clean_length );
  writeln( length(sequence) );
end.
|