diff options
author | Ken Thompson <ken@golang.org> | 2009-04-10 19:49:31 -0700 |
---|---|---|
committer | Ken Thompson <ken@golang.org> | 2009-04-10 19:49:31 -0700 |
commit | a0b44b426de5d1acdf5a135049441e5c85708d92 (patch) | |
tree | d031515005f681cd92520471e0db9d84513a3c28 | |
parent | 5dc81dc7f9caae7bfcd63137691f67ff714ffb2d (diff) | |
download | golang-a0b44b426de5d1acdf5a135049441e5c85708d92.tar.gz |
range over strings
R=r
OCL=27332
CL=27332
-rw-r--r-- | src/cmd/gc/builtin.c.boot | 2 | ||||
-rw-r--r-- | src/cmd/gc/sys.go | 2 | ||||
-rw-r--r-- | src/cmd/gc/walk.c | 77 | ||||
-rw-r--r-- | src/runtime/string.c | 156 |
4 files changed, 235 insertions, 2 deletions
diff --git a/src/cmd/gc/builtin.c.boot b/src/cmd/gc/builtin.c.boot index 07b0c828f..81ec84c37 100644 --- a/src/cmd/gc/builtin.c.boot +++ b/src/cmd/gc/builtin.c.boot @@ -20,6 +20,8 @@ char *sysimport = "func sys.intstring (? int64) (? string)\n" "func sys.byteastring (? *uint8, ? int) (? string)\n" "func sys.arraystring (? []uint8) (? string)\n" + "func sys.stringiter (? string, ? int) (? int)\n" + "func sys.stringiter2 (? string, ? int) (retk int, retv int)\n" "func sys.ifaceT2I (sigi *uint8, sigt *uint8, elem any) (ret any)\n" "func sys.ifaceI2T (sigt *uint8, iface any) (ret any)\n" "func sys.ifaceI2T2 (sigt *uint8, iface any) (ret any, ok bool)\n" diff --git a/src/cmd/gc/sys.go b/src/cmd/gc/sys.go index c86a9f528..6a0a6b349 100644 --- a/src/cmd/gc/sys.go +++ b/src/cmd/gc/sys.go @@ -29,6 +29,8 @@ func indexstring(string, int) byte; func intstring(int64) string; func byteastring(*byte, int) string; func arraystring([]byte) string; +func stringiter(string, int) int; +func stringiter2(string, int) (retk int, retv int); func ifaceT2I(sigi *byte, sigt *byte, elem any) (ret any); func ifaceI2T(sigt *byte, iface any) (ret any); diff --git a/src/cmd/gc/walk.c b/src/cmd/gc/walk.c index d3a4adbf3..fb8b7ca00 100644 --- a/src/cmd/gc/walk.c +++ b/src/cmd/gc/walk.c @@ -3191,7 +3191,7 @@ badt: * rewrite a range statement * k and v are names/new_names * m is an array or map - * local is =/0 or :=/1 + * local is 0 (meaning =) or 1 (meaning :=) */ Node* dorange(Node *nn) @@ -3227,8 +3227,10 @@ dorange(Node *nn) goto map; if(t->etype == TCHAN) goto chan; + if(t->etype == TSTRING) + goto strng; - yyerror("range must be over map/array"); + yyerror("range must be over map/array/chan/string"); goto out; ary: @@ -3333,6 +3335,77 @@ chan: addtotop(n); goto out; +strng: + hk = nod(OXXX, N, N); // hidden key + tempname(hk, types[TINT]); + + ha = nod(OXXX, N, N); // hidden string + tempname(ha, t); + + + if(local) { + k = old2new(k, types[TINT]); + if(v != N) + v = old2new(v, types[TINT]); + } + + // ha = s + a = nod(OAS, ha, m); + n->ninit = a; + + // kh = 0 + a = nod(OAS, hk, nodintconst(0)); + n->ninit = list(n->ninit, a); + + // k = hk + a = nod(OAS, k, hk); + n->ninit = list(n->ninit, a); + + + // hk[,v] = stringiter(ha,hk) + if(v != N) { + // hk,v = stringiter2(ha, hk) + on = syslook("stringiter2", 0); +// argtype(on, v->type); + a = list(ha, hk); + a = nod(OCALL, on, a); + a = nod(OAS, list(hk, v), a); + } else { + // hk = stringiter(ha, hk) + on = syslook("stringiter", 0); + a = list(ha, hk); + a = nod(OCALL, on, a); + a = nod(OAS, hk, a); + } + n->ninit = list(n->ninit, a); + + // while(hk != 0) + n->ntest = nod(ONE, hk, nodintconst(0)); + + // k = hk + a = nod(OAS, k, hk); + n->nincr = a; + + // hk[,v] = stringiter(ha,hk) + if(v != N) { + // hk,v = stringiter2(ha, hk) + on = syslook("stringiter2", 0); +// argtype(on, v->type); + a = list(ha, hk); + a = nod(OCALL, on, a); + a = nod(OAS, list(hk, v), a); + } else { + // hk = stringiter(ha, hk) + on = syslook("stringiter", 0); + a = list(ha, hk); + a = nod(OCALL, on, a); + a = nod(OAS, hk, a); + } + n->nincr = list(n->nincr, a); + + addtotop(n); + goto out; + out: return n; } diff --git a/src/runtime/string.c b/src/runtime/string.c index 212fa5c28..37f6ff0a1 100644 --- a/src/runtime/string.c +++ b/src/runtime/string.c @@ -188,3 +188,159 @@ sys·arraystring(Array b, String s) mcpy(s.str, b.array, s.len); FLUSH(&s); } + +static int32 chartorune(int32 *rune, byte *str); +enum +{ + Runeself = 0x80, + Runeerror = 0xfff8, // fffd in plan9 +}; + +// func stringiter(string, int) (retk int); +void +sys·stringiter(String s, int32 k, int32 retk) +{ + int32 l, n; + + if(k >= s.len) { + // retk=0 is end of iteration + retk = 0; + goto out; + } + + l = s.str[k]; + n = 1; + + if(l >= Runeself) { + // multi-char rune + n = chartorune(&l, s.str+k); + if(k+n > s.len) { + // special case of multi-char rune + // that ran off end of string + l = Runeerror; + n = 1; + } + } + + retk = k+n; + +out: + FLUSH(&retk); +} + +// func stringiter2(string, int) (retk int, retv any); +void +sys·stringiter2(String s, int32 k, int32 retk, int32 retv) +{ + int32 l, n; + + if(k >= s.len) { + // retk=0 is end of iteration + retk = 0; + retv = 0; + goto out; + } + + l = s.str[k]; + n = 1; + + if(l >= Runeself) { + // multi-char rune + n = chartorune(&l, s.str+k); + if(k+n > s.len) { + // special case of multi-char rune + // that ran off end of string + l = Runeerror; + n = 1; + } + } + + retk = k+n; + retv = l; + +out: + FLUSH(&retk); + FLUSH(&retv); +} + +// +// copied from plan9 library +// + +enum +{ + Bit1 = 7, + Bitx = 6, + Bit2 = 5, + Bit3 = 4, + Bit4 = 3, + + T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ + Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ + T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ + T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ + T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ + + Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */ + Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */ + Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */ + + Maskx = (1<<Bitx)-1, /* 0011 1111 */ + Testx = Maskx ^ 0xFF, /* 1100 0000 */ +}; + +static int32 +chartorune(int32 *rune, byte *str) +{ + int32 c, c1, c2; + int32 l; + + /* + * one character sequence + * 00000-0007F => T1 + */ + c = str[0]; + if(c < Tx) { + *rune = c; + return 1; + } + + /* + * two character sequence + * 0080-07FF => T2 Tx + */ + c1 = str[1] ^ Tx; + if(c1 & Testx) + goto bad; + if(c < T3) { + if(c < T2) + goto bad; + l = ((c << Bitx) | c1) & Rune2; + if(l <= Rune1) + goto bad; + *rune = l; + return 2; + } + + /* + * three character sequence + * 0800-FFFF => T3 Tx Tx + */ + c2 = str[2] ^ Tx; + if(c2 & Testx) + goto bad; + if(c < T4) { + l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; + if(l <= Rune2) + goto bad; + *rune = l; + return 3; + } + + /* + * bad decoding + */ +bad: + *rune = Runeerror; + return 1; +} |