// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package norm import ( "strings" "testing" ) func doIterNorm(f Form, s string) []byte { acc := []byte{} i := Iter{} i.InitString(f, s) for !i.Done() { acc = append(acc, i.Next()...) } return acc } func runIterTests(t *testing.T, name string, f Form, tests []AppendTest, norm bool) { for i, test := range tests { in := test.left + test.right gold := test.out if norm { gold = string(f.AppendString(nil, test.out)) } out := string(doIterNorm(f, in)) if len(out) != len(gold) { const msg = "%s:%d: length is %d; want %d" t.Errorf(msg, name, i, len(out), len(gold)) } if out != gold { // Find first rune that differs and show context. ir := []rune(out) ig := []rune(gold) t.Errorf("\n%X != \n%X", ir, ig) for j := 0; j < len(ir) && j < len(ig); j++ { if ir[j] == ig[j] { continue } if j -= 3; j < 0 { j = 0 } for e := j + 7; j < e && j < len(ir) && j < len(ig); j++ { const msg = "%s:%d: runeAt(%d) = %U; want %U" t.Errorf(msg, name, i, j, ir[j], ig[j]) } break } } } } func rep(r rune, n int) string { return strings.Repeat(string(r), n) } const segSize = maxByteBufferSize var iterTests = []AppendTest{ {"", ascii, ascii}, {"", txt_all, txt_all}, {"", "a" + rep(0x0300, segSize/2), "a" + rep(0x0300, segSize/2)}, } var iterTestsD = []AppendTest{ { // segment overflow on unchanged character "", "a" + rep(0x0300, segSize/2) + "\u0316", "a" + rep(0x0300, segSize/2-1) + "\u0316\u0300", }, { // segment overflow on unchanged character + start value "", "a" + rep(0x0300, segSize/2+maxCombiningChars+4) + "\u0316", "a" + rep(0x0300, segSize/2+maxCombiningChars) + "\u0316" + rep(0x300, 4), }, { // segment overflow on decomposition "", "a" + rep(0x0300, segSize/2-1) + "\u0340", "a" + rep(0x0300, segSize/2), }, { // segment overflow on decomposition + start value "", "a" + rep(0x0300, segSize/2-1) + "\u0340" + rep(0x300, maxCombiningChars+4) + "\u0320", "a" + rep(0x0300, segSize/2-1) + rep(0x300, maxCombiningChars+1) + "\u0320" + rep(0x300, 4), }, { // start value after ASCII overflow "", rep('a', segSize) + rep(0x300, maxCombiningChars+2) + "\u0320", rep('a', segSize) + rep(0x300, maxCombiningChars) + "\u0320\u0300\u0300", }, { // start value after Hangul overflow "", rep(0xAC00, segSize/6) + rep(0x300, maxCombiningChars+2) + "\u0320", strings.Repeat("\u1100\u1161", segSize/6) + rep(0x300, maxCombiningChars+1) + "\u0320" + rep(0x300, 1), }, { // start value after cc=0 "", "您您" + rep(0x300, maxCombiningChars+4) + "\u0320", "您您" + rep(0x300, maxCombiningChars) + "\u0320" + rep(0x300, 4), }, { // start value after normalization "", "\u0300\u0320a" + rep(0x300, maxCombiningChars+4) + "\u0320", "\u0320\u0300a" + rep(0x300, maxCombiningChars) + "\u0320" + rep(0x300, 4), }, } var iterTestsC = []AppendTest{ { // ordering of non-composing combining characters "", "\u0305\u0316", "\u0316\u0305", }, { // segment overflow "", "a" + rep(0x0305, segSize/2+4) + "\u0316", "a" + rep(0x0305, segSize/2-1) + "\u0316" + rep(0x305, 5), }, } func TestIterNextD(t *testing.T) { runIterTests(t, "IterNextD1", NFKD, appendTests, true) runIterTests(t, "IterNextD2", NFKD, iterTests, true) runIterTests(t, "IterNextD3", NFKD, iterTestsD, false) } func TestIterNextC(t *testing.T) { runIterTests(t, "IterNextC1", NFKC, appendTests, true) runIterTests(t, "IterNextC2", NFKC, iterTests, true) runIterTests(t, "IterNextC3", NFKC, iterTestsC, false) } type SegmentTest struct { in string out []string } var segmentTests = []SegmentTest{ {"\u1E0A\u0323a", []string{"\x44\u0323\u0307", "a", ""}}, {rep('a', segSize), append(strings.Split(rep('a', segSize), ""), "")}, {rep('a', segSize+2), append(strings.Split(rep('a', segSize+2), ""), "")}, {rep('a', segSize) + "\u0300aa", append(strings.Split(rep('a', segSize-1), ""), "a\u0300", "a", "a", "")}, } var segmentTestsK = []SegmentTest{ {"\u3332", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u3099", ""}}, // last segment of multi-segment decomposition needs normalization {"\u3332\u093C", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u093C\u3099", ""}}, // Hangul and Jamo are grouped togeter. {"\uAC00", []string{"\u1100\u1161", ""}}, {"\uAC01", []string{"\u1100\u1161\u11A8", ""}}, {"\u1100\u1161", []string{"\u1100\u1161", ""}}, } // Note that, by design, segmentation is equal for composing and decomposing forms. func TestIterSegmentation(t *testing.T) { segmentTest(t, "SegmentTestD", NFD, segmentTests) segmentTest(t, "SegmentTestC", NFC, segmentTests) segmentTest(t, "SegmentTestD", NFKD, segmentTestsK) segmentTest(t, "SegmentTestC", NFKC, segmentTestsK) } func segmentTest(t *testing.T, name string, f Form, tests []SegmentTest) { iter := Iter{} for i, tt := range tests { iter.InitString(f, tt.in) for j, seg := range tt.out { if seg == "" { if !iter.Done() { res := string(iter.Next()) t.Errorf(`%s:%d:%d: expected Done()==true, found segment "%s"`, name, i, j, res) } continue } if iter.Done() { t.Errorf("%s:%d:%d: Done()==true, want false", name, i, j) } seg = f.String(seg) if res := string(iter.Next()); res != seg { t.Errorf(`%s:%d:%d" segment was "%s" (%d); want "%s" (%d) %X %X`, name, i, j, res, len(res), seg, len(seg), []rune(res), []rune(seg)) } } } }