# UTF-8 regression tests: glob matching, searching, splitting, reversing,
# case conversion and scanning of strings containing multi-byte characters.
# The harness commands (needs, test, testreport) come from testing.tcl.
source [file dirname [info script]]/testing.tcl

# Everything below is only meaningful when the "utf8" constraint is met.
needs constraint utf8

# ---------------------------------------------------------------------
# utf8-1.x: string match - '?' and character sets against multi-byte chars
# ---------------------------------------------------------------------
test utf8-1.1 "Pattern matching - ?" {
    string match "abc?def" "abc\u00b5def"
} 1

test utf8-1.2 "Pattern matching - ?" {
    string match "abc?def" "abc\u2704def"
} 1

test utf8-1.3 "Pattern utf-8 literal" {
    string match "ab\u00b5\u2704?" "ab\u00b5\u2704x"
} 1

test utf8-1.4 "Pattern utf-8 char sets" {
    string match "a\[b\u00b5\]\u2704?" "a\u00b5\u2704x"
} 1

test utf8-1.5 "Pattern utf-8 char sets" {
    string match "a\[b\u00b5\]\u2704?" "a\u00b6\u2704x"
} 0

test utf8-1.6 "Pattern utf-8 char sets" {
    string match "a\[b\u00b5\]\u2704?" "ab\u2704x"
} 1

test utf8-1.7 "Pattern utf-8 char sets" {
    string match "a\[b\u00b5\]?" "a\u2704x"
} 0

# Range inside a character set: \u00bd lies within \u00b5-\u00c3, \u00c4 does not.
test utf8-1.8 "Pattern utf-8 char sets" {
    string match "a\[\u00b5-\u00c3\]" "a\ubd"
} 1

test utf8-1.9 "Pattern utf-8 char sets" {
    string match "a\[\u00b5-\u00c3\]" "a\uc4"
} 0

# ---------------------------------------------------------------------
# utf8-2.x: string match with and without -nocase
# ---------------------------------------------------------------------
test utf8-2.1 "Pattern utf-8 nocase" {
    string match -nocase "a\u1edc\u1ef4*" "A\u1edd\u1ef5XX"
} 1

test utf8-2.2 "Pattern utf-8 case difference" {
    string match "a\u1edc\u1ef4*" "A\u1edd\u1ef5XX"
} 0

# ---------------------------------------------------------------------
# utf8-3.x: glob matching in other commands, and command names
# ---------------------------------------------------------------------
test utf8-3.1 "lsearch -glob" {
    lsearch -glob {1 d a\u00b5xyb c} a\ub5*b
} 2

test utf8-3.2 "switch -glob" {
    switch -glob -- a\ub5xyb a\ub5*b { set x 1 } default { set x 0 }
    set x
} 1

# Name of a proc that starts with a multi-byte character; used by the
# next two tests as both the command name and the expected result.
set x "\ub5test"

test utf8-3.3 "info procs" {
    proc $x {} {
        info procs \[\ub5X]????
    }
    $x
} $x

# Relies on the proc created by the previous test still being defined.
test utf8-3.4 "info commands" {
    info commands \[\ub5X]????
} $x

# A command name ending in a truncated utf-8 sequence must not raise an error
# (catch returns 0).
test utf8-3.5 "proc name with invalid utf-8" {
    catch {
        proc ab\xc2 {} {}
    } msg
} 0

test utf8-3.6 "rename to invalid name" {
    catch {
        rename ab\xc2 ab\xc3
    } msg
} 0

# Best-effort removal of the proc left behind by the rename test.
catch {rename ab\xc3 ""}

# ---------------------------------------------------------------------
# utf8-4.x: split
# ---------------------------------------------------------------------
test utf8-4.1 "split with utf-8" {
    split "zy\u2702xw" x
} "zy\u2702 w"

test utf8-4.2 "split with utf-8" {
    split "zy\u2702xw" \u2702
} "zy xw"

# Empty split chars: one list element per character, not per byte.
test utf8-4.3 "split with utf-8" {
    split "zy\u2702xw" {}
} "z y \u2702 x w"

# ---------------------------------------------------------------------
# utf8-5.x: string first (expected results are character indexes)
# ---------------------------------------------------------------------
test utf8-5.1 "string first with utf-8" {
    string first w "zy\u2702xw"
} 4

test utf8-5.2 "string first with utf-8" {
    string first \u2702 "\ub5zy\u2702xw"
} 3

test utf8-5.3 "string first with utf-8" {
    string first \u2704 "\ub5zy\u2702xw"
} -1

test utf8-5.4 "string first with utf-8" {
    string first \u2702 "\ub5zy\u2702xw\u2702BB"
} 3

# ---------------------------------------------------------------------
# utf8-6.x: string last (expected results are character indexes)
# ---------------------------------------------------------------------
test utf8-6.1 "string last with utf-8" {
    string last w "zy\u2702xw"
} 4

test utf8-6.2 "string last with utf-8" {
    string last \u2702 "\ub5zy\u2702xw"
} 3

test utf8-6.3 "string last with utf-8" {
    string last \u2704 "\ub5zy\u2702xw"
} -1

test utf8-6.4 "string last with utf-8" {
    string last \u2702 "\ub5zy\u2702xw\u2702BB"
} 6

# ---------------------------------------------------------------------
# utf8-7.x: reverse, append, case conversion
# ---------------------------------------------------------------------
test utf8-7.1 "string reverse" {
    string reverse \ub5Test\u2702
} \u2702tseT\ub5

# Character length and byte length differ once \u2702 is present.
test utf8-7.2 {append counts correctly} {
    set x \u2702XYZ
    append x \u2702XYZ
    list [string length $x] [string bytelength $x]
} {8 12}

test utf8-7.3 {Upper, lower for titlecase utf-8} {
    list [string toupper \u01c5] [string tolower \u01c5]
} "\u01c4 \u01c6"

test utf8-7.4 {Case folding may change encoded length} {
    list [string bytelength \u0131] [string bytelength [string toupper \u0131]]
} {2 1}

# ---------------------------------------------------------------------
# utf8-8.x: \u{...} and \U escapes; tests tagged "jim" carry that constraint
# ---------------------------------------------------------------------
test utf8-8.1 {Chars outside the BMP} jim {
    string length \u{12000}\u{13000}
} 2

test utf8-8.2 {Chars outside the BMP} jim {
    string match "ab\[\u{12000}c\]d" ab\u{12000}d
} 1

test utf8-8.3 {Chars outside the BMP} jim {
    string last d "ab\u{101fff}cd"
} 4

# Without braces, a 5-digit \u escape is expected to yield two characters.
test utf8-8.4 {Longer sequences} {
    string length \u12000
} 2

test utf8-8.5 {\U} jim {
    set x \U000000b5
} \ub5

# An over-long braced escape is not substituted; only the backslash is dropped.
test utf8-8.6 {\u invalid} {
    set x "\u{0000000b5}"
} "u{0000000b5}"

# ---------------------------------------------------------------------
# utf8-9.x: string totitle
# ---------------------------------------------------------------------
test utf8-9.1 {string totitle} {
    string totitle \u01c4-test
} "\u01c5-test"

test utf8-9.2 {string totitle} {
    string totitle \u01c5-test
} "\u01c5-test"

test utf8-9.3 {string totitle} {
    string totitle abc-\u01c4
} "Abc-\u01c6"

# ---------------------------------------------------------------------
# utf8-10.x: scan
# ---------------------------------------------------------------------
# Previously scan was using char length instead of byte length
# when iterating over the string
test utf8-10.1 {scan with utf-8} {
    scan ab\u0300c %c%c%c%c a b c d
    list $a $b $c $d
} {97 98 768 99}

testreport