# Load the test harness that lives in the same directory as this script.
source [file dirname [info script]]/testing.tcl

# Declare a dependency on the "utf8" constraint.
# NOTE(review): `needs` is defined in testing.tcl (not shown here); presumably
# the rest of this file is skipped when the constraint is not met - confirm.
needs constraint utf8

# Glob matching with multi-byte characters: U+00B5 is two bytes in UTF-8 and
# U+2704 is three, yet "?" has to consume exactly one character each time.
test utf8-1.1 {Pattern matching - ?} {
	string match "abc?def" "abc\u00b5def"
} 1

test utf8-1.2 {Pattern matching - ?} {
	string match "abc?def" "abc\u2704def"
} 1

# Multi-byte characters appearing literally in the pattern.
test utf8-1.3 {Pattern utf-8 literal} {
	string match "ab\u00b5\u2704?" "ab\u00b5\u2704x"
} 1

# Bracketed set with a multi-byte member: the multi-byte member matches ...
test utf8-1.4 {Pattern utf-8 char sets} {
	string match "a\[b\u00b5\]\u2704?" "a\u00b5\u2704x"
} 1

# ... the adjacent code point U+00B6 does not ...
test utf8-1.5 {Pattern utf-8 char sets} {
	string match "a\[b\u00b5\]\u2704?" "a\u00b6\u2704x"
} 0

# ... the ASCII member still matches ...
test utf8-1.6 {Pattern utf-8 char sets} {
	string match "a\[b\u00b5\]\u2704?" "ab\u2704x"
} 1

# ... and a character that is not in the set is rejected.
test utf8-1.7 {Pattern utf-8 char sets} {
	string match "a\[b\u00b5\]?" "a\u2704x"
} 0

# Range U+00B5..U+00C3: U+00BD lies inside it, U+00C4 is just past the end.
test utf8-1.8 {Pattern utf-8 char sets} {
	string match "a\[\u00b5-\u00c3\]" "a\ubd"
} 1

test utf8-1.9 {Pattern utf-8 char sets} {
	string match "a\[\u00b5-\u00c3\]" "a\uc4"
} 0

# With -nocase, the upper-case pattern characters U+1EDC/U+1EF4 must match
# their lower-case counterparts U+1EDD/U+1EF5 in the subject.
test utf8-2.1 {Pattern utf-8 nocase} {
	string match -nocase "a\u1edc\u1ef4*" "A\u1edd\u1ef5XX"
} 1

# The same inputs without -nocase must not match.
test utf8-2.2 {Pattern utf-8 case difference} {
	string match "a\u1edc\u1ef4*" "A\u1edd\u1ef5XX"
} 0

# Glob matching reached through commands other than [string match].
# The expected index 2 is the list element that contains U+00B5.
test utf8-3.1 {lsearch -glob} {
	lsearch -glob {1 d a\u00b5xyb c} a\ub5*b
} 2

# The first arm is expected to be selected, leaving x set to 1.
test utf8-3.2 {switch -glob} {
	switch -glob -- a\ub5xyb \
		a\ub5*b { set x 1 } \
		default { set x 0 }
	set x
} 1

# Command name beginning with a multi-byte character (U+00B5); it is both
# the name of the proc created below and the expected result of the lookups.
set x "\ub5test"

# The proc looks itself up with a glob pattern whose set contains U+00B5.
test utf8-3.3 "info procs" {
	proc $x {} { info procs \[\ub5X]???? }
	$x
} $x

# Relies on the proc created by utf8-3.3 still being defined.
# (This test previously reused the id utf8-3.3; ids are now unique so that a
# failure report identifies exactly one test.)
test utf8-3.4 "info commands" {
	info commands \[\ub5X]????
} $x

# A lone 0xC2 byte is an incomplete UTF-8 sequence; defining a proc with
# such a name is expected to succeed (catch returns 0).
test utf8-3.5 "proc name with invalid utf-8" {
	catch { proc ab\xc2 {} {} } msg
} 0

# Renaming that proc to another invalid name is expected to succeed as well.
test utf8-3.6 "rename to invalid name" {
	catch { rename ab\xc2 ab\xc3 } msg
} 0

# Best-effort removal of the command left behind by utf8-3.6.
catch {rename ab\xc3 ""}

# Split on an ASCII separator with a multi-byte character elsewhere in the
# string.
test utf8-4.1 "split with utf-8" {
	split "zy\u2702xw" x
} "zy\u2702 w"

# Split where the separator itself is a multi-byte character.
test utf8-4.2 "split with utf-8" {
	split "zy\u2702xw" \u2702
} "zy xw"

# An empty separator splits into individual characters, so the multi-byte
# character must come out as a single element.
# (This test previously reused the id utf8-4.2; it now has its own id so a
# failure report identifies exactly one test.)
test utf8-4.3 "split with utf-8" {
	split "zy\u2702xw" {}
} "z y \u2702 x w"

# [string first] must report character indices, not byte offsets: "w" sits
# after a three-byte character but is still expected at index 4.
test utf8-5.1 {string first with utf-8} {
	string first "w" "zy\u2702xw"
} 4

# Multi-byte needle in a haystack that starts with a multi-byte character.
test utf8-5.2 {string first with utf-8} {
	string first "\u2702" "\ub5zy\u2702xw"
} 3

# Needle that does not occur in the haystack.
test utf8-5.3 {string first with utf-8} {
	string first "\u2704" "\ub5zy\u2702xw"
} -1

# Two occurrences: the earlier one is reported.
test utf8-5.4 {string first with utf-8} {
	string first "\u2702" "\ub5zy\u2702xw\u2702BB"
} 3

# [string last] must report character indices, not byte offsets.
test utf8-6.1 {string last with utf-8} {
	string last "w" "zy\u2702xw"
} 4

# Multi-byte needle in a haystack that starts with a multi-byte character.
test utf8-6.2 {string last with utf-8} {
	string last "\u2702" "\ub5zy\u2702xw"
} 3

# Needle that does not occur in the haystack.
test utf8-6.3 {string last with utf-8} {
	string last "\u2704" "\ub5zy\u2702xw"
} -1

# Two occurrences: the later one is reported.
test utf8-6.4 {string last with utf-8} {
	string last "\u2702" "\ub5zy\u2702xw\u2702BB"
} 6

# Reversal must keep each multi-byte character intact.
test utf8-7.1 {string reverse} {
	string reverse "\ub5Test\u2702"
} "\u2702tseT\ub5"

# After an append the character count and the byte count must both be
# right: 2 x (one 3-byte character + 3 ASCII) = 8 characters, 12 bytes.
test utf8-7.2 {append counts correctly} {
	set x "\u2702XYZ"
	append x "\u2702XYZ"
	list [string length $x] [string bytelength $x]
} {8 12}

# U+01C5 is a titlecase letter; its upper- and lower-case forms are the
# distinct code points U+01C4 and U+01C6.
test utf8-7.3 {Upper, lower for titlecase utf-8} {
	list [string toupper "\u01c5"] [string tolower "\u01c5"]
} "\u01c4 \u01c6"

# U+0131 takes two bytes, while its upper-case form is expected to take one.
test utf8-7.4 {Case folding may change encoded length} {
	list \
		[string bytelength "\u0131"] \
		[string bytelength [string toupper "\u0131"]]
} {2 1}

# Escape-sequence parsing. The exact spelling of each literal below is what
# is being tested, so the literals must not be reformatted.
# NOTE(review): the extra `jim` argument looks like a constraint understood
# by the harness's `test` command (defined in testing.tcl) - confirm there.

# Two code points above U+FFFF written in braced form count as 2 characters.
test utf8-8.1 {Chars outside the BMP} jim {
	string length \u{12000}\u{13000}
} 2

# A code point above U+FFFF as a member of a bracketed glob set.
test utf8-8.2 {Chars outside the BMP} jim {
	string match "ab\[\u{12000}c\]d" ab\u{12000}d
} 1

# Character indices after a code point above U+FFFF: "d" is expected at 4.
test utf8-8.3 {Chars outside the BMP} jim {
	string last d "ab\u{101fff}cd"
} 4

# Unbraced form with five hex digits: the expected length is 2, i.e. the
# escape does not swallow all five digits as a single character.
test utf8-8.4 {Longer sequences} {
	string length \u12000
} 2

# \U followed by eight hex digits gives the same character as \ub5.
test utf8-8.5 {\U} jim {
	set x \U000000b5
} \ub5

# Braced form with nine hex digits is not treated as an escape: the expected
# value is the literal text with only the backslash removed.
test utf8-8.6 {\u invalid} {
	set x "\u{0000000b5}"
} "u{0000000b5}"

# The upper-case digraph U+01C4 at the start becomes the titlecase U+01C5.
test utf8-9.1 {string totitle} {
	string totitle "\u01c4-test"
} "\u01c5-test"

# A first character that is already titlecase is left unchanged.
test utf8-9.2 {string totitle} {
	string totitle "\u01c5-test"
} "\u01c5-test"

# Beyond the first character the string is lower-cased, so a trailing
# U+01C4 becomes the lower-case U+01C6.
test utf8-9.3 {string totitle} {
	string totitle "abc-\u01c4"
} "Abc-\u01c6"

# Regression check: scan used to step through the input by character length
# where it should have used byte length. U+0300 (decimal 768) is a two-byte
# character, so the %c after it must still yield "c" (99).
test utf8-10.1 {scan with utf-8} {
	scan "ab\u0300c" "%c%c%c%c" a b c d
	list $a $b $c $d
} {97 98 768 99}

# Final call into the harness. NOTE(review): `testreport` is defined in
# testing.tcl (not shown here); presumably it prints the pass/fail summary
# for the tests above - confirm there.
testreport