mirror of
https://github.com/tcltk/tcl.git
synced 2026-05-29 00:27:49 +08:00
1280 lines
48 KiB
Plaintext
1280 lines
48 KiB
Plaintext
# This file contains a collection of tests for tclEncoding.c
|
||
# Sourcing this file into Tcl runs the tests and generates output for errors.
|
||
# No output means no errors were found.
|
||
#
|
||
# Copyright © 1997 Sun Microsystems, Inc.
|
||
# Copyright © 1998-1999 Scriptics Corporation.
|
||
#
|
||
# See the file "license.terms" for information on usage and redistribution of
|
||
# this file, and for a DISCLAIMER OF ALL WARRANTIES.
|
||
|
||
if {"::tcltest" ni [namespace children]} {
|
||
package require tcltest 2.5
|
||
namespace import -force ::tcltest::*
|
||
}
|
||
source [file join [file dirname [info script]] tcltests.tcl]
|
||
|
||
namespace eval ::tcl::test::encoding {
|
||
variable x
|
||
|
||
catch {
|
||
::tcltest::loadTestedCommands
|
||
package require -exact tcl::test [info patchlevel]
|
||
}
|
||
|
||
proc toutf {args} {
|
||
variable x
|
||
lappend x "toutf $args"
|
||
}
|
||
proc fromutf {args} {
|
||
variable x
|
||
lappend x "fromutf $args"
|
||
}
|
||
|
||
proc runtests {} {
|
||
variable x
|
||
|
||
# Some tests require the testencoding command
|
||
testConstraint testencoding [llength [info commands testencoding]]
|
||
testConstraint testbytestring [llength [info commands testbytestring]]
|
||
testConstraint teststringbytes [llength [info commands teststringbytes]]
|
||
testConstraint exec [llength [info commands exec]]
|
||
|
||
# TclInitEncodingSubsystem is tested by the rest of this file
|
||
# TclFinalizeEncodingSubsystem is not currently tested
|
||
|
||
test encoding-1.1 {Tcl_GetEncoding: system encoding} -setup {
|
||
set old [encoding system]
|
||
} -constraints {testencoding} -body {
|
||
testencoding create foo [namespace origin toutf] [namespace origin fromutf]
|
||
encoding system foo
|
||
set x {}
|
||
encoding convertto abcd
|
||
return $x
|
||
} -cleanup {
|
||
encoding system $old
|
||
testencoding delete foo
|
||
} -result {{fromutf }}
|
||
test encoding-1.2 {Tcl_GetEncoding: existing encoding} {testencoding} {
|
||
testencoding create foo [namespace origin toutf] [namespace origin fromutf]
|
||
set x {}
|
||
encoding convertto foo abcd
|
||
testencoding delete foo
|
||
return $x
|
||
} {{fromutf }}
|
||
test encoding-1.3 {Tcl_GetEncoding: load encoding} {
|
||
list [encoding convertto jis0208 乎] \
|
||
[encoding convertfrom jis0208 8C]
|
||
} "8C 乎"
|
||
|
||
test encoding-2.1 {Tcl_FreeEncoding: refcount == 0} {
|
||
encoding convertto jis0208 乎
|
||
} {8C}
|
||
test encoding-2.2 {Tcl_FreeEncoding: refcount != 0} -setup {
|
||
set system [encoding system]
|
||
set path [encoding dirs]
|
||
} -constraints {testencoding} -body {
|
||
encoding system shiftjis ;# incr ref count
|
||
encoding dirs [list [pwd]]
|
||
set x [encoding convertto shiftjis 乎] ;# old one found
|
||
encoding system iso8859-1
|
||
llength shiftjis ;# Shimmer away any cache of Tcl_Encoding
|
||
lappend x [catch {encoding convertto shiftjis 乎} msg] $msg
|
||
} -cleanup {
|
||
encoding system iso8859-1
|
||
encoding dirs $path
|
||
encoding system $system
|
||
unset -nocomplain path
|
||
} -result "\x8C\xC1 1 {unknown encoding \"shiftjis\"}"
|
||
|
||
test encoding-3.1 {Tcl_GetEncodingName, NULL} -setup {
|
||
set old [encoding system]
|
||
} -body {
|
||
encoding system shiftjis
|
||
encoding system
|
||
} -cleanup {
|
||
encoding system $old
|
||
} -result {shiftjis}
|
||
test encoding-3.2 {Tcl_GetEncodingName, non-null} -setup {
|
||
set old [fconfigure stdout -encoding]
|
||
} -body {
|
||
fconfigure stdout -encoding jis0208
|
||
fconfigure stdout -encoding
|
||
} -cleanup {
|
||
fconfigure stdout -encoding $old
|
||
} -result {jis0208}
|
||
test encoding-3.3 {fconfigure -profile} -setup {
|
||
set old [fconfigure stdout -profile]
|
||
} -body {
|
||
fconfigure stdout -profile replace
|
||
fconfigure stdout -profile
|
||
} -cleanup {
|
||
fconfigure stdout -profile $old
|
||
} -result replace
|
||
|
||
test encoding-4.1 {Tcl_GetEncodingNames} -constraints {testencoding} -setup {
|
||
cd [makeDirectory tmp]
|
||
makeDirectory [file join tmp encoding]
|
||
set path [encoding dirs]
|
||
encoding dirs {}
|
||
catch {unset encodings}
|
||
catch {unset x}
|
||
} -body {
|
||
foreach encoding [encoding names] {
|
||
set encodings($encoding) 1
|
||
}
|
||
makeFile {} [file join tmp encoding junk.enc]
|
||
makeFile {} [file join tmp encoding junk2.enc]
|
||
encoding dirs [list [file join [pwd] encoding]]
|
||
foreach encoding [encoding names] {
|
||
if {![info exists encodings($encoding)]} {
|
||
lappend x $encoding
|
||
}
|
||
}
|
||
lsort $x
|
||
} -cleanup {
|
||
encoding dirs $path
|
||
cd [workingDirectory]
|
||
removeFile [file join tmp encoding junk2.enc]
|
||
removeFile [file join tmp encoding junk.enc]
|
||
removeDirectory [file join tmp encoding]
|
||
removeDirectory tmp
|
||
unset -nocomplain path
|
||
} -result {junk junk2}
|
||
|
||
test encoding-5.1 {Tcl_SetSystemEncoding} -setup {
|
||
set old [encoding system]
|
||
} -body {
|
||
encoding system jis0208
|
||
encoding convertto 乎
|
||
} -cleanup {
|
||
encoding system iso8859-1
|
||
encoding system $old
|
||
} -result {8C}
|
||
test encoding-5.2 {Tcl_SetSystemEncoding: test ref count} {
|
||
set old [encoding system]
|
||
encoding system $old
|
||
string compare $old [encoding system]
|
||
} {0}
|
||
|
||
test encoding-6.1 {Tcl_CreateEncoding: new} {testencoding} {
|
||
testencoding create foo [namespace code {toutf 1}] \
|
||
[namespace code {fromutf 2}]
|
||
set x {}
|
||
encoding convertfrom foo abcd
|
||
encoding convertto foo abcd
|
||
testencoding delete foo
|
||
return $x
|
||
} {{toutf 1} {fromutf 2}}
|
||
test encoding-6.2 {Tcl_CreateEncoding: replace encoding} {testencoding} {
|
||
testencoding create foo [namespace code {toutf a}] \
|
||
[namespace code {fromutf b}]
|
||
set x {}
|
||
encoding convertfrom foo abcd
|
||
encoding convertto foo abcd
|
||
testencoding delete foo
|
||
return $x
|
||
} {{toutf a} {fromutf b}}
|
||
|
||
test encoding-7.1 {Tcl_ExternalToUtfDString: small buffer} {
|
||
encoding convertfrom jis0208 8c8c8c8c
|
||
} 吾吾吾吾
|
||
test encoding-7.2 {Tcl_UtfToExternalDString: big buffer} {
|
||
set a 8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C
|
||
append a $a
|
||
append a $a
|
||
append a $a
|
||
append a $a
|
||
set x [encoding convertfrom jis0208 $a]
|
||
list [string length $x] [string index $x 0]
|
||
} "512 乎"
|
||
|
||
test encoding-8.1 {Tcl_ExternalToUtf} {
|
||
set f [open [file join [temporaryDirectory] dummy] w]
|
||
fconfigure $f -translation binary
|
||
puts -nonewline $f "ab\x8C\xC1g"
|
||
close $f
|
||
set f [open [file join [temporaryDirectory] dummy] r]
|
||
fconfigure $f -translation lf -encoding shiftjis
|
||
set x [read $f]
|
||
close $f
|
||
file delete [file join [temporaryDirectory] dummy]
|
||
return $x
|
||
} ab乎g
|
||
|
||
test encoding-9.1 {Tcl_UtfToExternalDString: small buffer} {
|
||
encoding convertto jis0208 "吾吾吾吾"
|
||
} {8c8c8c8c}
|
||
test encoding-9.2 {Tcl_UtfToExternalDString: big buffer} {
|
||
set a 乎乎乎乎乎乎乎乎
|
||
append a $a
|
||
append a $a
|
||
append a $a
|
||
append a $a
|
||
append a $a
|
||
append a $a
|
||
set x [encoding convertto jis0208 $a]
|
||
list [string length $x] [string range $x 0 1]
|
||
} "1024 8C"
|
||
|
||
test encoding-10.1 {Tcl_UtfToExternal} {
|
||
set f [open [file join [temporaryDirectory] dummy] w]
|
||
fconfigure $f -translation lf -encoding shiftjis
|
||
puts -nonewline $f ab乎g
|
||
close $f
|
||
set f [open [file join [temporaryDirectory] dummy] r]
|
||
fconfigure $f -translation binary
|
||
set x [read $f]
|
||
close $f
|
||
file delete [file join [temporaryDirectory] dummy]
|
||
return $x
|
||
} "ab\x8C\xC1g"
|
||
|
||
test encoding-11.1 {LoadEncodingFile: unknown encoding} {testencoding} {
|
||
set system [encoding system]
|
||
set path [encoding dirs]
|
||
encoding system iso8859-1
|
||
encoding dirs {}
|
||
llength jis0208 ;# Shimmer any cached Tcl_Encoding in shared literal
|
||
set x [list [catch {encoding convertto jis0208 乎} msg] $msg]
|
||
encoding dirs $path
|
||
unset -nocomplain path
|
||
encoding system $system
|
||
lappend x [encoding convertto jis0208 乎]
|
||
} {1 {unknown encoding "jis0208"} 8C}
|
||
test encoding-11.2 {LoadEncodingFile: single-byte} {
|
||
encoding convertfrom jis0201 \xA1
|
||
} 。
|
||
test encoding-11.3 {LoadEncodingFile: double-byte} {
|
||
encoding convertfrom jis0208 8C
|
||
} 乎
|
||
test encoding-11.4 {LoadEncodingFile: multi-byte} {
|
||
encoding convertfrom shiftjis \x8C\xC1
|
||
} 乎
|
||
test encoding-11.5 {LoadEncodingFile: escape file} {
|
||
encoding convertto iso2022 乎
|
||
} \x1B\$B8C\x1B(B
|
||
test encoding-11.5.1 {LoadEncodingFile: escape file} {
|
||
encoding convertto iso2022-jp 乎
|
||
} \x1B\$B8C\x1B(B
|
||
test encoding-11.6 {LoadEncodingFile: invalid file} -constraints {testencoding} -setup {
|
||
set system [encoding system]
|
||
set path [encoding dirs]
|
||
encoding system iso8859-1
|
||
} -body {
|
||
cd [temporaryDirectory]
|
||
encoding dirs [file join tmp encoding]
|
||
makeDirectory tmp
|
||
makeDirectory [file join tmp encoding]
|
||
set f [open [file join tmp encoding splat.enc] w]
|
||
fconfigure $f -translation binary
|
||
puts $f "abcdefghijklmnop"
|
||
close $f
|
||
encoding convertto splat 乎
|
||
} -returnCodes error -cleanup {
|
||
file delete [file join [temporaryDirectory] tmp encoding splat.enc]
|
||
removeDirectory [file join tmp encoding]
|
||
removeDirectory tmp
|
||
cd [workingDirectory]
|
||
encoding dirs $path
|
||
encoding system $system
|
||
unset -nocomplain path
|
||
} -result {invalid encoding file "splat"}
|
||
test encoding-11.8 {encoding: extended Unicode UTF-16} {
|
||
encoding convertto utf-16le 😹
|
||
} =Ø9Þ
|
||
test encoding-11.9 {encoding: extended Unicode UTF-16} {
|
||
encoding convertto utf-16be 😹
|
||
} Ø=Þ9
|
||
test encoding-11.10 {encoding: extended Unicode UTF-32} {
|
||
encoding convertto utf-32le 😹
|
||
} 9\xF6\x01\x00
|
||
test encoding-11.11 {encoding: extended Unicode UTF-32} {
|
||
encoding convertto utf-32be 😹
|
||
} \x00\x01\xF69
|
||
# OpenEncodingFile is fully tested by the rest of the tests in this file.
|
||
|
||
test encoding-12.1 {LoadTableEncoding: normal encoding} {
|
||
set x [encoding convertto iso8859-3 Ġ]
|
||
append x [encoding convertto -profile tcl8 iso8859-3 Õ]
|
||
append x [encoding convertfrom iso8859-3 Õ]
|
||
} "Õ?Ġ"
|
||
test encoding-12.2 {LoadTableEncoding: single-byte encoding} {
|
||
set x [encoding convertto iso8859-3 abĠg]
|
||
append x [encoding convertfrom iso8859-3 abÕg]
|
||
} "abÕgabĠg"
|
||
test encoding-12.3 {LoadTableEncoding: multi-byte encoding} {
|
||
set x [encoding convertto shiftjis ab乎g]
|
||
append x [encoding convertfrom shiftjis ab\x8C\xC1g]
|
||
} "ab\x8C\xC1gab乎g"
|
||
test encoding-12.4 {LoadTableEncoding: double-byte encoding} {
|
||
set x [encoding convertto jis0208 乎α]
|
||
append x [encoding convertfrom jis0208 8C&A]
|
||
} "8C&A乎α"
|
||
test encoding-12.5 {LoadTableEncoding: symbol encoding} {
|
||
set x [encoding convertto symbol γ]
|
||
append x [encoding convertto symbol g]
|
||
append x [encoding convertfrom symbol g]
|
||
} "ggγ"
|
||
test encoding-12.7 {cp864 [ecafd8611d]} {
|
||
encoding convertfrom cp864 \xA7
|
||
} €
|
||
test encoding-12.8 {cp165 [ecafd8611d]} {
|
||
encoding convertfrom cp165 \xA7
|
||
} ﺈ
|
||
|
||
test encoding-13.1 {LoadEscapeTable} {
|
||
encoding convertto iso2022 ab乎棙g
|
||
} ab\x1B\$B8C\x1B\$\(DD%\x1B(Bg
|
||
|
||
test encoding-15.1 {UtfToUtfProc} {
|
||
encoding convertto utf-8 £
|
||
} "\xC2\xA3"
|
||
test encoding-15.2 {UtfToUtfProc null character output} testbytestring {
|
||
binary scan [testbytestring [encoding convertto utf-8 \x00]] H* z
|
||
set z
|
||
} 00
|
||
test encoding-15.3 {UtfToUtfProc null character input} teststringbytes {
|
||
set y [encoding convertfrom utf-8 [encoding convertto utf-8 \x00]]
|
||
binary scan [teststringbytes $y] H* z
|
||
set z
|
||
} c080
|
||
test encoding-15.4 {UtfToUtfProc emoji character input} -body {
|
||
set x \xED\xA0\xBD\xED\xB8\x82
|
||
set y [encoding convertfrom -profile tcl8 utf-8 \xED\xA0\xBD\xED\xB8\x82]
|
||
list [string length $x] $y
|
||
} -result "6 \uD83D\uDE02"
|
||
test encoding-15.5 {UtfToUtfProc emoji character input} {
|
||
set x \xF0\x9F\x98\x82
|
||
set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82]
|
||
list [string length $x] $y
|
||
} "4 😂"
|
||
test encoding-15.6 {UtfToUtfProc emoji character output} {
|
||
set x \uDE02\uD83D\uDE02\uD83D
|
||
set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83D\uDE02\uD83D]
|
||
binary scan $y H* z
|
||
list [string length $y] $z
|
||
} {12 edb882eda0bdedb882eda0bd}
|
||
test encoding-15.7 {UtfToUtfProc emoji character output} {
|
||
set x \uDE02\uD83D\uD83D
|
||
set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83D\uD83D]
|
||
binary scan $y H* z
|
||
list [string length $x] [string length $y] $z
|
||
} {3 9 edb882eda0bdeda0bd}
|
||
test encoding-15.8 {UtfToUtfProc emoji character output} {
|
||
set x \uDE02\uD83Dé
|
||
set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83Dé]
|
||
binary scan $y H* z
|
||
list [string length $x] [string length $y] $z
|
||
} {3 8 edb882eda0bdc3a9}
|
||
test encoding-15.9 {UtfToUtfProc emoji character output} {
|
||
set x \uDE02\uD83DX
|
||
set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83DX]
|
||
binary scan $y H* z
|
||
list [string length $x] [string length $y] $z
|
||
} {3 7 edb882eda0bd58}
|
||
test encoding-15.10 {UtfToUtfProc high surrogate character output} {
|
||
set x \uDE02é
|
||
set y [encoding convertto -profile tcl8 utf-8 \uDE02é]
|
||
binary scan $y H* z
|
||
list [string length $x] [string length $y] $z
|
||
} {2 5 edb882c3a9}
|
||
test encoding-15.11 {UtfToUtfProc low surrogate character output} {
|
||
set x \uDA02é
|
||
set y [encoding convertto -profile tcl8 utf-8 \uDA02é]
|
||
binary scan $y H* z
|
||
list [string length $x] [string length $y] $z
|
||
} {2 5 eda882c3a9}
|
||
test encoding-15.12 {UtfToUtfProc high surrogate character output} {
|
||
set x \uDE02Y
|
||
set y [encoding convertto -profile tcl8 utf-8 \uDE02Y]
|
||
binary scan $y H* z
|
||
list [string length $x] [string length $y] $z
|
||
} {2 4 edb88259}
|
||
test encoding-15.13 {UtfToUtfProc low surrogate character output} {
|
||
set x \uDA02Y
|
||
set y [encoding convertto -profile tcl8 utf-8 \uDA02Y]
|
||
binary scan $y H* z
|
||
list [string length $x] [string length $y] $z
|
||
} {2 4 eda88259}
|
||
test encoding-15.14 {UtfToUtfProc high surrogate character output} {
|
||
set x \uDE02
|
||
set y [encoding convertto -profile tcl8 utf-8 \uDE02]
|
||
binary scan $y H* z
|
||
list [string length $x] [string length $y] $z
|
||
} {1 3 edb882}
|
||
test encoding-15.15 {UtfToUtfProc low surrogate character output} {
|
||
set x \uDA02
|
||
set y [encoding convertto -profile tcl8 utf-8 \uDA02]
|
||
binary scan $y H* z
|
||
list [string length $x] [string length $y] $z
|
||
} {1 3 eda882}
|
||
test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} {
|
||
set x \xF0\xA0\xA1\xC2
|
||
set y [encoding convertfrom -profile tcl8 utf-8 \xF0\xA0\xA1\xC2]
|
||
list [string length $x] $y
|
||
} "4 \xF0\xA0\xA1\xC2"
|
||
test encoding-15.17 {UtfToUtfProc emoji character output} {
|
||
set x 😂
|
||
set y [encoding convertto utf-8 😂]
|
||
binary scan $y H* z
|
||
list [string length $y] $z
|
||
} {4 f09f9882}
|
||
test encoding-15.18 {UtfToUtfProc CESU-8 6-byte sequence} {
|
||
set y [encoding convertto cesu-8 \U10000]
|
||
binary scan $y H* z
|
||
list [string length $y] $z
|
||
} {6 eda080edb080}
|
||
test encoding-15.19 {UtfToUtfProc CESU-8 upper surrogate} {
|
||
set y [encoding convertto cesu-8 \uD800]
|
||
binary scan $y H* z
|
||
list [string length $y] $z
|
||
} {3 eda080}
|
||
test encoding-15.20 {UtfToUtfProc CESU-8 lower surrogate} {
|
||
set y [encoding convertto cesu-8 \uDC00]
|
||
binary scan $y H* z
|
||
list [string length $y] $z
|
||
} {3 edb080}
|
||
test encoding-15.21 {UtfToUtfProc CESU-8 noncharacter} {
|
||
set y [encoding convertto cesu-8 \uFFFF]
|
||
binary scan $y H* z
|
||
list [string length $y] $z
|
||
} {3 efbfbf}
|
||
test encoding-15.22 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} {
|
||
set y [encoding convertto cesu-8 \x80]
|
||
binary scan $y H* z
|
||
list [string length $y] $z
|
||
} {2 c280}
|
||
test encoding-15.23 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} {
|
||
set y [encoding convertto cesu-8 \u100]
|
||
binary scan $y H* z
|
||
list [string length $y] $z
|
||
} {2 c480}
|
||
test encoding-15.24 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} {
|
||
set y [encoding convertto cesu-8 \u3FF]
|
||
binary scan $y H* z
|
||
list [string length $y] $z
|
||
} {2 cfbf}
|
||
test encoding-15.25 {UtfToUtfProc CESU-8} {
|
||
encoding convertfrom cesu-8 \x00
|
||
} \x00
|
||
test encoding-15.26 {UtfToUtfProc CESU-8} {
|
||
encoding convertfrom -profile tcl8 cesu-8 \xC0\x80
|
||
} \x00
|
||
test encoding-15.27 {UtfToUtfProc -profile strict CESU-8} {
|
||
encoding convertfrom -profile strict cesu-8 \x00
|
||
} \x00
|
||
test encoding-15.28 {UtfToUtfProc -profile strict CESU-8} -body {
|
||
encoding convertfrom -profile strict cesu-8 \xC0\x80
|
||
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'}
|
||
test encoding-15.29 {UtfToUtfProc CESU-8} {
|
||
encoding convertto cesu-8 \x00
|
||
} \x00
|
||
test encoding-15.30 {UtfToUtfProc -profile strict CESU-8} {
|
||
encoding convertto -profile strict cesu-8 \x00
|
||
} \x00
|
||
test encoding-15.31 {UtfToUtfProc -profile strict CESU-8 (bytes F0-F4 are invalid)} -body {
|
||
encoding convertfrom -profile strict cesu-8 \xF1\x86\x83\x9C
|
||
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF1'}
|
||
test encoding-15.32 {UtfToUtfProc CESU-8 [2f22a7364d]} -body {
|
||
encoding convertto cesu-8 \U1f600
|
||
} -result \xED\xA0\xBD\xED\xB8\x80
|
||
test encoding-15.33 {UtfToUtfProc CESU-8 [63325009a8]} -body {
|
||
encoding convertto cesu-8 \u0400
|
||
} -result \xD0\x80
|
||
|
||
test encoding-16.1 {Utf16ToUtfProc} -body {
|
||
set val [encoding convertfrom utf-16 NN]
|
||
list $val [format %x [scan $val %c]]
|
||
} -result "乎 4e4e"
|
||
test encoding-16.2 {Utf16ToUtfProc} -body {
|
||
set val [encoding convertfrom utf-16 "\xD8\xD8\xDC\xDC"]
|
||
list $val [format %x [scan $val %c]]
|
||
} -result "\U460DC 460dc"
|
||
test encoding-16.3 {Utf16ToUtfProc} -body {
|
||
set val [encoding convertfrom -profile tcl8 utf-16 "\xDC\xDC"]
|
||
list $val [format %x [scan $val %c]]
|
||
} -result "\uDCDC dcdc"
|
||
test encoding-16.4 {Ucs2ToUtfProc} -body {
|
||
set val [encoding convertfrom ucs-2 NN]
|
||
list $val [format %x [scan $val %c]]
|
||
} -result "乎 4e4e"
|
||
test encoding-16.5 {Ucs2ToUtfProc} -body {
|
||
set val [encoding convertfrom ucs-2 "\xD8\xD8\xDC\xDC"]
|
||
list $val [format %x [scan $val %c]]
|
||
} -result "\U460DC 460dc"
|
||
test encoding-16.6 {Utf32ToUtfProc} -body {
|
||
set val [encoding convertfrom utf-32le NN\0\0]
|
||
list $val [format %x [scan $val %c]]
|
||
} -result "乎 4e4e"
|
||
test encoding-16.7 {Utf32ToUtfProc} -body {
|
||
set val [encoding convertfrom utf-32be \0\0NN]
|
||
list $val [format %x [scan $val %c]]
|
||
} -result "乎 4e4e"
|
||
test encoding-16.8 {Utf32ToUtfProc} -body {
|
||
set val [encoding convertfrom -profile tcl8 utf-32 \x41\x00\x00\x41]
|
||
list $val [format %x [scan $val %c]]
|
||
} -result "\uFFFD fffd"
|
||
test encoding-16.9 {Utf32ToUtfProc} -body {
|
||
encoding convertfrom -profile tcl8 utf-32le \x00\xD8\x00\x00
|
||
} -result \uD800
|
||
test encoding-16.10 {Utf32ToUtfProc} -body {
|
||
encoding convertfrom -profile tcl8 utf-32le \x00\xDC\x00\x00
|
||
} -result \uDC00
|
||
test encoding-16.11 {Utf32ToUtfProc} -body {
|
||
encoding convertfrom -profile tcl8 utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00
|
||
} -result \uD800\uDC00
|
||
test encoding-16.12 {Utf32ToUtfProc} -body {
|
||
encoding convertfrom -profile tcl8 utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00
|
||
} -result \uDC00\uD800
|
||
test encoding-16.13 {Utf16ToUtfProc} -body {
|
||
encoding convertfrom -profile tcl8 utf-16le \x00\xD8
|
||
} -result \uD800
|
||
test encoding-16.14 {Utf16ToUtfProc} -body {
|
||
encoding convertfrom -profile tcl8 utf-16le \x00\xDC
|
||
} -result \uDC00
|
||
test encoding-16.15 {Utf16ToUtfProc} -body {
|
||
encoding convertfrom utf-16le \x00\xD8\x00\xDC
|
||
} -result \U010000
|
||
test encoding-16.16 {Utf16ToUtfProc} -body {
|
||
encoding convertfrom -profile tcl8 utf-16le \x00\xDC\x00\xD8
|
||
} -result \uDC00\uD800
|
||
test encoding-16.17 {Utf32ToUtfProc} -body {
|
||
list [encoding convertfrom -profile strict -failindex idx utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00] [set idx]
|
||
} -result {A 4}
|
||
|
||
test encoding-16.18 {
|
||
Utf16ToUtfProc, Tcl_UniCharToUtf, surrogate pairs in utf-16
|
||
} -body {
|
||
apply [list {} {
|
||
for {set i 0xD800} {$i < 0xDBFF} {incr i} {
|
||
for {set j 0xDC00} {$j < 0xDFFF} {incr j} {
|
||
set string [binary format S2 [list $i $j]]
|
||
set status [catch {
|
||
set decoded [encoding convertfrom utf-16be $string]
|
||
set encoded [encoding convertto utf-16be $decoded]
|
||
}]
|
||
if {$status || ( $encoded ne $string )} {
|
||
return [list [format %x $i] [format %x $j]]
|
||
}
|
||
}
|
||
}
|
||
return done
|
||
} [namespace current]]
|
||
} -result done
|
||
test encoding-16.19.strict {Utf16ToUtfProc, bug [d19fe0a5b]} -body {
|
||
encoding convertfrom -profile strict utf-16 "\x41\x41\x41"
|
||
} -returnCodes 1 -result {unexpected byte sequence starting at index 2: '\x41'}
|
||
test encoding-16.19.tcl8 {Utf16ToUtfProc, bug [d19fe0a5b]} -body {
|
||
encoding convertfrom -profile tcl8 utf-16 "\x41\x41\x41"
|
||
} -result \u4141\uFFFD
|
||
test encoding-16.20.tcl8 {Utf16ToUtfProc, bug [d19fe0a5b]} -body {
|
||
encoding convertfrom -profile tcl8 utf-16 "\xD8\xD8"
|
||
} -result \uD8D8
|
||
test encoding-16.20.strict {Utf16ToUtfProc, bug [d19fe0a5b]} -body {
|
||
encoding convertfrom -profile strict utf-16 "\xD8\xD8"
|
||
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xD8'}
|
||
test encoding-16.21.tcl8 {Utf32ToUtfProc, bug [d19fe0a5b]} -body {
|
||
encoding convertfrom -profile tcl8 utf-32 "\x00\x00\x00\x00\x41\x41"
|
||
} -result \x00\uFFFD
|
||
test encoding-16.21.strict {Utf32ToUtfProc, bug [d19fe0a5b]} -body {
|
||
encoding convertfrom -profile strict utf-32 "\x00\x00\x00\x00\x41\x41"
|
||
} -returnCodes 1 -result {unexpected byte sequence starting at index 4: '\x41'}
|
||
test encoding-16.22 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body {
|
||
encoding convertfrom -profile strict utf-16le \x00\xD8
|
||
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'}
|
||
test encoding-16.23 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body {
|
||
encoding convertfrom -profile strict utf-16le \x00\xDC
|
||
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'}
|
||
test encoding-16.24 {Utf32ToUtfProc} -body {
|
||
encoding convertfrom -profile tcl8 utf-32 "\xFF\xFF\xFF\xFF"
|
||
} -result \uFFFD
|
||
test encoding-16.25.strict {Utf32ToUtfProc} -body {
|
||
encoding convertfrom -profile strict utf-32 "\x01\x00\x00\x01"
|
||
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x01'}
|
||
test encoding-16.25.tcl8 {Utf32ToUtfProc} -body {
|
||
encoding convertfrom -profile tcl8 utf-32 "\x01\x00\x00\x01"
|
||
} -result \uFFFD
|
||
|
||
test encoding-17.1 {UtfToUtf16Proc} -body {
|
||
encoding convertto utf-16 "\U460DC"
|
||
} -result "\xD8\xD8\xDC\xDC"
|
||
test encoding-17.2 {UtfToUcs2Proc} -body {
|
||
encoding convertfrom utf-16 \xD8\xD8\xDC\xDC
|
||
} -result "\U460DC"
|
||
test encoding-17.3 {UtfToUtf16Proc} -body {
|
||
encoding convertto -profile tcl8 utf-16be "\uDCDC"
|
||
} -result "\xDC\xDC"
|
||
test encoding-17.4 {UtfToUtf16Proc} -body {
|
||
encoding convertto -profile tcl8 utf-16le "\uD8D8"
|
||
} -result "\xD8\xD8"
|
||
test encoding-17.5 {UtfToUtf32Proc} -body {
|
||
encoding convertto utf-32le "\U460DC"
|
||
} -result "\xDC\x60\x04\x00"
|
||
test encoding-17.6 {UtfToUtf32Proc} -body {
|
||
encoding convertto utf-32be "\U460DC"
|
||
} -result "\x00\x04\x60\xDC"
|
||
test encoding-17.7 {UtfToUtf16Proc} -body {
|
||
encoding convertto -profile strict utf-16be "\uDCDC"
|
||
} -returnCodes error -result {unexpected character at index 0: 'U+00DCDC'}
|
||
test encoding-17.8 {UtfToUtf16Proc} -body {
|
||
encoding convertto -profile strict utf-16le "\uD8D8"
|
||
} -returnCodes error -result {unexpected character at index 0: 'U+00D8D8'}
|
||
test encoding-17.9 {Utf32ToUtfProc} -body {
|
||
encoding convertfrom -profile strict utf-32 "\xFF\xFF\xFF\xFF"
|
||
} -returnCodes error -result {unexpected byte sequence starting at index 0: '\xFF'}
|
||
test encoding-17.10 {Utf32ToUtfProc} -body {
|
||
encoding convertfrom -profile tcl8 utf-32 "\xFF\xFF\xFF\xFF"
|
||
} -result \uFFFD
|
||
test encoding-17.11 {Utf32ToUtfProc} -body {
|
||
encoding convertfrom -profile strict utf-32le "\x00\xD8\x00\x00"
|
||
} -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'}
|
||
test encoding-17.12 {Utf32ToUtfProc} -body {
|
||
encoding convertfrom -profile strict utf-32le "\x00\xDC\x00\x00"
|
||
} -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'}
|
||
|
||
test encoding-18.1 {TableToUtfProc on invalid input} -body {
|
||
list [catch {encoding convertto -profile tcl8 jis0208 \\} res] $res
|
||
} -result {0 !)}
|
||
test encoding-18.2 {TableToUtfProc on invalid input with -profile strict} -body {
|
||
list [catch {encoding convertto -profile strict jis0208 \\} res] $res
|
||
} -result {1 {unexpected character at index 0: 'U+00005C'}}
|
||
test encoding-18.3 {TableToUtfProc on invalid input with -profile strict -failindex} -body {
|
||
list [catch {encoding convertto -profile strict -failindex pos jis0208 \\} res] $res $pos
|
||
} -result {0 {} 0}
|
||
test encoding-18.4 {TableToUtfProc on invalid input with -failindex -profile strict} -body {
|
||
list [catch {encoding convertto -failindex pos -profile strict jis0208 \\} res] $res $pos
|
||
} -result {0 {} 0}
|
||
test encoding-18.5 {TableToUtfProc on invalid input with -failindex} -body {
|
||
list [catch {encoding convertto -profile tcl8 -failindex pos jis0208 \\} res] $res $pos
|
||
} -result {0 !) -1}
|
||
test encoding-18.6 {TableToUtfProc on invalid input with -profile tcl8} -body {
|
||
list [catch {encoding convertto -profile tcl8 jis0208 \\} res] $res
|
||
} -result {0 !)}
|
||
|
||
test encoding-19.1 {TableFromUtfProc} -body {
|
||
encoding convertfrom -profile tcl8 ascii AÁ
|
||
} -result AÁ
|
||
test encoding-19.2 {TableFromUtfProc} -body {
|
||
encoding convertfrom -profile tcl8 ascii AÁ
|
||
} -result AÁ
|
||
test encoding-19.3 {TableFromUtfProc} -body {
|
||
encoding convertfrom -profile strict ascii AÁ
|
||
} -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\xC1'}
|
||
test encoding-19.4 {TableFromUtfProc} -body {
|
||
list [encoding convertfrom -profile tcl8 -failindex idx ascii AÁ] [set idx]
|
||
} -result [list A\xC1 -1]
|
||
test encoding-19.5 {TableFromUtfProc} -body {
|
||
list [encoding convertfrom -failindex idx -profile strict ascii A\xC1] [set idx]
|
||
} -result {A 1}
|
||
test encoding-19.6 {TableFromUtfProc} -body {
|
||
list [encoding convertfrom -failindex idx -profile strict ascii AÁB] [set idx]
|
||
} -result {A 1}
|
||
|
||
test encoding-20.1 {TableFreefProc} {
|
||
} {}
|
||
|
||
test encoding-21.1 {EscapeToUtfProc} {
|
||
} {}
|
||
|
||
test encoding-22.1 {EscapeFromUtfProc} {
|
||
} {}
|
||
|
||
set iso2022encData "\x1B\$B;d\$I\$b\$G\$O!\"%A%C%W\$49XF~;~\$K\$4EPO?\$\$\$?\$@\$\$\$?\$4=;=j\$r%-%c%C%7%e%\"%&%H\$N:]\$N\x1B(B
|
||
\x1B\$B>.@Z<jAwIU@h\$H\$7\$F;HMQ\$7\$F\$*\$j\$^\$9!#62\$lF~\$j\$^\$9\$,!\"@5\$7\$\$=;=j\$r\$4EPO?\$7\$J\$*\x1B(B
|
||
\x1B\$B\$*4j\$\$\$\$\$?\$7\$^\$9!#\$^\$?!\"BgJQ62=L\$G\$9\$,!\"=;=jJQ99\$N\$\"\$H!\"F|K\\8l%5!<%S%9It!J\x1B(B
|
||
casino_japanese@___.com \x1B\$B!K\$^\$G\$4=;=jJQ99:Q\$NO\"Mm\$r\$\$\$?\$@\$1\$J\$\$\$G\x1B(B
|
||
\x1B\$B\$7\$g\$&\$+!)\x1B(B"
|
||
|
||
set iso2022uniData [encoding convertfrom iso2022-jp $iso2022encData]
|
||
set iso2022uniData2 "私どもでは、チップご購入時にご登録いただいたご住所をキャッシュアウトの際の
|
||
小切手送付先として使用しております。恐れ入りますが、正しい住所をご登録しなお
|
||
お願いいたします。また、大変恐縮ですが、住所変更のあと、日本語サービス部(
|
||
casino_japanese@___.com )までご住所変更済の連絡をいただけないで
|
||
しょうか?"
|
||
|
||
cd [temporaryDirectory]
|
||
set fid [open iso2022.txt w]
|
||
fconfigure $fid -translation binary
|
||
puts -nonewline $fid $iso2022encData
|
||
close $fid
|
||
|
||
test encoding-23.1 {iso2022-jp escape encoding test} {
|
||
string equal $iso2022uniData $iso2022uniData2
|
||
} 1
|
||
test encoding-23.2 {iso2022-jp escape encoding test} {
|
||
# This checks that 'gets' isn't resetting the encoding inappropriately.
|
||
# [Bug #523988]
|
||
set fid [open iso2022.txt r]
|
||
fconfigure $fid -encoding iso2022-jp
|
||
set out ""
|
||
set count 0
|
||
while {[set num [gets $fid line]] >= 0} {
|
||
if {$count} {
|
||
incr count 1 ; # account for newline
|
||
append out \n
|
||
}
|
||
append out $line
|
||
incr count $num
|
||
}
|
||
close $fid
|
||
if {[string compare $iso2022uniData $out]} {
|
||
return -code error "iso2022-jp read in doesn't match original"
|
||
}
|
||
list $count $out
|
||
} [list [string length $iso2022uniData] $iso2022uniData]
|
||
test encoding-23.3 {iso2022-jp escape encoding test} {
|
||
# read $fis <size> reads size in chars, not raw bytes.
|
||
set fid [open iso2022.txt r]
|
||
fconfigure $fid -encoding iso2022-jp
|
||
set data [read $fid 50]
|
||
close $fid
|
||
return $data
|
||
} [string range $iso2022uniData 0 49] ; # 0 .. 49 inclusive == 50
|
||
cd [workingDirectory]
|
||
|
||
# Code to make the next few tests more intelligible; the code being tested
|
||
# should be in the body of the test!
|
||
proc runInSubprocess {contents {filename iso2022.tcl}} {
|
||
set theFile [makeFile $contents $filename]
|
||
try {
|
||
exec [interpreter] $theFile
|
||
} finally {
|
||
removeFile $theFile
|
||
}
|
||
}
|
||
|
||
test encoding-24.1 {EscapeFreeProc on open channels} exec {
|
||
runInSubprocess {
|
||
set f [open [file join [file dirname [info script]] iso2022.txt]]
|
||
fconfigure $f -encoding iso2022-jp
|
||
gets $f
|
||
}
|
||
} {}
|
||
test encoding-24.2 {EscapeFreeProc on open channels} {exec} {
|
||
# Bug #524674 output
|
||
runInSubprocess {
|
||
encoding system cp1252; # Bug #2891556 crash revelator
|
||
fconfigure stdout -encoding iso2022-jp
|
||
puts ab乎棙g
|
||
set env(TCL_FINALIZE_ON_EXIT) 1
|
||
exit
|
||
}
|
||
} "ab\x1B\$B8C\x1B\$(DD%\x1B(Bg"
|
||
test encoding-24.3 {EscapeFreeProc on open channels} {stdio} {
|
||
# Bug #219314 - if we don't free escape encodings correctly on channel
|
||
# closure, we go boom
|
||
set file [makeFile {
|
||
encoding system iso2022-jp
|
||
set a "乎乞也"; # 3 Japanese Kanji letters
|
||
puts $a
|
||
} iso2022.tcl]
|
||
set f [open "|[list [interpreter] $file]"]
|
||
fconfigure $f -encoding iso2022-jp
|
||
set count [gets $f line]
|
||
close $f
|
||
removeFile iso2022.tcl
|
||
list $count $line
|
||
} [list 3 乎乞也]
|
||
|
||
test encoding-24.4.strict {Parse invalid utf-8, strict} -body {
|
||
encoding convertfrom -profile strict utf-8 "\xC0\x80"
|
||
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'}
|
||
test encoding-24.4.tcl8 {UtfToUtfProc utf-8} {
|
||
encoding convertfrom -profile tcl8 utf-8 \xC0\x80
|
||
} \x00
|
||
test encoding-24.5 {Parse valid or invalid utf-8} {
|
||
string length [encoding convertfrom -profile tcl8 utf-8 "\xC0\x81"]
|
||
} 2
|
||
test encoding-24.6 {Parse valid or invalid utf-8} {
|
||
string length [encoding convertfrom -profile tcl8 utf-8 "\xC1\xBF"]
|
||
} 2
|
||
test encoding-24.7 {Parse valid or invalid utf-8} {
|
||
string length [encoding convertfrom utf-8 "\xC2\x80"]
|
||
} 1
|
||
test encoding-24.8 {Parse valid or invalid utf-8} {
|
||
string length [encoding convertfrom -profile tcl8 utf-8 "\xE0\x80\x80"]
|
||
} 3
|
||
test encoding-24.9 {Parse valid or invalid utf-8} {
|
||
string length [encoding convertfrom -profile tcl8 utf-8 "\xE0\x9F\xBF"]
|
||
} 3
|
||
test encoding-24.10 {Parse valid or invalid utf-8} {
|
||
string length [encoding convertfrom utf-8 "\xE0\xA0\x80"]
|
||
} 1
|
||
test encoding-24.11 {Parse valid or invalid utf-8} {
|
||
string length [encoding convertfrom -profile tcl8 utf-8 "\xEF\xBF\xBF"]
|
||
} 1
|
||
test encoding-24.12 {Parse invalid utf-8} -body {
|
||
encoding convertfrom -profile tcl8 utf-8 "\xC0\x81"
|
||
} -result \xC0\x81
|
||
test encoding-24.12.1 {Parse invalid utf-8} -body {
|
||
encoding convertfrom -profile strict utf-8 "\xC0\x81"
|
||
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'}
|
||
test encoding-24.13 {Parse invalid utf-8} -body {
|
||
encoding convertfrom -profile tcl8 utf-8 "\xC1\xBF"
|
||
} -result \xC1\xBF
|
||
test encoding-24.13.1 {Parse invalid utf-8} -body {
|
||
encoding convertfrom -profile strict utf-8 "\xC1\xBF"
|
||
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC1'}
|
||
test encoding-24.14 {Parse valid utf-8} {
|
||
encoding convertfrom utf-8 "\xC2\x80"
|
||
} \x80
|
||
test encoding-24.15.strict {Parse invalid utf-8, -profile strict} -body {
|
||
encoding convertfrom -profile strict utf-8 "Z\xE0\x80"
|
||
} -returnCodes 1 -result "unexpected byte sequence starting at index 1: '\\xE0'"
|
||
test encoding-24.15.tcl8 {Parse invalid utf-8, -profile tcl8} -body {
|
||
encoding convertfrom -profile tcl8 utf-8 "Z\xE0\x80"
|
||
} -result Z\xE0\u20AC
|
||
test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body {
|
||
encoding convertto utf-8 [testbytestring "Z\u4343\x80"]
|
||
} -returnCodes 1 -result {expected code point values below 0xff but value at byte offset 1 was 0x4343}
|
||
test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring -body {
|
||
encoding convertto utf-8 [testbytestring "Z\xE0\x80"]
|
||
} -result "Z\xC3\xA0\xE2\x82\xAC"
|
||
test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring -body {
|
||
encoding convertto utf-8 [testbytestring "Z\xE0\x80xxxxxx"]
|
||
} -result "Z\xC3\xA0\xE2\x82\xACxxxxxx"
|
||
test encoding-24.19.1 {Parse valid or invalid utf-8} -body {
|
||
encoding convertto -profile tcl8 utf-8 "ZX\uD800"
|
||
} -result ZX\xED\xA0\x80
|
||
test encoding-24.19.2 {Parse valid or invalid utf-8} -body {
|
||
encoding convertto -profile strict utf-8 "ZX\uD800"
|
||
} -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'"
|
||
test encoding-24.20 {Parse with -profile tcl8 but without providing encoding} -body {
|
||
encoding convertfrom -profile tcl8 "\x20"
|
||
} -result {wrong # args: should be "::tcl::encoding::convertfrom ?-profile profile? ?-failindex var? encoding data" or "::tcl::encoding::convertfrom data"} -returnCodes error
|
||
test encoding-24.21 {Parse with -profile tcl8 but without providing encoding} -body {
|
||
string length [encoding convertto -profile tcl8 "\x20"]
|
||
} -result {wrong # args: should be "::tcl::encoding::convertto ?-profile profile? ?-failindex var? encoding data" or "::tcl::encoding::convertto data"} -returnCodes error
|
||
test encoding-24.22 {Syntax error, two encodings} -body {
|
||
encoding convertfrom iso8859-1 utf-8 "ZX\uD800"
|
||
} -result {bad option "iso8859-1": must be -profile or -failindex} -returnCodes error
|
||
test encoding-24.23 {Syntax error, two encodings} -body {
|
||
encoding convertto iso8859-1 utf-8 "ZX\uD800"
|
||
} -result {bad option "iso8859-1": must be -profile or -failindex} -returnCodes error
|
||
test encoding-24.24 {Parse invalid utf-8 with -profile strict} -body {
|
||
encoding convertfrom -profile strict utf-8 "\xC0\x80\x00\x00"
|
||
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'}
|
||
test encoding-24.25 {Parse invalid utf-8 with -profile strict} -body {
|
||
encoding convertfrom -profile strict utf-8 "\x40\x80\x00\x00"
|
||
} -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\x80'}
|
||
test encoding-24.26 {Parse valid utf-8 with -profile strict} -body {
|
||
encoding convertfrom -profile strict utf-8 "\xF1\x80\x80\x80"
|
||
} -result \U40000
|
||
test encoding-24.27 {Parse invalid utf-8 with -profile strict} -body {
|
||
encoding convertfrom -profile strict utf-8 "\xF0\x80\x80\x80"
|
||
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF0'}
|
||
test encoding-24.28 {Parse invalid utf-8 with -profile strict} -body {
|
||
encoding convertfrom -profile strict utf-8 "\xFF\x00\x00"
|
||
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xFF'}
|
||
test encoding-24.29 {Parse invalid utf-8} -body {
|
||
encoding convertfrom utf-8 \xEF\xBF\xBF
|
||
} -result \uFFFF
|
||
test encoding-24.30 {Parse noncharacter with -profile strict} -body {
|
||
encoding convertfrom -profile strict utf-8 \xEF\xBF\xBF
|
||
} -result \uFFFF
|
||
test encoding-24.31 {Parse invalid utf-8 with -profile tcl8} -body {
|
||
encoding convertfrom -profile tcl8 utf-8 \xEF\xBF\xBF
|
||
} -result \uFFFF
|
||
test encoding-24.32 {Try to generate invalid utf-8} -body {
|
||
encoding convertto utf-8 \uFFFF
|
||
} -result \xEF\xBF\xBF
|
||
test encoding-24.33 {Try to generate invalid utf-8} -body {
|
||
encoding convertto -profile strict utf-8 \uFFFF
|
||
} -result \xEF\xBF\xBF
|
||
test encoding-24.34 {Try to generate invalid utf-8 with -profile tcl8} -body {
|
||
encoding convertto -profile tcl8 utf-8 \uFFFF
|
||
} -result \xEF\xBF\xBF
|
||
test encoding-24.35 {Parse invalid utf-8} -body {
|
||
encoding convertfrom -profile tcl8 utf-8 \xED\xA0\x80
|
||
} -result \uD800
|
||
test encoding-24.36 {Parse invalid utf-8 with -profile strict} -body {
|
||
encoding convertfrom -profile strict utf-8 \xED\xA0\x80
|
||
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'}
|
||
test encoding-24.37 {Parse invalid utf-8 with -profile tcl8} -body {
|
||
encoding convertfrom -profile tcl8 utf-8 \xED\xA0\x80
|
||
} -result \uD800
|
||
test encoding-24.38.1 {Try to generate invalid utf-8} -body {
|
||
encoding convertto -profile tcl8 utf-8 \uD800
|
||
} -result \xED\xA0\x80
|
||
test encoding-24.38.2 {Try to generate invalid utf-8 - default profile} -body {
|
||
encoding convertto utf-8 \uD800
|
||
} -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'}
|
||
test encoding-24.39 {Try to generate invalid utf-8 with -profile strict} -body {
|
||
encoding convertto -profile strict utf-8 \uD800
|
||
} -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'}
|
||
test encoding-24.40 {Try to generate invalid utf-8 with -profile tcl8} -body {
|
||
encoding convertto -profile tcl8 utf-8 \uD800
|
||
} -result \xED\xA0\x80
|
||
test encoding-24.41 {Parse invalid utf-8 with -profile strict} -body {
|
||
encoding convertfrom -profile strict utf-8 \xED\xA0\x80\xED\xB0\x80
|
||
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'}
|
||
test encoding-24.42 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body {
|
||
encoding convertfrom -profile tcl8 utf-8 \xF0\x80\x80\x80
|
||
} -result \xF0\u20AC\u20AC\u20AC
|
||
test encoding-24.43 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body {
|
||
encoding convertfrom -profile tcl8 utf-8 \x80
|
||
} -result \u20AC
|
||
test encoding-24.44 {Try to generate invalid ucs-2 with -profile strict} -body {
|
||
encoding convertto -profile strict ucs-2 \uD800
|
||
} -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'}
|
||
test encoding-24.45 {Try to generate invalid ucs-2 with -profile strict} -body {
|
||
encoding convertto -profile strict ucs-2 \U10000
|
||
} -returnCodes 1 -result {unexpected character at index 0: 'U+010000'}
|
||
|
||
file delete [file join [temporaryDirectory] iso2022.txt]
|
||
|
||
#
|
||
# Begin jajp encoding round-trip conformity tests
|
||
#
|
||
proc foreach-jisx0208 {varName command} {
|
||
upvar 1 $varName code
|
||
foreach range {
|
||
{2121 217E}
|
||
{2221 222E}
|
||
{223A 2241}
|
||
{224A 2250}
|
||
{225C 226A}
|
||
{2272 2279}
|
||
{227E 227E}
|
||
{2330 2339}
|
||
{2421 2473}
|
||
{2521 2576}
|
||
{2821 2821}
|
||
{282C 282C}
|
||
{2837 2837}
|
||
|
||
{30 21 4E 7E}
|
||
{4F21 4F53}
|
||
|
||
{50 21 73 7E}
|
||
{7421 7426}
|
||
} {
|
||
if {[llength $range] == 2} {
|
||
# for adhoc range. simple {first last}. inclusive.
|
||
scan $range %x%x first last
|
||
for {set i $first} {$i <= $last} {incr i} {
|
||
set code $i
|
||
uplevel 1 $command
|
||
}
|
||
} elseif {[llength $range] == 4} {
|
||
# for uniform range.
|
||
scan $range %x%x%x%x h0 l0 hend lend
|
||
for {set hi $h0} {$hi <= $hend} {incr hi} {
|
||
for {set lo $l0} {$lo <= $lend} {incr lo} {
|
||
set code [expr {$hi << 8 | ($lo & 0xff)}]
|
||
uplevel 1 $command
|
||
}
|
||
}
|
||
} else {
|
||
error "really?"
|
||
}
|
||
}
|
||
}
|
||
proc gen-jisx0208-euc-jp {code} {
|
||
binary format cc \
|
||
[expr {($code >> 8) | 0x80}] [expr {($code & 0xFF) | 0x80}]
|
||
}
|
||
proc gen-jisx0208-iso2022-jp {code} {
|
||
binary format a3cca3 \
|
||
"\x1B\$B" [expr {$code >> 8}] [expr {$code & 0xFF}] "\x1B(B"
|
||
}
|
||
proc gen-jisx0208-cp932 {code} {
|
||
set c1 [expr {($code >> 8) | 0x80}]
|
||
set c2 [expr {($code & 0xff)| 0x80}]
|
||
if {$c1 % 2} {
|
||
set c1 [expr {($c1 >> 1) + ($c1 < 0xDF ? 0x31 : 0x71)}]
|
||
incr c2 [expr {- (0x60 + ($c2 < 0xE0))}]
|
||
} else {
|
||
set c1 [expr {($c1 >> 1) + ($c1 < 0xDF ? 0x30 : 0x70)}]
|
||
incr c2 -2
|
||
}
|
||
binary format cc $c1 $c2
|
||
}
|
||
proc channel-diff {fa fb} {
|
||
set diff {}
|
||
while {[gets $fa la] >= 0 && [gets $fb lb] >= 0} {
|
||
if {[string compare $la $lb] == 0} continue
|
||
# lappend diff $la $lb
|
||
|
||
# For more readable (easy to analyze) output.
|
||
set code [lindex $la 0]
|
||
binary scan [lindex $la 1] H* expected
|
||
binary scan [lindex $lb 1] H* got
|
||
lappend diff [list $code $expected $got]
|
||
}
|
||
return $diff
|
||
}
|
||
|
||
# Create char tables.
|
||
cd [temporaryDirectory]
|
||
foreach enc {cp932 euc-jp iso2022-jp} {
|
||
set f [open $enc.chars w]
|
||
fconfigure $f -encoding iso8859-1
|
||
foreach-jisx0208 code {
|
||
puts $f [format "%04X %s" $code [gen-jisx0208-$enc $code]]
|
||
}
|
||
close $f
|
||
}
|
||
# shiftjis == cp932 for jisx0208.
|
||
file copy -force cp932.chars shiftjis.chars
|
||
|
||
set NUM 0
|
||
foreach from {cp932 shiftjis euc-jp iso2022-jp} {
|
||
foreach to {cp932 shiftjis euc-jp iso2022-jp} {
|
||
test encoding-25.[incr NUM] "jisx0208 $from => $to" -setup {
|
||
cd [temporaryDirectory]
|
||
} -body {
|
||
set f [open $from.chars]
|
||
fconfigure $f -encoding $from
|
||
set out [open $from.$to.tcltestout w]
|
||
fconfigure $out -encoding $to
|
||
puts -nonewline $out [read $f]
|
||
close $out
|
||
close $f
|
||
# then compare $to.chars <=> $from.to.tcltestout as binary.
|
||
set fa [open $to.chars rb]
|
||
set fb [open $from.$to.tcltestout rb]
|
||
channel-diff $fa $fb
|
||
# Difference should be empty.
|
||
} -cleanup {
|
||
close $fa
|
||
close $fb
|
||
} -result {}
|
||
}
|
||
}
|
||
|
||
test encoding-26.0 {Tcl_GetEncodingSearchPath} -setup {
|
||
set origPath [encoding dirs]
|
||
encoding dirs slappy
|
||
} -body {
|
||
encoding dirs
|
||
} -cleanup {
|
||
encoding dirs $origPath
|
||
} -result slappy
|
||
|
||
file delete {*}[glob -directory [temporaryDirectory] *.chars *.tcltestout]
|
||
# ===> Cut here <===
|
||
|
||
# EscapeFreeProc, GetTableEncoding, unilen are fully tested by the rest of
|
||
# this file.
|
||
|
||
|
||
test encoding-27.1 {encoding dirs basic behavior} -returnCodes error -body {
|
||
encoding dirs ? ?
|
||
} -result {wrong # args: should be "encoding dirs ?dirList?"}
|
||
test encoding-27.2 {encoding dirs basic behavior} -returnCodes error -body {
|
||
encoding dirs "\{not a list"
|
||
} -result "expected directory list but got \"\{not a list\""
|
||
|
||
}; # proc runtests
|
||
|
||
|
||
test encoding-28.0 {all encodings load} -body {
|
||
set string hello
|
||
foreach name [encoding names] {
|
||
if {$name ne "unicode"} {
|
||
incr count
|
||
}
|
||
encoding convertto -profile tcl8 $name $string
|
||
|
||
# discard the cached internal representation of Tcl_Encoding
|
||
# Unfortunately, without this, encoding 2-1 fails.
|
||
llength $name
|
||
}
|
||
return $count
|
||
} -result 94
|
||
|
||
runtests
|
||
|
||
test encoding-bug-183a1adcc0-1 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints {
|
||
testencoding
|
||
} -body {
|
||
# Note - buffers are initialized to \xFF
|
||
list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 1} result] $result
|
||
} -result [list 0 [list nospace {} \xFF]]
|
||
|
||
test encoding-bug-183a1adcc0-2 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints {
|
||
testencoding
|
||
} -body {
|
||
# Note - buffers are initialized to \xFF
|
||
list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 0} result] $result
|
||
} -result [list 0 [list nospace {} {}]]
|
||
|
||
test encoding-bug-183a1adcc0-3 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints {
|
||
testencoding
|
||
} -body {
|
||
# Note - buffers are initialized to \xFF
|
||
list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 2} result] $result
|
||
} -result [list 0 [list nospace {} \x00\x00]]
|
||
|
||
test encoding-bug-183a1adcc0-4 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints {
|
||
testencoding
|
||
} -body {
|
||
# Note - buffers are initialized to \xFF
|
||
list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 3} result] $result
|
||
} -result [list 0 [list nospace {} \x00\x00\xFF]]
|
||
|
||
test encoding-bug-183a1adcc0-5 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints {
|
||
testencoding
|
||
} -body {
|
||
list [catch {testencoding Tcl_UtfToExternal utf-16 A {start end} {} 4} result] $result
|
||
} -result [list 0 [list ok {} [expr {$::tcl_platform(byteOrder) eq "littleEndian" ? "\x41\x00" : "\x00\x41"}]\x00\x00]]
|
||
|
||
}
|
||
|
||
test encoding-29.0 {get encoding nul terminator lengths} -constraints {
|
||
testencoding
|
||
} -body {
|
||
list \
|
||
[testencoding nullength ascii] \
|
||
[testencoding nullength utf-16] \
|
||
[testencoding nullength utf-32] \
|
||
[testencoding nullength gb12345] \
|
||
[testencoding nullength ksc5601]
|
||
} -result {1 2 4 2 2}
|
||
|
||
test encoding-30.0 {encoding convertto large strings UINT_MAX} -constraints {
|
||
perf
|
||
} -body {
|
||
# Test to ensure not misinterpreted as -1
|
||
list [string length [set s [string repeat A 0xFFFFFFFF]]] [string equal $s [encoding convertto ascii $s]]
|
||
} -result {4294967295 1}
|
||
|
||
test encoding-30.1 {encoding convertto large strings > 4GB} -constraints {
|
||
perf
|
||
} -body {
|
||
list [string length [set s [string repeat A 0x100000000]]] [string equal $s [encoding convertto ascii $s]]
|
||
} -result {4294967296 1}
|
||
|
||
test encoding-30.2 {encoding convertfrom large strings UINT_MAX} -constraints {
|
||
perf
|
||
} -body {
|
||
# Test to ensure not misinterpreted as -1
|
||
list [string length [set s [string repeat A 0xFFFFFFFF]]] [string equal $s [encoding convertfrom ascii $s]]
|
||
} -result {4294967295 1}
|
||
|
||
test encoding-30.3 {encoding convertfrom large strings > 4GB} -constraints {
|
||
perf
|
||
} -body {
|
||
list [string length [set s [string repeat A 0x100000000]]] [string equal $s [encoding convertfrom ascii $s]]
|
||
} -result {4294967296 1}
|
||
|
||
# TIP 716 tests
|
||
tcltests::testnumargs "encoding user" "" ""
|
||
test encoding-31.0 {encoding user} -body {
|
||
encoding user
|
||
} -result [expr {$::tcl_platform(platform) eq "windows" ? [tcltests::windowscodepage] : [encoding system]}]
|
||
|
||
test encoding-31.1 {encoding system does not change encoding user} -setup {
|
||
set system [encoding system]
|
||
set user [encoding user]
|
||
} -body {
|
||
encoding system ascii
|
||
list [encoding system] [string equal [encoding user] $user]
|
||
} -cleanup {
|
||
encoding system $system
|
||
unset system
|
||
unset user
|
||
} -result {ascii 1}
|
||
|
||
test encoding-31.2 {encoding system on newer Windows always returns utf-8} -body {
|
||
string equal [encoding system] \
|
||
[expr {
|
||
[tcltests::windowsbuildnumber] > 18362 ?
|
||
"utf-8" : [tcltests::windowscodepage]
|
||
}]
|
||
} -constraints win -result 1
|
||
|
||
test encoding-31.3 {Tcl_GetEncodingNameFromEnvironment} -constraints testencoding -body {
|
||
# Primarily tests that stub is callable from outside tcl.{so,dll} via stubs
|
||
testencoding Tcl_GetEncodingNameFromEnvironment
|
||
} -result [encoding system]
|
||
|
||
test encoding-31.4 {Tcl_GetEncodingNameForUser} -constraints testencoding -body {
|
||
# Primarily tests that stub is callable from outside tcl.{so,dll} via stubs
|
||
testencoding Tcl_GetEncodingNameForUser
|
||
} -result [encoding user]
|
||
|
||
test encoding-bug-6a3e2cb0f0-1 {Bug [6a3e2cb0f0] - invalid bytes in escape encodings} -body {
|
||
encoding convertfrom -profile tcl8 iso2022-jp x\x1B\x7Aaby
|
||
} -result x\uFFFDy
|
||
test encoding-bug-6a3e2cb0f0-2 {Bug [6a3e2cb0f0] - invalid bytes in escape encodings} -body {
|
||
encoding convertfrom -profile strict iso2022-jp x\x1B\x7Aaby
|
||
} -returnCodes error -result {unexpected byte sequence starting at index 1: '\x1B'}
|
||
test encoding-bug-6a3e2cb0f0-3 {Bug [6a3e2cb0f0] - invalid bytes in escape encodings} -body {
|
||
encoding convertfrom -profile replace iso2022-jp x\x1B\x7Aaby
|
||
} -result x\uFFFDy
|
||
|
||
test encoding-bug-66ffafd309-1-tcl8 {Bug [66ffafd309] - truncated DBCS} -body {
|
||
encoding convertfrom -profile tcl8 gb12345 x
|
||
} -result x
|
||
test encoding-bug-66ffafd309-1-strict {Bug [66ffafd309] - truncated DBCS} -body {
|
||
encoding convertfrom -profile strict gb12345 x
|
||
} -result {unexpected byte sequence starting at index 0: '\x78'} -returnCodes error
|
||
test encoding-bug-66ffafd309-1-replace {Bug [66ffafd309] - truncated DBCS} -body {
|
||
encoding convertfrom -profile replace gb12345 x
|
||
} -result \uFFFD
|
||
test encoding-bug-66ffafd309-2-tcl8 {Bug [66ffafd309] - invalid DBCS} -body {
|
||
# Not truncated but invalid
|
||
encoding convertfrom -profile tcl8 jis0208 \x78\x79
|
||
} -result \x78\x79
|
||
test encoding-bug-66ffafd309-2-strict {Bug [66ffafd309] - invalid DBCS} -body {
|
||
# Not truncated but invalid
|
||
encoding convertfrom -profile strict jis0208 \x78\x79
|
||
} -result {unexpected byte sequence starting at index 1: '\x79'} -returnCodes error
|
||
test encoding-bug-66ffafd309-2-replace {Bug [66ffafd309] - invalid DBCS} -body {
|
||
# Not truncated but invalid
|
||
encoding convertfrom -profile replace jis0208 \x78\x79
|
||
} -result \uFFFD\uFFFD
|
||
|
||
test encoding-bug-201c7a3aa6-strict {Crash encoding non-BMP to iso2022} -body {
|
||
encoding convertto -profile strict iso2022 \U1f600
|
||
} -result {unexpected character at index 0: 'U+01F600'} -returnCodes error
|
||
|
||
test encoding-bug-201c7a3aa6-replace {Crash encoding non-BMP to iso2022} -body {
|
||
encoding convertto -profile replace iso2022 \U1f600
|
||
} -result ?
|
||
|
||
test encoding-bug-201c7a3aa6-tcl8 {Crash encoding non-BMP to iso2022} -body {
|
||
encoding convertto -profile tcl8 iso2022 \U1f600
|
||
} -result ?
|
||
|
||
test encoding-bug-7346adc50f-strict {OOM on convertfrom truncated iso2022 - strict} -body {
|
||
encoding convertfrom -profile strict iso2022-jp "\x1b\$B\$*;n\$"
|
||
} -result {unexpected byte sequence starting at index 7: '\x24'} -returnCodes error
|
||
|
||
test encoding-bug-7346adc50f-failindex {OOM on convertfrom truncated iso2022 - failindex} -body {
|
||
list [encoding convertfrom -failindex failix iso2022-jp "\x1b\$B\$*;n\$"] $failix
|
||
} -cleanup {
|
||
unset -nocomplain failix
|
||
} -result [list \u304A\u8A66 7]
|
||
|
||
test encoding-bug-7346adc50f-replace {OOM on convertfrom truncated iso2022 - replace} -body {
|
||
encoding convertfrom -profile replace iso2022-jp "\x1b\$B\$*;n\$"
|
||
} -result \u304A\u8A66\uFFFD
|
||
|
||
test encoding-bug-7346adc50f-tcl8 {OOM on convertfrom truncated iso2022 - tcl8} -body {
|
||
encoding convertfrom -profile tcl8 iso2022-jp "\x1b\$B\$*;n\$"
|
||
} -result \u304A\u8A66\uFFFD
|
||
|
||
test encoding-dirs-bug-87b69745be {encoding dirs reset on interp creation} -setup {
|
||
set origEncodingDirs [encoding dirs]
|
||
} -cleanup {
|
||
encoding dirs $origEncodingDirs
|
||
unset -nocomplain origEncodingDirs
|
||
} -body {
|
||
encoding dirs [linsert [encoding dirs] end /temp]
|
||
interp delete [interp create]
|
||
encoding dirs
|
||
} -result [linsert [encoding dirs] end /temp]
|
||
|
||
# cleanup
|
||
namespace delete ::tcl::test::encoding
|
||
::tcltest::cleanupTests
|
||
return
|
||
|
||
# Local Variables:
|
||
# mode: tcl
|
||
# End:
|