-
Notifications
You must be signed in to change notification settings - Fork 253
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
- Loading branch information
1 parent
c58d1e2
commit ae8bac7
Showing
10 changed files
with
191 additions
and
89 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import { BSONError } from './error'; | ||
|
||
/** Minimal shape of the `TextDecoder` instances this module uses. */
interface TextDecoder {
  readonly encoding: string;
  readonly fatal: boolean;
  readonly ignoreBOM: boolean;
  decode(input?: Uint8Array): string;
}

/** Constructor signature of the ambient `TextDecoder`, restricted to the 'utf8' label. */
interface TextDecoderConstructor {
  new (label: 'utf8', options: { fatal: boolean; ignoreBOM?: boolean }): TextDecoder;
}

// parse utf8 globals
// `TextDecoder` is only declared here (no import); it must exist as a global at runtime.
declare const TextDecoder: TextDecoderConstructor;
// Decoder slots are declared without an initializer; parseUtf8 fills each one on first use.
let TextDecoderFatal: TextDecoder;
let TextDecoderNonFatal: TextDecoder;
|
||
/**
 * Decodes the bytes of `buffer` in the range `[start, end)` as a UTF-8 string.
 *
 * The two decoders are created on first use and then reused across calls.
 *
 * @param buffer - The bytes to decode
 * @param start - Start index of the range, passed to `subarray`
 * @param end - End index of the range, passed to `subarray`
 * @param fatal - When true, the decoder built with `fatal: true` is used and any
 *   error it throws is rethrown as a BSONError; when false, the decoder built
 *   with `fatal: false` is used and its result is returned directly
 * @returns The decoded string
 * @throws BSONError if `fatal` is true and decoding throws; the original error
 *   is attached as `cause`
 */
export function parseUtf8(buffer: Uint8Array, start: number, end: number, fatal: boolean): string {
  if (!fatal) {
    TextDecoderNonFatal ??= new TextDecoder('utf8', { fatal: false });
    const slice = buffer.subarray(start, end);
    return TextDecoderNonFatal.decode(slice);
  }

  TextDecoderFatal ??= new TextDecoder('utf8', { fatal: true });
  try {
    // The slice is taken inside the try so that any failure here is wrapped as well.
    const slice = buffer.subarray(start, end);
    return TextDecoderFatal.decode(slice);
  } catch (cause) {
    throw new BSONError('Invalid UTF-8 string in BSON document', { cause });
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
// extra error cases copied from wpt/encoding/textdecoder-fatal.any.js | ||
// commit sha: 7c9f867 | ||
// link: https://github.com/web-platform-tests/wpt/commit/7c9f8674d9809731e8919073d957d6233f6e0544 | ||
|
||
// Each row is [test name, input bytes]; the rows are expanded into test objects below.
const invalidUtf8Cases: Array<[string, number[]]> = [
  ['invalid code', [0xff]],
  ['ends early', [0xc0]],
  ['ends early 2', [0xe0]],
  ['invalid trail', [0xc0, 0x00]],
  ['invalid trail 2', [0xc0, 0xc0]],
  ['invalid trail 3', [0xe0, 0x00]],
  ['invalid trail 4', [0xe0, 0xc0]],
  ['invalid trail 5', [0xe0, 0x80, 0x00]],
  ['invalid trail 6', [0xe0, 0x80, 0xc0]],
  ['> 0x10ffff', [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80]],
  ['obsolete lead byte', [0xfe, 0x80, 0x80, 0x80, 0x80, 0x80]],

  // Overlong encodings
  ['overlong U+0000 - 2 bytes', [0xc0, 0x80]],
  ['overlong U+0000 - 3 bytes', [0xe0, 0x80, 0x80]],
  ['overlong U+0000 - 4 bytes', [0xf0, 0x80, 0x80, 0x80]],
  ['overlong U+0000 - 5 bytes', [0xf8, 0x80, 0x80, 0x80, 0x80]],
  ['overlong U+0000 - 6 bytes', [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80]],

  ['overlong U+007f - 2 bytes', [0xc1, 0xbf]],
  ['overlong U+007f - 3 bytes', [0xe0, 0x81, 0xbf]],
  ['overlong U+007f - 4 bytes', [0xf0, 0x80, 0x81, 0xbf]],
  ['overlong U+007f - 5 bytes', [0xf8, 0x80, 0x80, 0x81, 0xbf]],
  ['overlong U+007f - 6 bytes', [0xfc, 0x80, 0x80, 0x80, 0x81, 0xbf]],

  ['overlong U+07ff - 3 bytes', [0xe0, 0x9f, 0xbf]],
  ['overlong U+07ff - 4 bytes', [0xf0, 0x80, 0x9f, 0xbf]],
  ['overlong U+07ff - 5 bytes', [0xf8, 0x80, 0x80, 0x9f, 0xbf]],
  ['overlong U+07ff - 6 bytes', [0xfc, 0x80, 0x80, 0x80, 0x9f, 0xbf]],

  ['overlong U+ffff - 4 bytes', [0xf0, 0x8f, 0xbf, 0xbf]],
  ['overlong U+ffff - 5 bytes', [0xf8, 0x80, 0x8f, 0xbf, 0xbf]],
  ['overlong U+ffff - 6 bytes', [0xfc, 0x80, 0x80, 0x8f, 0xbf, 0xbf]],

  ['overlong U+10ffff - 5 bytes', [0xf8, 0x84, 0x8f, 0xbf, 0xbf]],
  ['overlong U+10ffff - 6 bytes', [0xfc, 0x80, 0x84, 0x8f, 0xbf, 0xbf]],

  // UTF-16 surrogates encoded as code points in UTF-8
  ['lead surrogate', [0xed, 0xa0, 0x80]],
  ['trail surrogate', [0xed, 0xb0, 0x80]],
  ['surrogate pair', [0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80]]
];

/**
 * Byte sequences copied from the web-platform-tests fatal TextDecoder suite
 * (see the provenance header of this file). Every entry has `encoding`
 * 'utf-8', the raw `input` bytes, and a descriptive `name`.
 */
export const utf8WebPlatformSpecTests = invalidUtf8Cases.map(([name, input]) => ({
  encoding: 'utf-8',
  input,
  name
}));
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters