Houjie
2025-07-24 1bc8f80935add7215fa98de1ab8b375b222a2046
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
var encodings_1 = require("../../encoding/encodings");
var finished_1 = require("../../encoding/finished");
var terminology_1 = require("../../encoding/terminology");
var utilities_1 = require("../../encoding/utilities");
/**
 * Incremental UTF-8 decoder: bytes are fed one at a time to `handler`,
 * which keeps the partially decoded sequence on the instance between
 * calls.
 *
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options `options.fatal` is forwarded to
 *     `decoderError` whenever malformed input is encountered.
 */
var UTF8Decoder = /** @class */ (function () {
    function UTF8Decoder(options) {
        this.fatal = options.fatal;
        // utf-8's decoder's has an associated utf-8 code point, utf-8
        // bytes seen, and utf-8 bytes needed (all initially 0), a utf-8
        // lower boundary (initially 0x80), and a utf-8 upper boundary
        // (initially 0xBF).
        /** @type {number} */ this.utf8_code_point = 0;
        /** @type {number} */ this.utf8_bytes_seen = 0;
        /** @type {number} */ this.utf8_bytes_needed = 0;
        /** @type {number} */ this.utf8_lower_boundary = 0x80;
        /** @type {number} */ this.utf8_upper_boundary = 0xBF;
    }
    /**
     * Puts every piece of per-sequence state back to its initial value
     * so the next byte is treated as the start of a new sequence.
     * Private to this module (not attached to the prototype).
     *
     * @param {UTF8Decoder} decoder The decoder whose state is cleared.
     */
    function resetState(decoder) {
        decoder.utf8_code_point = 0;
        decoder.utf8_bytes_needed = 0;
        decoder.utf8_bytes_seen = 0;
        decoder.utf8_lower_boundary = 0x80;
        decoder.utf8_upper_boundary = 0xBF;
    }
    /**
     * @param {Stream} stream The stream of bytes being decoded. Only its
     *     `prepend` method is used here, to push back a byte that turned
     *     out not to belong to the current sequence.
     * @param {number} bite The next byte read from the stream, or the
     *     `end_of_stream` sentinel.
     * @return {?(number|!Array.<number>)} The next code point(s)
     *     decoded, or null if not enough data exists in the input
     *     stream to decode a complete code point. At end of stream the
     *     `finished` sentinel is returned; on malformed input the result
     *     of `decoderError(this.fatal)` is returned (presumably that
     *     helper throws when `fatal` is set -- it is defined outside
     *     this file).
     */
    UTF8Decoder.prototype.handler = function (stream, bite) {
        if (bite === terminology_1.end_of_stream) {
            // 1. If byte is end-of-stream and utf-8 bytes needed is not 0,
            // return error. The whole state is reset (not just utf-8 bytes
            // needed) so that a truncated sequence cannot leak a stale
            // bytes-seen count, code point or boundary into later input
            // if this instance is used again.
            if (this.utf8_bytes_needed !== 0) {
                resetState(this);
                return encodings_1.decoderError(this.fatal);
            }
            // 2. If byte is end-of-stream, return finished.
            return finished_1.finished;
        }
        // 3. If utf-8 bytes needed is 0, based on byte:
        if (this.utf8_bytes_needed === 0) {
            // 0x00 to 0x7F
            if (utilities_1.inRange(bite, 0x00, 0x7F)) {
                // Return a code point whose value is byte.
                return bite;
            }
            // 0xC2 to 0xDF
            else if (utilities_1.inRange(bite, 0xC2, 0xDF)) {
                // 1. Set utf-8 bytes needed to 1.
                this.utf8_bytes_needed = 1;
                // 2. Set UTF-8 code point to byte & 0x1F.
                this.utf8_code_point = bite & 0x1F;
            }
            // 0xE0 to 0xEF
            else if (utilities_1.inRange(bite, 0xE0, 0xEF)) {
                // 1. If byte is 0xE0, set utf-8 lower boundary to 0xA0.
                if (bite === 0xE0) {
                    this.utf8_lower_boundary = 0xA0;
                }
                // 2. If byte is 0xED, set utf-8 upper boundary to 0x9F.
                if (bite === 0xED) {
                    this.utf8_upper_boundary = 0x9F;
                }
                // 3. Set utf-8 bytes needed to 2.
                this.utf8_bytes_needed = 2;
                // 4. Set UTF-8 code point to byte & 0xF.
                this.utf8_code_point = bite & 0xF;
            }
            // 0xF0 to 0xF4
            else if (utilities_1.inRange(bite, 0xF0, 0xF4)) {
                // 1. If byte is 0xF0, set utf-8 lower boundary to 0x90.
                if (bite === 0xF0) {
                    this.utf8_lower_boundary = 0x90;
                }
                // 2. If byte is 0xF4, set utf-8 upper boundary to 0x8F.
                if (bite === 0xF4) {
                    this.utf8_upper_boundary = 0x8F;
                }
                // 3. Set utf-8 bytes needed to 3.
                this.utf8_bytes_needed = 3;
                // 4. Set UTF-8 code point to byte & 0x7.
                this.utf8_code_point = bite & 0x7;
            }
            // Otherwise
            else {
                // Return error.
                return encodings_1.decoderError(this.fatal);
            }
            // Return continue.
            return null;
        }
        // 4. If byte is not in the range utf-8 lower boundary to utf-8
        // upper boundary, inclusive, run these substeps:
        if (!utilities_1.inRange(bite, this.utf8_lower_boundary, this.utf8_upper_boundary)) {
            // 1. Set utf-8 code point, utf-8 bytes needed, and utf-8
            // bytes seen to 0, set utf-8 lower boundary to 0x80, and set
            // utf-8 upper boundary to 0xBF.
            resetState(this);
            // 2. Prepend byte to stream, so it is read again as the
            // possible start of a new sequence.
            stream.prepend(bite);
            // 3. Return error.
            return encodings_1.decoderError(this.fatal);
        }
        // 5. Set utf-8 lower boundary to 0x80 and utf-8 upper boundary
        // to 0xBF. (The narrowed boundaries only apply to the first
        // continuation byte.)
        this.utf8_lower_boundary = 0x80;
        this.utf8_upper_boundary = 0xBF;
        // 6. Set UTF-8 code point to (UTF-8 code point << 6) | (byte &
        // 0x3F)
        this.utf8_code_point = (this.utf8_code_point << 6) | (bite & 0x3F);
        // 7. Increase utf-8 bytes seen by one.
        this.utf8_bytes_seen += 1;
        // 8. If utf-8 bytes seen is not equal to utf-8 bytes needed,
        // continue.
        if (this.utf8_bytes_seen !== this.utf8_bytes_needed) {
            return null;
        }
        // 9. Let code point be utf-8 code point.
        var code_point = this.utf8_code_point;
        // 10. Set utf-8 code point, utf-8 bytes needed, and utf-8 bytes
        // seen to 0. (The boundaries already hold their defaults from
        // step 5, so the full reset is equivalent here.)
        resetState(this);
        // 11. Return a code point whose value is code point.
        return code_point;
    };
    return UTF8Decoder;
}());
// Expose the UTF8Decoder constructor as a named export of this CommonJS module.
exports.UTF8Decoder = UTF8Decoder;
//# sourceMappingURL=UTF8Decoder.js.map