12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312 |
- // This is free and unencumbered software released into the public domain.
- // See LICENSE.md for more information.
- /**
- * @fileoverview Global |this| required for resolving indexes in node.
- * @suppress {globalThis}
- */
- (function(global) {
- 'use strict';
// CommonJS (node) only: when a `module.exports` is available and nothing has
// published the index tables yet, load them and attach them to the global.
if (typeof module !== "undefined" && module.exports) {
  if (!global["encoding-indexes"]) {
    var indexes_module = require("./encoding-indexes.js");
    global["encoding-indexes"] = indexes_module["encoding-indexes"];
  }
}
- //
- // Utilities
- //
/**
 * Closed-interval membership test.
 * @param {number} a The number to test.
 * @param {number} min Lower bound of the range, inclusive.
 * @param {number} max Upper bound of the range, inclusive.
 * @return {boolean} True when min <= a <= max.
 */
function inRange(a, min, max) {
  var at_or_above_min = a >= min;
  return at_or_above_min && a <= max;
}
/**
 * Membership test built on Array.prototype.indexOf (strict equality).
 * @param {!Array.<*>} array The array to search.
 * @param {*} item The value to look for.
 * @return {boolean} True if indexOf finds |item| in |array|.
 */
function includes(array, item) {
  var position = array.indexOf(item);
  return position >= 0;
}
- var floor = Math.floor;
/**
 * Coerces an options argument to a dictionary object.
 * @param {*} o undefined, or an object.
 * @return {Object} A fresh empty object when |o| is undefined, otherwise |o|
 *     itself.
 * @throws {TypeError} If |o| is neither undefined nor an object.
 */
function ToDictionary(o) {
  if (o === void 0)
    return {};
  // Object(o) returns its argument unchanged only when it already is an
  // object (functions included); primitives and null get wrapped/replaced.
  var is_object = Object(o) === o;
  if (!is_object)
    throw TypeError('Could not convert argument to dictionary');
  return o;
}
/**
 * Converts a string of UTF-16 code units to an array of code points,
 * following the Web IDL "obtain Unicode" algorithm
 * (https://heycam.github.io/webidl/#dfn-obtain-unicode). Every unpaired
 * surrogate is replaced by U+FFFD REPLACEMENT CHARACTER.
 * @param {string} string Input string of UTF-16 code units.
 * @return {!Array.<number>} Code points.
 */
function stringToCodePoints(string) {
  var text = String(string);
  var length = text.length;
  var code_points = [];

  for (var pos = 0; pos < length; ++pos) {
    var unit = text.charCodeAt(pos);

    // Anything outside the surrogate range is already a code point.
    if (unit < 0xD800 || unit > 0xDFFF) {
      code_points.push(unit);
      continue;
    }

    // A lead surrogate (0xD800..0xDBFF) that is not the final code unit may
    // combine with a following trail surrogate (0xDC00..0xDFFF).
    if (unit <= 0xDBFF && pos + 1 < length) {
      var next = text.charCodeAt(pos + 1);
      if (0xDC00 <= next && next <= 0xDFFF) {
        // 2^16 + 2^10 * (lead & 0x3FF) + (trail & 0x3FF)
        code_points.push(0x10000 + ((unit & 0x3FF) << 10) + (next & 0x3FF));
        // Skip the trail surrogate that was just consumed.
        ++pos;
        continue;
      }
    }

    // Lone trail surrogate, lead surrogate at the end of the string, or
    // lead surrogate not followed by a trail surrogate.
    code_points.push(0xFFFD);
  }

  return code_points;
}
/**
 * Serializes code points as a string of UTF-16 code units. Values up to
 * 0xFFFF become a single code unit; larger values become a surrogate pair.
 * @param {!Array.<number>} code_points Array of code points.
 * @return {string} string String of UTF-16 code units.
 */
function codePointsToString(code_points) {
  var pieces = [];
  for (var k = 0; k < code_points.length; k += 1) {
    var value = code_points[k];
    if (value <= 0xFFFF) {
      pieces.push(String.fromCharCode(value));
      continue;
    }
    // Supplementary plane: split the 20-bit offset into lead/trail halves.
    var offset = value - 0x10000;
    var lead = (offset >> 10) + 0xD800;
    var trail = (offset & 0x3FF) + 0xDC00;
    pieces.push(String.fromCharCode(lead, trail));
  }
  return pieces.join('');
}
- //
- // Implementation of Encoding specification
- // https://encoding.spec.whatwg.org/
- //
- //
- // 4. Terminology
- //
/**
 * An ASCII byte is a byte in the range 0x00 to 0x7F, inclusive.
 * @param {number} a The number to test.
 * @return {boolean} True if 0x00 <= a <= 0x7F.
 */
function isASCIIByte(a) {
  var non_negative = a >= 0x00;
  return non_negative && a <= 0x7F;
}
- /**
- * An ASCII code point is a code point in the range U+0000 to
- * U+007F, inclusive.
- */
- var isASCIICodePoint = isASCIIByte;
- /**
- * End-of-stream is a special token that signifies no more tokens
- * are in the stream.
- * @const
- */ var end_of_stream = -1;
/**
 * A stream represents an ordered sequence of tokens.
 *
 * The tokens are copied out of the argument and kept in reverse order, so
 * the logical front of the stream is the END of |this.tokens|. Reading and
 * prepending are therefore plain pop/push operations.
 *
 * @constructor
 * @param {!(Array.<number>|Uint8Array)} tokens Array of tokens that provide
 * the stream.
 */
function Stream(tokens) {
  /** @type {!Array.<number>} */
  this.tokens = [].slice.call(tokens);
  // Reversed as push/pop is more efficient than shift/unshift.
  this.tokens.reverse();
}

Stream.prototype = {
  /**
   * @return {boolean} True if end-of-stream has been hit.
   */
  endOfStream: function() {
    return !this.tokens.length;
  },

  /**
   * When a token is read from a stream, the first token in the
   * stream must be returned and subsequently removed, and
   * end-of-stream must be returned otherwise.
   *
   * @return {number} Get the next token from the stream, or
   * end_of_stream.
   */
  read: function() {
    if (!this.tokens.length)
      return end_of_stream;
    return this.tokens.pop();
  },

  /**
   * When one or more tokens are prepended to a stream, those tokens
   * must be inserted, in given order, before the first token in the
   * stream.
   *
   * The argument is only read; an array passed in is left unmodified.
   *
   * @param {(number|!Array.<number>)} token The token(s) to prepend to the
   * stream.
   */
  prepend: function(token) {
    if (Array.isArray(token)) {
      var tokens = /**@type {!Array.<number>}*/(token);
      // Storage is reversed, so append the new tokens last-to-first: the
      // first given token ends up on top and is the next one read.
      for (var i = tokens.length - 1; i >= 0; --i)
        this.tokens.push(tokens[i]);
    } else {
      this.tokens.push(token);
    }
  },

  /**
   * When one or more tokens are pushed to a stream, those tokens
   * must be inserted, in given order, after the last token in the
   * stream.
   *
   * The argument is only read; an array passed in is left unmodified.
   *
   * @param {(number|!Array.<number>)} token The tokens(s) to push to the
   * stream.
   */
  push: function(token) {
    if (Array.isArray(token)) {
      var tokens = /**@type {!Array.<number>}*/(token);
      // Storage is reversed, so the logical tail is the front of the array.
      // Build the new storage in one pass instead of one unshift() per
      // token, which re-indexed the whole array each time.
      this.tokens = tokens.slice().reverse().concat(this.tokens);
    } else {
      this.tokens.unshift(token);
    }
  }
};
- //
- // 5. Encodings
- //
- // 5.1 Encoders and decoders
- /** @const */
- var finished = -1;
/**
 * Reports a decoding error: throws in fatal mode, otherwise supplies the
 * code point to emit in place of the bad input.
 * @param {boolean} fatal If true, decoding errors raise an exception.
 * @param {number=} opt_code_point Override the standard fallback code point.
 *     A falsy value (including 0) selects the standard fallback.
 * @return {number} The code point to insert on a decoding error; U+FFFD
 *     unless overridden.
 * @throws {TypeError} If |fatal| is truthy.
 */
function decoderError(fatal, opt_code_point) {
  if (!fatal)
    return opt_code_point ? opt_code_point : 0xFFFD;
  throw TypeError('Decoder error');
}
/**
 * Reports a code point that the current encoder cannot represent.
 * @param {number} code_point The code point that could not be encoded.
 * @return {number} Always throws, no value is actually returned.
 * @throws {TypeError} Always.
 */
function encoderError(code_point) {
  var message = 'The code point ' + code_point;
  message += ' could not be encoded.';
  throw TypeError(message);
}
/**
 * Shape shared by all decoders: a single |handler| method.
 * @interface
 */
function Decoder() {}

Decoder.prototype = {
  /**
   * Processes one byte of input.
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point, or |finished|.
   */
  handler: function(stream, bite) {}
};
/**
 * Shape shared by all encoders: a single |handler| method.
 * @interface
 */
function Encoder() {}

Encoder.prototype = {
  /**
   * Processes one code point of input.
   * @param {Stream} stream The stream of code points being encoded.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit, or |finished|.
   */
  handler: function(stream, code_point) {}
};
- // 5.2 Names and labels
- // TODO: Define @typedef for Encoding: {name:string,labels:Array.<string>}
- // https://github.com/google/closure-compiler/issues/247
/**
 * Looks up an encoding by label ("get an encoding" in the Encoding
 * Standard).
 * @param {string} label The encoding label.
 * @return {?{name:string,labels:Array.<string>}} The matching entry of
 *     |label_to_encoding|, or null if the label is not registered.
 */
function getEncoding(label) {
  // 1. Remove any leading and trailing ASCII whitespace from label.
  // ASCII whitespace is exactly TAB, LF, FF, CR and SPACE.
  // String.prototype.trim() is not used because it also strips other
  // characters (e.g. U+000B, U+00A0, U+FEFF), which would let labels match
  // that must be rejected.
  label = String(label).replace(/^[\t\n\f\r ]+|[\t\n\f\r ]+$/g, '');

  // 2. If label is an ASCII case-insensitive match for any of the
  // labels listed in the table below, return the corresponding
  // encoding, and failure otherwise.
  // Only A-Z are folded. String.prototype.toLowerCase() also maps some
  // non-ASCII characters onto ASCII letters (e.g. U+212A KELVIN SIGN to
  // "k"), which is not an ASCII case-insensitive match.
  label = label.replace(/[A-Z]/g, function(upper) {
    return String.fromCharCode(upper.charCodeAt(0) + 0x20);
  });

  if (Object.prototype.hasOwnProperty.call(label_to_encoding, label)) {
    return label_to_encoding[label];
  }
  return null;
}
- /**
- * Encodings table: https://encoding.spec.whatwg.org/encodings.json
- * @const
- * @type {!Array.<{
- * heading: string,
- * encodings: Array.<{name:string,labels:Array.<string>}>
- * }>}
- */
- var encodings = [
- {
- "encodings": [
- {
- "labels": [
- "unicode-1-1-utf-8",
- "utf-8",
- "utf8"
- ],
- "name": "UTF-8"
- }
- ],
- "heading": "The Encoding"
- },
- {
- "encodings": [
- {
- "labels": [
- "866",
- "cp866",
- "csibm866",
- "ibm866"
- ],
- "name": "IBM866"
- },
- {
- "labels": [
- "csisolatin2",
- "iso-8859-2",
- "iso-ir-101",
- "iso8859-2",
- "iso88592",
- "iso_8859-2",
- "iso_8859-2:1987",
- "l2",
- "latin2"
- ],
- "name": "ISO-8859-2"
- },
- {
- "labels": [
- "csisolatin3",
- "iso-8859-3",
- "iso-ir-109",
- "iso8859-3",
- "iso88593",
- "iso_8859-3",
- "iso_8859-3:1988",
- "l3",
- "latin3"
- ],
- "name": "ISO-8859-3"
- },
- {
- "labels": [
- "csisolatin4",
- "iso-8859-4",
- "iso-ir-110",
- "iso8859-4",
- "iso88594",
- "iso_8859-4",
- "iso_8859-4:1988",
- "l4",
- "latin4"
- ],
- "name": "ISO-8859-4"
- },
- {
- "labels": [
- "csisolatincyrillic",
- "cyrillic",
- "iso-8859-5",
- "iso-ir-144",
- "iso8859-5",
- "iso88595",
- "iso_8859-5",
- "iso_8859-5:1988"
- ],
- "name": "ISO-8859-5"
- },
- {
- "labels": [
- "arabic",
- "asmo-708",
- "csiso88596e",
- "csiso88596i",
- "csisolatinarabic",
- "ecma-114",
- "iso-8859-6",
- "iso-8859-6-e",
- "iso-8859-6-i",
- "iso-ir-127",
- "iso8859-6",
- "iso88596",
- "iso_8859-6",
- "iso_8859-6:1987"
- ],
- "name": "ISO-8859-6"
- },
- {
- "labels": [
- "csisolatingreek",
- "ecma-118",
- "elot_928",
- "greek",
- "greek8",
- "iso-8859-7",
- "iso-ir-126",
- "iso8859-7",
- "iso88597",
- "iso_8859-7",
- "iso_8859-7:1987",
- "sun_eu_greek"
- ],
- "name": "ISO-8859-7"
- },
- {
- "labels": [
- "csiso88598e",
- "csisolatinhebrew",
- "hebrew",
- "iso-8859-8",
- "iso-8859-8-e",
- "iso-ir-138",
- "iso8859-8",
- "iso88598",
- "iso_8859-8",
- "iso_8859-8:1988",
- "visual"
- ],
- "name": "ISO-8859-8"
- },
- {
- "labels": [
- "csiso88598i",
- "iso-8859-8-i",
- "logical"
- ],
- "name": "ISO-8859-8-I"
- },
- {
- "labels": [
- "csisolatin6",
- "iso-8859-10",
- "iso-ir-157",
- "iso8859-10",
- "iso885910",
- "l6",
- "latin6"
- ],
- "name": "ISO-8859-10"
- },
- {
- "labels": [
- "iso-8859-13",
- "iso8859-13",
- "iso885913"
- ],
- "name": "ISO-8859-13"
- },
- {
- "labels": [
- "iso-8859-14",
- "iso8859-14",
- "iso885914"
- ],
- "name": "ISO-8859-14"
- },
- {
- "labels": [
- "csisolatin9",
- "iso-8859-15",
- "iso8859-15",
- "iso885915",
- "iso_8859-15",
- "l9"
- ],
- "name": "ISO-8859-15"
- },
- {
- "labels": [
- "iso-8859-16"
- ],
- "name": "ISO-8859-16"
- },
- {
- "labels": [
- "cskoi8r",
- "koi",
- "koi8",
- "koi8-r",
- "koi8_r"
- ],
- "name": "KOI8-R"
- },
- {
- "labels": [
- "koi8-ru",
- "koi8-u"
- ],
- "name": "KOI8-U"
- },
- {
- "labels": [
- "csmacintosh",
- "mac",
- "macintosh",
- "x-mac-roman"
- ],
- "name": "macintosh"
- },
- {
- "labels": [
- "dos-874",
- "iso-8859-11",
- "iso8859-11",
- "iso885911",
- "tis-620",
- "windows-874"
- ],
- "name": "windows-874"
- },
- {
- "labels": [
- "cp1250",
- "windows-1250",
- "x-cp1250"
- ],
- "name": "windows-1250"
- },
- {
- "labels": [
- "cp1251",
- "windows-1251",
- "x-cp1251"
- ],
- "name": "windows-1251"
- },
- {
- "labels": [
- "ansi_x3.4-1968",
- "ascii",
- "cp1252",
- "cp819",
- "csisolatin1",
- "ibm819",
- "iso-8859-1",
- "iso-ir-100",
- "iso8859-1",
- "iso88591",
- "iso_8859-1",
- "iso_8859-1:1987",
- "l1",
- "latin1",
- "us-ascii",
- "windows-1252",
- "x-cp1252"
- ],
- "name": "windows-1252"
- },
- {
- "labels": [
- "cp1253",
- "windows-1253",
- "x-cp1253"
- ],
- "name": "windows-1253"
- },
- {
- "labels": [
- "cp1254",
- "csisolatin5",
- "iso-8859-9",
- "iso-ir-148",
- "iso8859-9",
- "iso88599",
- "iso_8859-9",
- "iso_8859-9:1989",
- "l5",
- "latin5",
- "windows-1254",
- "x-cp1254"
- ],
- "name": "windows-1254"
- },
- {
- "labels": [
- "cp1255",
- "windows-1255",
- "x-cp1255"
- ],
- "name": "windows-1255"
- },
- {
- "labels": [
- "cp1256",
- "windows-1256",
- "x-cp1256"
- ],
- "name": "windows-1256"
- },
- {
- "labels": [
- "cp1257",
- "windows-1257",
- "x-cp1257"
- ],
- "name": "windows-1257"
- },
- {
- "labels": [
- "cp1258",
- "windows-1258",
- "x-cp1258"
- ],
- "name": "windows-1258"
- },
- {
- "labels": [
- "x-mac-cyrillic",
- "x-mac-ukrainian"
- ],
- "name": "x-mac-cyrillic"
- }
- ],
- "heading": "Legacy single-byte encodings"
- },
- {
- "encodings": [
- {
- "labels": [
- "chinese",
- "csgb2312",
- "csiso58gb231280",
- "gb2312",
- "gb_2312",
- "gb_2312-80",
- "gbk",
- "iso-ir-58",
- "x-gbk"
- ],
- "name": "GBK"
- },
- {
- "labels": [
- "gb18030"
- ],
- "name": "gb18030"
- }
- ],
- "heading": "Legacy multi-byte Chinese (simplified) encodings"
- },
- {
- "encodings": [
- {
- "labels": [
- "big5",
- "big5-hkscs",
- "cn-big5",
- "csbig5",
- "x-x-big5"
- ],
- "name": "Big5"
- }
- ],
- "heading": "Legacy multi-byte Chinese (traditional) encodings"
- },
- {
- "encodings": [
- {
- "labels": [
- "cseucpkdfmtjapanese",
- "euc-jp",
- "x-euc-jp"
- ],
- "name": "EUC-JP"
- },
- {
- "labels": [
- "csiso2022jp",
- "iso-2022-jp"
- ],
- "name": "ISO-2022-JP"
- },
- {
- "labels": [
- "csshiftjis",
- "ms932",
- "ms_kanji",
- "shift-jis",
- "shift_jis",
- "sjis",
- "windows-31j",
- "x-sjis"
- ],
- "name": "Shift_JIS"
- }
- ],
- "heading": "Legacy multi-byte Japanese encodings"
- },
- {
- "encodings": [
- {
- "labels": [
- "cseuckr",
- "csksc56011987",
- "euc-kr",
- "iso-ir-149",
- "korean",
- "ks_c_5601-1987",
- "ks_c_5601-1989",
- "ksc5601",
- "ksc_5601",
- "windows-949"
- ],
- "name": "EUC-KR"
- }
- ],
- "heading": "Legacy multi-byte Korean encodings"
- },
- {
- "encodings": [
- {
- "labels": [
- "csiso2022kr",
- "hz-gb-2312",
- "iso-2022-cn",
- "iso-2022-cn-ext",
- "iso-2022-kr"
- ],
- "name": "replacement"
- },
- {
- "labels": [
- "utf-16be"
- ],
- "name": "UTF-16BE"
- },
- {
- "labels": [
- "utf-16",
- "utf-16le"
- ],
- "name": "UTF-16LE"
- },
- {
- "labels": [
- "x-user-defined"
- ],
- "name": "x-user-defined"
- }
- ],
- "heading": "Legacy miscellaneous encodings"
- }
- ];
// Label to encoding registry: maps every label in |encodings| to the
// encoding record that lists it.
/** @type {Object.<string,{name:string,labels:Array.<string>}>} */
var label_to_encoding = {};
(function registerLabels() {
  for (var c = 0; c < encodings.length; ++c) {
    var group = encodings[c].encodings;
    for (var e = 0; e < group.length; ++e) {
      var entry = group[e];
      for (var l = 0; l < entry.labels.length; ++l) {
        label_to_encoding[entry.labels[l]] = entry;
      }
    }
  }
}());
- // Registry of encoder/decoder factories, by encoding name.
- /** @type {Object.<string, function({fatal:boolean}): Encoder>} */
- var encoders = {};
- /** @type {Object.<string, function({fatal:boolean}): Decoder>} */
- var decoders = {};
- //
- // 6. Indexes
- //
/**
 * Reads the code point stored at |pointer| in |index|.
 * @param {number} pointer The |pointer| to search for.
 * @param {(!Array.<?number>|undefined)} index The |index| to search within.
 * @return {?number} The code point corresponding to |pointer| in |index|,
 *     or null if |index| is missing or holds no truthy value at |pointer|.
 */
function indexCodePointFor(pointer, index) {
  var code_point = index ? index[pointer] : null;
  return code_point ? code_point : null;
}
/**
 * Finds where |code_point| first occurs in |index|.
 * @param {number} code_point The |code point| to search for.
 * @param {!Array.<?number>} index The |index| to search within.
 * @return {?number} The first pointer corresponding to |code point| in
 *     |index|, or null if |code point| is not in |index|.
 */
function indexPointerFor(code_point, index) {
  var found_at = index.indexOf(code_point);
  if (found_at === -1)
    return null;
  return found_at;
}
/**
 * Fetches a named index table from the 'encoding-indexes' object on the
 * global.
 * @param {string} name Name of the index.
 * @return {(!Array.<number>|!Array.<Array.<number>>)}
 * @throws {Error} If the global has no 'encoding-indexes' property.
 */
function index(name) {
  var indexes_loaded = 'encoding-indexes' in global;
  if (indexes_loaded)
    return global['encoding-indexes'][name];
  throw Error("Indexes missing." +
              " Did you forget to include encoding-indexes.js first?");
}
/**
 * Maps a gb18030 ranges pointer to a code point.
 * @param {number} pointer The |pointer| to search for in the gb18030 index.
 * @return {?number} The code point corresponding to |pointer|, or null if
 *     |pointer| lies outside the ranges accepted in step 1.
 */
function indexGB18030RangesCodePointFor(pointer) {
  // 1. If pointer is greater than 39419 and less than 189000, or
  // pointer is greater than 1237575, return null.
  var in_gap = pointer > 39419 && pointer < 189000;
  if (in_gap || pointer > 1237575)
    return null;

  // 2. If pointer is 7457, return code point U+E7C7.
  if (pointer === 7457)
    return 0xE7C7;

  // 3. Let offset be the last pointer in index gb18030 ranges that
  // is equal to or less than pointer and let code point offset be
  // its corresponding code point.
  // Each entry is read as [pointer, code point]; the scan stops at the
  // first entry whose pointer exceeds |pointer|.
  var ranges = index('gb18030-ranges');
  var base_pointer = 0;
  var base_code_point = 0;
  var k = 0;
  while (k < ranges.length && ranges[k][0] <= pointer) {
    base_pointer = ranges[k][0];
    base_code_point = ranges[k][1];
    k += 1;
  }

  // 4. Return a code point whose value is code point offset +
  // pointer − offset.
  return base_code_point + pointer - base_pointer;
}
/**
 * Maps a code point to a gb18030 ranges pointer.
 * @param {number} code_point The |code point| to locate in the gb18030 index.
 * @return {number} The first pointer corresponding to |code point| in the
 *     gb18030 index.
 */
function indexGB18030RangesPointerFor(code_point) {
  // 1. If code point is U+E7C7, return pointer 7457.
  if (code_point === 0xE7C7)
    return 7457;

  // 2. Let offset be the last code point in index gb18030 ranges
  // that is equal to or less than code point and let pointer offset
  // be its corresponding pointer.
  // Each entry is read as [pointer, code point]; the scan stops at the
  // first entry whose code point exceeds |code_point|.
  var ranges = index('gb18030-ranges');
  var base_code_point = 0;
  var base_pointer = 0;
  var k = 0;
  while (k < ranges.length && ranges[k][1] <= code_point) {
    base_code_point = ranges[k][1];
    base_pointer = ranges[k][0];
    k += 1;
  }

  // 3. Return a pointer whose value is pointer offset + code point
  // − offset.
  return base_pointer + code_point - base_code_point;
}
/**
 * Finds the Shift_JIS pointer for a code point.
 * @param {number} code_point The |code_point| to search for in the Shift_JIS
 *     index.
 * @return {?number} The first pointer corresponding to |code point| in the
 *     Shift_JIS index, or null if |code point| is not in that index.
 */
function indexShiftJISPointerFor(code_point) {
  // 1. Let index be index jis0208 excluding all entries whose
  // pointer is in the range 8272 to 8835, inclusive.
  // The filtered copy is built on first use and cached in |shift_jis_index|;
  // excluded entries are replaced by null so pointers keep their positions.
  shift_jis_index = shift_jis_index ||
    index('jis0208').map(function(code_point, pointer) {
      return inRange(pointer, 8272, 8835) ? null : code_point;
    });
  var index_ = shift_jis_index;

  // 2. Return the index pointer for code point in index.
  // Routed through indexPointerFor (as indexBig5PointerFor does) so that a
  // missing code point yields null rather than indexOf's -1.
  return indexPointerFor(code_point, index_);
}
// Lazily-built cache used by indexShiftJISPointerFor.
var shift_jis_index;
// Lazily-built cache used by indexBig5PointerFor.
var big5_index_no_hkscs;
/**
 * Finds the Big5 pointer for a code point, ignoring the low-pointer part of
 * the index.
 * @param {number} code_point The |code_point| to search for in the big5
 *     index.
 * @return {?number} The pointer corresponding to |code point| in the
 *     filtered big5 index, or null if the ordinary lookup finds nothing.
 */
function indexBig5PointerFor(code_point) {
  // 1. Let index be index Big5 excluding all entries whose pointer
  // is less than (0xA1 - 0x81) * 157. Excluded entries are replaced by null
  // so the remaining pointers keep their positions; the result is cached.
  if (!big5_index_no_hkscs) {
    var first_kept_pointer = (0xA1 - 0x81) * 157;
    big5_index_no_hkscs = index('big5').map(function(entry, pointer) {
      return (pointer < first_kept_pointer) ? null : entry;
    });
  }
  var table = big5_index_no_hkscs;

  // 2. If code point is U+2550, U+255E, U+2561, U+256A, U+5341, or
  // U+5345, return the last pointer corresponding to code point in
  // index.
  switch (code_point) {
    case 0x2550:
    case 0x255E:
    case 0x2561:
    case 0x256A:
    case 0x5341:
    case 0x5345:
      return table.lastIndexOf(code_point);
  }

  // 3. Return the index pointer for code point in index.
  return indexPointerFor(code_point, table);
}
- //
- // 8. API
- //
/**
 * Encoding label used when a constructor is not given one.
 * @const
 */
var DEFAULT_ENCODING = 'utf-8';

// 8.1 Interface TextDecoder

/**
 * @constructor
 * @param {string=} label The label of the encoding;
 *     defaults to 'utf-8'.
 * @param {Object=} options Dictionary whose 'fatal' and 'ignoreBOM' members
 *     are read.
 */
function TextDecoder(label, options) {
  // Web IDL conventions: refuse to run unless invoked with |new|.
  if (!(this instanceof TextDecoder))
    throw TypeError('Called as a function. Did you forget \'new\'?');

  if (label === undefined)
    label = DEFAULT_ENCODING;
  else
    label = String(label);
  options = ToDictionary(options);

  // 3. Let dec be a new TextDecoder object.
  var dec = this;

  // A TextDecoder object has an associated encoding, decoder,
  // stream, ignore BOM flag (initially unset), BOM seen flag
  // (initially unset), error mode (initially replacement), and do
  // not flush flag (initially unset).

  /** @private */
  dec._encoding = null;
  /** @private @type {?Decoder} */
  dec._decoder = null;
  /** @private @type {boolean} */
  dec._ignoreBOM = false;
  /** @private @type {boolean} */
  dec._BOMseen = false;
  /** @private @type {string} */
  dec._error_mode = 'replacement';
  /** @private @type {boolean} */
  dec._do_not_flush = false;

  // 1. Let encoding be the result of getting an encoding from
  // label.
  var encoding = getEncoding(label);

  // 2. If encoding is failure or replacement, throw a RangeError.
  var unusable = encoding === null || encoding.name === 'replacement';
  if (unusable)
    throw RangeError('Unknown encoding: ' + label);

  // The encoding is known, but no decoder factory is registered for it.
  if (!decoders[encoding.name]) {
    throw Error('Decoder not present.' +
                ' Did you forget to include encoding-indexes.js first?');
  }

  // 4. Set dec's encoding to encoding.
  dec._encoding = encoding;

  // 5. If options's fatal member is true, set dec's error mode to
  // fatal.
  if (options['fatal'])
    dec._error_mode = 'fatal';

  // 6. If options's ignoreBOM member is true, set dec's ignore BOM
  // flag.
  if (options['ignoreBOM'])
    dec._ignoreBOM = true;

  // For pre-ES5 runtimes: no accessor properties are available, so the
  // public attributes are stored as plain data properties.
  if (!Object.defineProperty) {
    dec.encoding = encoding.name.toLowerCase();
    dec.fatal = dec._error_mode === 'fatal';
    dec.ignoreBOM = dec._ignoreBOM;
  }

  // 7. Return dec.
  return dec;
}
// Where accessor properties are supported, expose the public TextDecoder
// attributes as getter-only properties on the prototype.
if (Object.defineProperty) {
  (function(proto) {
    /**
     * @param {string} name Attribute name.
     * @param {function(this:TextDecoder): *} getter Attribute getter.
     */
    var defineGetter = function(name, getter) {
      Object.defineProperty(proto, name, {get: getter});
    };

    // The encoding attribute's getter must return encoding's name.
    defineGetter('encoding', /** @this {TextDecoder} */ function() {
      var name = this._encoding.name;
      return name.toLowerCase();
    });

    // The fatal attribute's getter must return true if error mode
    // is fatal, and false otherwise.
    defineGetter('fatal', /** @this {TextDecoder} */ function() {
      return this._error_mode === 'fatal';
    });

    // The ignoreBOM attribute's getter must return true if ignore
    // BOM flag is set, and false otherwise.
    defineGetter('ignoreBOM', /** @this {TextDecoder} */ function() {
      return this._ignoreBOM;
    });
  }(TextDecoder.prototype));
}
/**
 * Decodes |input| with this decoder's encoding.
 *
 * @param {BufferSource=} input The buffer of bytes to decode.
 * @param {Object=} options Dictionary whose 'stream' member is read; when it
 *     is truthy the decoder is kept so a later call continues with it.
 * @return {string} The decoded string.
 */
TextDecoder.prototype.decode = function decode(input, options) {
  // View the input as bytes: an ArrayBuffer is wrapped whole, an object
  // exposing an ArrayBuffer as |buffer| is wrapped over its own window, and
  // anything else is treated as empty input.
  var isObject = typeof input === 'object';
  var bytes;
  if (isObject && input instanceof ArrayBuffer) {
    bytes = new Uint8Array(input);
  } else if (isObject && 'buffer' in input &&
             input.buffer instanceof ArrayBuffer) {
    bytes = new Uint8Array(input.buffer, input.byteOffset, input.byteLength);
  } else {
    bytes = new Uint8Array(0);
  }

  options = ToDictionary(options);

  // 1. If the do not flush flag is unset, set decoder to a new
  // encoding's decoder, set stream to a new stream, and unset the
  // BOM seen flag.
  if (!this._do_not_flush) {
    var makeDecoder = decoders[this._encoding.name];
    this._decoder = makeDecoder({fatal: this._error_mode === 'fatal'});
    this._BOMseen = false;
  }

  // 2. If options's stream is true, set the do not flush flag, and
  // unset the do not flush flag otherwise.
  this._do_not_flush = Boolean(options['stream']);

  // 3. If input is given, push a copy of input to stream.
  // TODO: Align with spec algorithm - maintain stream on instance.
  var input_stream = new Stream(bytes);

  // 4. Let output be a new stream.
  var output = [];

  /**
   * Appends a handler result to |output|; null (continue) adds nothing.
   * @param {?(number|!Array.<number>)} emitted
   */
  function collect(emitted) {
    if (emitted === null)
      return;
    if (Array.isArray(emitted))
      output.push.apply(output, /**@type {!Array.<number>}*/(emitted));
    else
      output.push(emitted);
  }

  /** @type {?(number|!Array.<number>)} */
  var result;

  // 5. Feed every available token to the decoder. The loop ends at
  // end-of-stream or as soon as the decoder reports finished.
  // (A result of error is thrown inside the handler.)
  // TODO: Align with spec algorithm.
  for (var token = input_stream.read();
       token !== end_of_stream;
       token = input_stream.read()) {
    result = this._decoder.handler(input_stream, token);
    if (result === finished)
      break;
    collect(result);
  }

  // When not streaming, keep handing the decoder whatever the stream
  // yields until it reports finished or the stream is drained, then drop
  // the decoder.
  // TODO: Align with spec algorithm.
  if (!this._do_not_flush) {
    do {
      result = this._decoder.handler(input_stream, input_stream.read());
      if (result === finished)
        break;
      collect(result);
    } while (!input_stream.endOfStream());
    this._decoder = null;
  }

  // Serialize stream: for UTF-8, UTF-16LE and UTF-16BE, while the ignore
  // BOM flag and BOM seen flag are both unset, the first code point
  // produced sets the BOM seen flag and is dropped if it is U+FEFF.
  // Empty output leaves the flags untouched.
  var bomEncodings = ['UTF-8', 'UTF-16LE', 'UTF-16BE'];
  if (includes(bomEncodings, this._encoding.name) &&
      !this._ignoreBOM && !this._BOMseen) {
    if (output.length > 0) {
      this._BOMseen = true;
      if (output[0] === 0xFEFF)
        output.shift();
    }
  }

  return codePointsToString(output);
};
- // 8.2 Interface TextEncoder
/**
 * @constructor
 * @param {string=} label The label of the encoding. NONSTANDARD.
 * @param {Object=} options NONSTANDARD. The 'fatal' and
 *     'NONSTANDARD_allowLegacyEncoding' members are read.
 */
function TextEncoder(label, options) {
  // Web IDL conventions: refuse to run unless invoked with |new|.
  if (!(this instanceof TextEncoder))
    throw TypeError('Called as a function. Did you forget \'new\'?');
  options = ToDictionary(options);

  // 1. Let enc be a new TextEncoder object.
  var enc = this;

  // A TextEncoder object has an associated encoding and encoder.

  /** @private */
  enc._encoding = null;
  /** @private @type {?Encoder} */
  enc._encoder = null;

  // Non-standard
  /** @private @type {boolean} */
  enc._do_not_flush = false;
  /** @private @type {string} */
  enc._fatal = options['fatal'] ? 'fatal' : 'replacement';

  // 2. Set enc's encoding to UTF-8's encoder.
  if (!options['NONSTANDARD_allowLegacyEncoding']) {
    // Standard behavior: always UTF-8; a supplied label is ignored, with a
    // warning when a console is available.
    enc._encoding = getEncoding('utf-8');

    if (label !== undefined && 'console' in global) {
      console.warn('TextEncoder constructor called with encoding label, '
                   + 'which is ignored.');
    }
  } else {
    // NONSTANDARD behavior: honor the label.
    if (label === undefined)
      label = DEFAULT_ENCODING;
    else
      label = String(label);

    var encoding = getEncoding(label);
    if (encoding === null || encoding.name === 'replacement')
      throw RangeError('Unknown encoding: ' + label);
    if (!encoders[encoding.name]) {
      throw Error('Encoder not present.' +
                  ' Did you forget to include encoding-indexes.js first?');
    }
    enc._encoding = encoding;
  }

  // For pre-ES5 runtimes: store the attribute as a plain data property.
  if (!Object.defineProperty)
    enc.encoding = enc._encoding.name.toLowerCase();

  // 3. Return enc.
  return enc;
}
// Where accessor properties are supported, expose |encoding| as a
// getter-only property on the prototype.
if (Object.defineProperty) {
  // The encoding attribute's getter must return encoding's name.
  Object.defineProperty(TextEncoder.prototype, 'encoding', {
    /** @this {TextEncoder} */
    get: function() {
      var name = this._encoding.name;
      return name.toLowerCase();
    }
  });
}
/**
 * Encodes a string with this encoder's encoding.
 *
 * @param {string=} opt_string The string to encode.
 * @param {Object=} options Dictionary whose nonstandard 'stream' member is
 *     read; when it is truthy the encoder is kept for the next call.
 * @return {!Uint8Array} Encoded bytes, as a Uint8Array.
 */
TextEncoder.prototype.encode = function encode(opt_string, options) {
  if (opt_string === undefined)
    opt_string = '';
  else
    opt_string = String(opt_string);
  options = ToDictionary(options);

  // NOTE: This option is nonstandard. None of the encodings
  // permitted for encoding (i.e. UTF-8, UTF-16) are stateful when
  // the input is a USVString so streaming is not necessary.
  if (!this._do_not_flush) {
    var makeEncoder = encoders[this._encoding.name];
    this._encoder = makeEncoder({fatal: this._fatal === 'fatal'});
  }
  this._do_not_flush = Boolean(options['stream']);

  // 1. Convert input to a stream.
  var input = new Stream(stringToCodePoints(opt_string));

  // 2. Let output be a new stream
  var output = [];

  /**
   * Appends a handler result (one byte or a list of bytes) to |output|.
   * @param {(number|!Array.<number>)} emitted
   */
  function collect(emitted) {
    if (Array.isArray(emitted))
      output.push.apply(output, /**@type {!Array.<number>}*/(emitted));
    else
      output.push(emitted);
  }

  /** @type {?(number|!Array.<number>)} */
  var result;

  // 3. Feed every available code point to the encoder. The loop ends at
  // end-of-stream or as soon as the encoder reports finished.
  for (var token = input.read();
       token !== end_of_stream;
       token = input.read()) {
    result = this._encoder.handler(input, token);
    if (result === finished)
      break;
    collect(result);
  }

  // When not streaming, keep handing the encoder whatever the stream yields
  // until it reports finished, then drop the encoder.
  // TODO: Align with spec algorithm.
  if (!this._do_not_flush) {
    for (;;) {
      result = this._encoder.handler(input, input.read());
      if (result === finished)
        break;
      collect(result);
    }
    this._encoder = null;
  }

  // 3. If result is finished, convert output into a byte sequence,
  // and then return a Uint8Array object wrapping an ArrayBuffer
  // containing output.
  return new Uint8Array(output);
};
- //
- // 9. The encoding
- //
- // 9.1 utf-8
- // 9.1.1 utf-8 decoder
/**
 * Stateful UTF-8 decoder; |handler| consumes one byte per call.
 *
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options
 */
function UTF8Decoder(options) {
  var fatal = options.fatal;

  // utf-8's decoder's has an associated utf-8 code point, utf-8
  // bytes seen, and utf-8 bytes needed (all initially 0), a utf-8
  // lower boundary (initially 0x80), and a utf-8 upper boundary
  // (initially 0xBF).
  var /** @type {number} */ pending = 0,
      /** @type {number} */ seen = 0,
      /** @type {number} */ needed = 0,
      /** @type {number} */ lower = 0x80,
      /** @type {number} */ upper = 0xBF;

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    if (bite === end_of_stream) {
      // 2. If byte is end-of-stream (and nothing is pending), return
      // finished.
      if (needed === 0)
        return finished;
      // 1. If byte is end-of-stream and utf-8 bytes needed is not 0,
      // set utf-8 bytes needed to 0 and return error.
      needed = 0;
      return decoderError(fatal);
    }

    // 3. If utf-8 bytes needed is 0, based on byte:
    if (needed === 0) {
      // 0x00 to 0x7F: return a code point whose value is byte.
      if (inRange(bite, 0x00, 0x7F))
        return bite;

      // 0xC2 to 0xDF: lead byte of a two-byte sequence.
      if (inRange(bite, 0xC2, 0xDF)) {
        needed = 1;
        pending = bite & 0x1F;
        return null;
      }

      // 0xE0 to 0xEF: lead byte of a three-byte sequence.
      if (inRange(bite, 0xE0, 0xEF)) {
        // 0xE0 raises the lower boundary to 0xA0; 0xED lowers the upper
        // boundary to 0x9F.
        if (bite === 0xE0)
          lower = 0xA0;
        if (bite === 0xED)
          upper = 0x9F;
        needed = 2;
        pending = bite & 0xF;
        return null;
      }

      // 0xF0 to 0xF4: lead byte of a four-byte sequence.
      if (inRange(bite, 0xF0, 0xF4)) {
        // 0xF0 raises the lower boundary to 0x90; 0xF4 lowers the upper
        // boundary to 0x8F.
        if (bite === 0xF0)
          lower = 0x90;
        if (bite === 0xF4)
          upper = 0x8F;
        needed = 3;
        pending = bite & 0x7;
        return null;
      }

      // Otherwise: return error.
      return decoderError(fatal);
    }

    // 4. If byte is not in the range utf-8 lower boundary to utf-8
    // upper boundary, inclusive: reset all state, prepend byte to stream
    // so it is processed again, and return error.
    if (!inRange(bite, lower, upper)) {
      pending = needed = seen = 0;
      lower = 0x80;
      upper = 0xBF;
      stream.prepend(bite);
      return decoderError(fatal);
    }

    // 5. Set utf-8 lower boundary to 0x80 and utf-8 upper boundary
    // to 0xBF.
    lower = 0x80;
    upper = 0xBF;

    // 6. Set UTF-8 code point to (UTF-8 code point << 6) | (byte &
    // 0x3F)
    pending = (pending << 6) | (bite & 0x3F);

    // 7. Increase utf-8 bytes seen by one.
    seen += 1;

    // 8. If utf-8 bytes seen is not equal to utf-8 bytes needed,
    // continue.
    if (seen !== needed)
      return null;

    // 9-11. Emit the completed code point and clear the accumulator and
    // counters.
    var code_point = pending;
    pending = needed = seen = 0;
    return code_point;
  };
}
- // 9.1.2 utf-8 encoder
/**
 * UTF-8 encoder; |handler| turns one code point into one or more bytes.
 *
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 */
function UTF8Encoder(options) {
  var fatal = options.fatal;

  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    // 1. If code point is end-of-stream, return finished.
    if (code_point === end_of_stream)
      return finished;

    // 2. If code point is an ASCII code point, return a byte whose
    // value is code point.
    if (isASCIICodePoint(code_point))
      return code_point;

    // 3. Set count (number of continuation bytes) and offset (marker
    // bits of the lead byte) based on the range code point is in.
    var count, offset;
    if (inRange(code_point, 0x0080, 0x07FF)) {
      // U+0080 to U+07FF, inclusive: 1 and 0xC0
      count = 1;
      offset = 0xC0;
    } else if (inRange(code_point, 0x0800, 0xFFFF)) {
      // U+0800 to U+FFFF, inclusive: 2 and 0xE0
      count = 2;
      offset = 0xE0;
    } else if (inRange(code_point, 0x10000, 0x10FFFF)) {
      // U+10000 to U+10FFFF, inclusive: 3 and 0xF0
      count = 3;
      offset = 0xF0;
    }

    // 4. Let bytes be a byte sequence whose first byte is (code
    // point >> (6 × count)) + offset.
    var bytes = [(code_point >> (6 * count)) + offset];

    // 5. Append one continuation byte, 0x80 | (temp & 0x3F) with temp
    // being code point >> (6 × (remaining − 1)), per remaining count.
    for (var remaining = count; remaining > 0; remaining -= 1) {
      var temp = code_point >> (6 * (remaining - 1));
      bytes.push(0x80 | (temp & 0x3F));
    }

    // 6. Return bytes bytes, in order.
    return bytes;
  };
}

// Register the UTF-8 encoder and decoder factories under the encoding name.

/** @param {{fatal: boolean}} options */
encoders['UTF-8'] = function(options) {
  return new UTF8Encoder(options);
};

/** @param {{fatal: boolean}} options */
decoders['UTF-8'] = function(options) {
  return new UTF8Decoder(options);
};
- //
- // 10. Legacy single-byte encodings
- //
- // 10.1 single-byte decoder
/**
 * Decoder shared by the single-byte encodings; the encoding-specific part
 * is the |index| table it is given.
 *
 * @constructor
 * @implements {Decoder}
 * @param {!Array.<number>} index The encoding index.
 * @param {{fatal: boolean}} options
 */
function SingleByteDecoder(index, options) {
  var fatal = options.fatal;

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    // 1. If byte is end-of-stream, return finished.
    if (bite === end_of_stream)
      return finished;

    // 2. If byte is an ASCII byte, return a code point whose value
    // is byte.
    if (isASCIIByte(bite))
      return bite;

    // 3. Let code point be the index code point for byte − 0x80 in
    // index single-byte.
    var mapped = index[bite - 0x80];

    // 4. If code point is null, return error.
    // 5. Otherwise return a code point whose value is code point.
    return mapped === null ? decoderError(fatal) : mapped;
  };
}
- // 10.2 single-byte encoder
/**
 * Encoder shared by the single-byte encodings; the encoding-specific part
 * is the |index| table it is given.
 *
 * @constructor
 * @implements {Encoder}
 * @param {!Array.<?number>} index The encoding index.
 * @param {{fatal: boolean}} options
 */
function SingleByteEncoder(index, options) {
  var fatal = options.fatal;

  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    // 1. If code point is end-of-stream, return finished.
    if (code_point === end_of_stream)
      return finished;

    // 2. If code point is an ASCII code point, return a byte whose
    // value is code point.
    if (isASCIICodePoint(code_point))
      return code_point;

    // 3. Let pointer be the index pointer for code point in index
    // single-byte.
    var pointer = indexPointerFor(code_point, index);

    // 4. If pointer is null, return error with code point.
    // The result of encoderError is returned, as in the other encoders in
    // this file, so that execution can never fall through to step 5 with a
    // null pointer (null + 0x80 would yield the byte 0x80).
    if (pointer === null)
      return encoderError(code_point);

    // 5. Return a byte whose value is pointer + 0x80.
    return pointer + 0x80;
  };
}
// Register a decoder and an encoder factory for every encoding listed under
// the 'Legacy single-byte encodings' heading. Nothing is registered when
// the 'encoding-indexes' table is absent from the global object.
(function() {
  if (!('encoding-indexes' in global))
    return;

  /**
   * Registers both factories for one encoding; they share the index looked
   * up under the lowercased encoding name.
   * @param {{name: string}} encoding
   */
  function register(encoding) {
    var name = encoding.name;
    var idx = index(name.toLowerCase());

    /** @param {{fatal: boolean}} options */
    decoders[name] = function(options) {
      return new SingleByteDecoder(idx, options);
    };

    /** @param {{fatal: boolean}} options */
    encoders[name] = function(options) {
      return new SingleByteEncoder(idx, options);
    };
  }

  encodings
      .filter(function(category) {
        return category.heading === 'Legacy single-byte encodings';
      })
      .forEach(function(category) {
        category.encodings.forEach(function(encoding) {
          register(encoding);
        });
      });
}());
- //
- // 11. Legacy multi-byte Chinese (simplified) encodings
- //
- // 11.1 gbk
- // 11.1.1 gbk decoder
- // gbk's decoder is gb18030's decoder.
/** @param {{fatal: boolean}} options */
decoders['GBK'] = function(options) {
  return new GB18030Decoder(options);
};

// 11.1.2 gbk encoder
// gbk's encoder is gb18030's encoder with its gbk flag set.
/** @param {{fatal: boolean}} options */
encoders['GBK'] = function(options) {
  return new GB18030Encoder(options, true);
};

// 11.2 gb18030

// 11.2.1 gb18030 decoder
/**
 * Stateful gb18030 decoder; |handler| consumes one byte per call and
 * buffers up to three pending bytes of a multi-byte sequence.
 *
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options
 */
function GB18030Decoder(options) {
  var fatal = options.fatal;
  // gb18030's decoder has an associated gb18030 first, gb18030
  // second, and gb18030 third (all initially 0x00).
  var /** @type {number} */ gb18030_first = 0x00,
      /** @type {number} */ gb18030_second = 0x00,
      /** @type {number} */ gb18030_third = 0x00;

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    // 1. If byte is end-of-stream and gb18030 first, gb18030
    // second, and gb18030 third are 0x00, return finished.
    if (bite === end_of_stream && gb18030_first === 0x00 &&
        gb18030_second === 0x00 && gb18030_third === 0x00) {
      return finished;
    }

    // 2. If byte is end-of-stream, and gb18030 first, gb18030
    // second, or gb18030 third is not 0x00, set gb18030 first,
    // gb18030 second, and gb18030 third to 0x00, and return error.
    if (bite === end_of_stream &&
        (gb18030_first !== 0x00 || gb18030_second !== 0x00 ||
         gb18030_third !== 0x00)) {
      gb18030_first = 0x00;
      gb18030_second = 0x00;
      gb18030_third = 0x00;
      // The error must be returned here: otherwise its value is discarded
      // and the end-of-stream token falls through to the byte
      // classification steps below as if it were a byte.
      return decoderError(fatal);
    }

    var code_point;

    // 3. If gb18030 third is not 0x00, run these substeps:
    if (gb18030_third !== 0x00) {
      // 1. Let code point be null.
      code_point = null;

      // 2. If byte is in the range 0x30 to 0x39, inclusive, set
      // code point to the index gb18030 ranges code point for
      // (((gb18030 first − 0x81) × 10 + gb18030 second − 0x30) ×
      // 126 + gb18030 third − 0x81) × 10 + byte − 0x30.
      if (inRange(bite, 0x30, 0x39)) {
        code_point = indexGB18030RangesCodePointFor(
            (((gb18030_first - 0x81) * 10 + gb18030_second - 0x30) * 126 +
             gb18030_third - 0x81) * 10 + bite - 0x30);
      }

      // 3. Let buffer be a byte sequence consisting of gb18030
      // second, gb18030 third, and byte, in order.
      var buffer = [gb18030_second, gb18030_third, bite];

      // 4. Set gb18030 first, gb18030 second, and gb18030 third to
      // 0x00.
      gb18030_first = 0x00;
      gb18030_second = 0x00;
      gb18030_third = 0x00;

      // 5. If code point is null, prepend buffer to stream and
      // return error.
      if (code_point === null) {
        stream.prepend(buffer);
        return decoderError(fatal);
      }

      // 6. Return a code point whose value is code point.
      return code_point;
    }

    // 4. If gb18030 second is not 0x00, run these substeps:
    if (gb18030_second !== 0x00) {
      // 1. If byte is in the range 0x81 to 0xFE, inclusive, set
      // gb18030 third to byte and return continue.
      if (inRange(bite, 0x81, 0xFE)) {
        gb18030_third = bite;
        return null;
      }

      // 2. Prepend gb18030 second followed by byte to stream, set
      // gb18030 first and gb18030 second to 0x00, and return error.
      stream.prepend([gb18030_second, bite]);
      gb18030_first = 0x00;
      gb18030_second = 0x00;
      return decoderError(fatal);
    }

    // 5. If gb18030 first is not 0x00, run these substeps:
    if (gb18030_first !== 0x00) {
      // 1. If byte is in the range 0x30 to 0x39, inclusive, set
      // gb18030 second to byte and return continue.
      if (inRange(bite, 0x30, 0x39)) {
        gb18030_second = bite;
        return null;
      }

      // 2. Let lead be gb18030 first, let pointer be null, and set
      // gb18030 first to 0x00.
      var lead = gb18030_first;
      var pointer = null;
      gb18030_first = 0x00;

      // 3. Let offset be 0x40 if byte is less than 0x7F and 0x41
      // otherwise.
      var offset = bite < 0x7F ? 0x40 : 0x41;

      // 4. If byte is in the range 0x40 to 0x7E, inclusive, or 0x80
      // to 0xFE, inclusive, set pointer to (lead − 0x81) × 190 +
      // (byte − offset).
      if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFE))
        pointer = (lead - 0x81) * 190 + (bite - offset);

      // 5. Let code point be null if pointer is null and the index
      // code point for pointer in index gb18030 otherwise.
      code_point = pointer === null ? null :
          indexCodePointFor(pointer, index('gb18030'));

      // 6. If code point is null and byte is an ASCII byte, prepend
      // byte to stream.
      if (code_point === null && isASCIIByte(bite))
        stream.prepend(bite);

      // 7. If code point is null, return error.
      if (code_point === null)
        return decoderError(fatal);

      // 8. Return a code point whose value is code point.
      return code_point;
    }

    // 6. If byte is an ASCII byte, return a code point whose value
    // is byte.
    if (isASCIIByte(bite))
      return bite;

    // 7. If byte is 0x80, return code point U+20AC.
    if (bite === 0x80)
      return 0x20AC;

    // 8. If byte is in the range 0x81 to 0xFE, inclusive, set
    // gb18030 first to byte and return continue.
    if (inRange(bite, 0x81, 0xFE)) {
      gb18030_first = bite;
      return null;
    }

    // 9. Return error.
    return decoderError(fatal);
  };
}
- // 11.2.2 gb18030 encoder
/**
 * gb18030 encoder. With |gbk_flag| set it acts as the gbk encoder: U+20AC
 * is emitted as byte 0x80 and code points outside index gb18030 are
 * errors instead of being emitted as four-byte sequences.
 *
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 * @param {boolean=} gbk_flag
 */
function GB18030Encoder(options, gbk_flag) {
  var fatal = options.fatal;
  // gb18030's encoder has an associated gbk flag (initially unset).

  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    // 1. If code point is end-of-stream, return finished.
    if (code_point === end_of_stream)
      return finished;

    // 2. If code point is an ASCII code point, return a byte whose
    // value is code point.
    if (isASCIICodePoint(code_point))
      return code_point;

    // 3. If code point is U+E5E5, return error with code point.
    if (code_point === 0xE5E5)
      return encoderError(code_point);

    // 4. If the gbk flag is set and code point is U+20AC, return
    // byte 0x80.
    if (gbk_flag && code_point === 0x20AC)
      return 0x80;

    // 5. Let pointer be the index pointer for code point in index
    // gb18030.
    var pointer = indexPointerFor(code_point, index('gb18030'));

    // 6. If pointer is not null, emit the two-byte form: lead is
    // floor(pointer / 190) + 0x81, trail is pointer % 190 plus an offset
    // of 0x40 when trail is less than 0x3F and 0x41 otherwise.
    if (pointer !== null) {
      var trail = pointer % 190;
      return [floor(pointer / 190) + 0x81,
              trail + (trail < 0x3F ? 0x40 : 0x41)];
    }

    // 7. If gbk flag is set, return error with code point.
    if (gbk_flag)
      return encoderError(code_point);

    // 8. Set pointer to the index gb18030 ranges pointer for code
    // point.
    var remainder = indexGB18030RangesPointerFor(code_point);

    // 9-14. Peel off the four byte values, most significant first,
    // subtracting each one's contribution from the remainder.
    var byte1 = floor(remainder / 10 / 126 / 10);
    remainder -= byte1 * 10 * 126 * 10;

    var byte2 = floor(remainder / 10 / 126);
    remainder -= byte2 * 10 * 126;

    var byte3 = floor(remainder / 10);
    var byte4 = remainder - byte3 * 10;

    // 15. Return four bytes whose values are byte1 + 0x81, byte2 +
    // 0x30, byte3 + 0x81, byte4 + 0x30.
    return [byte1 + 0x81, byte2 + 0x30, byte3 + 0x81, byte4 + 0x30];
  };
}

// Register the gb18030 factories; the encoder is created without the gbk
// flag.

/** @param {{fatal: boolean}} options */
encoders['gb18030'] = function(options) {
  return new GB18030Encoder(options);
};

/** @param {{fatal: boolean}} options */
decoders['gb18030'] = function(options) {
  return new GB18030Decoder(options);
};
- //
- // 12. Legacy multi-byte Chinese (traditional) encodings
- //
- // 12.1 Big5
- // 12.1.1 Big5 decoder
/**
 * Stateful Big5 decoder; |handler| consumes one byte per call and buffers
 * a pending lead byte between calls.
 *
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options
 */
function Big5Decoder(options) {
  var fatal = options.fatal;
  // Big5's decoder has an associated Big5 lead (initially 0x00).
  var /** @type {number} */ Big5_lead = 0x00;

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    if (bite === end_of_stream) {
      // 2. If byte is end-of-stream and Big5 lead is 0x00, return
      // finished.
      if (Big5_lead === 0x00)
        return finished;
      // 1. If byte is end-of-stream and Big5 lead is not 0x00, set
      // Big5 lead to 0x00 and return error.
      Big5_lead = 0x00;
      return decoderError(fatal);
    }

    // No lead byte pending: steps 4-6.
    if (Big5_lead === 0x00) {
      // 4. If byte is an ASCII byte, return a code point whose value
      // is byte.
      if (isASCIIByte(bite))
        return bite;

      // 5. If byte is in the range 0x81 to 0xFE, inclusive, set Big5
      // lead to byte and return continue.
      if (inRange(bite, 0x81, 0xFE)) {
        Big5_lead = bite;
        return null;
      }

      // 6. Return error.
      return decoderError(fatal);
    }

    // 3. Big5 lead is not 0x00: let lead be Big5 lead, let pointer be
    // null, set Big5 lead to 0x00, and then run these substeps:
    var lead = Big5_lead;
    Big5_lead = 0x00;
    var pointer = null;

    // 1-2. If byte is in the range 0x40 to 0x7E, inclusive, or 0xA1 to
    // 0xFE, inclusive, set pointer to (lead − 0x81) × 157 + (byte −
    // offset), where offset is 0x40 if byte is less than 0x7F and 0x62
    // otherwise.
    if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0xA1, 0xFE)) {
      var offset = bite < 0x7F ? 0x40 : 0x62;
      pointer = (lead - 0x81) * 157 + (bite - offset);
    }

    // 3. Four pointers decode to a pair of code points:
    //   1133 -> U+00CA U+0304      1135 -> U+00CA U+030C
    //   1164 -> U+00EA U+0304      1166 -> U+00EA U+030C
    if (pointer === 1133)
      return [0x00CA, 0x0304];
    if (pointer === 1135)
      return [0x00CA, 0x030C];
    if (pointer === 1164)
      return [0x00EA, 0x0304];
    if (pointer === 1166)
      return [0x00EA, 0x030C];

    // 4. Let code point be null if pointer is null and the index
    // code point for pointer in index Big5 otherwise.
    var code_point = null;
    if (pointer !== null)
      code_point = indexCodePointFor(pointer, index('big5'));

    // 7. Return a code point whose value is code point.
    if (code_point !== null)
      return code_point;

    // 5. If code point is null and byte is an ASCII byte, prepend
    // byte to stream.
    if (isASCIIByte(bite))
      stream.prepend(bite);

    // 6. If code point is null, return error.
    return decoderError(fatal);
  };
}
- // 12.1.2 Big5 encoder
/**
 * Big5 encoder; |handler| turns one code point into one byte (ASCII) or a
 * lead/trail byte pair.
 *
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 */
function Big5Encoder(options) {
  var fatal = options.fatal;

  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    // 1. If code point is end-of-stream, return finished.
    if (code_point === end_of_stream)
      return finished;

    // 2. If code point is an ASCII code point, return a byte whose
    // value is code point.
    if (isASCIICodePoint(code_point))
      return code_point;

    // 3. Let pointer be the index Big5 pointer for code point.
    var pointer = indexBig5PointerFor(code_point);

    // 4. If pointer is null, return error with code point.
    if (pointer === null)
      return encoderError(code_point);

    // 5. Let lead be floor(pointer / 157) + 0x81.
    var lead = floor(pointer / 157) + 0x81;

    // 6. If lead is less than 0xA1, return error with code point.
    if (lead < 0xA1)
      return encoderError(code_point);

    // 7. Let trail be pointer % 157.
    var trail = pointer % 157;

    // 8. The trail byte is trail plus an offset of 0x40 if trail is less
    // than 0x3F and 0x62 otherwise; return lead followed by that byte.
    return [lead, trail + (trail < 0x3F ? 0x40 : 0x62)];
  };
}

// Register the Big5 encoder and decoder factories under the encoding name.

/** @param {{fatal: boolean}} options */
encoders['Big5'] = function(options) {
  return new Big5Encoder(options);
};

/** @param {{fatal: boolean}} options */
decoders['Big5'] = function(options) {
  return new Big5Decoder(options);
};
- //
- // 13. Legacy multi-byte Japanese encodings
- //
- // 13.1 euc-jp
- // 13.1.1 euc-jp decoder
- /**
- * @constructor
- * @implements {Decoder}
- * @param {{fatal: boolean}} options
- */
- function EUCJPDecoder(options) {
- var fatal = options.fatal;
- // euc-jp's decoder has an associated euc-jp jis0212 flag
- // (initially unset) and euc-jp lead (initially 0x00).
- var /** @type {boolean} */ eucjp_jis0212_flag = false,
- /** @type {number} */ eucjp_lead = 0x00;
- /**
- * @param {Stream} stream The stream of bytes being decoded.
- * @param {number} bite The next byte read from the stream.
- * @return {?(number|!Array.<number>)} The next code point(s)
- * decoded, or null if not enough data exists in the input
- * stream to decode a complete code point.
- */
- this.handler = function(stream, bite) {
- // 1. If byte is end-of-stream and euc-jp lead is not 0x00, set
- // euc-jp lead to 0x00, and return error.
- if (bite === end_of_stream && eucjp_lead !== 0x00) {
- eucjp_lead = 0x00;
- return decoderError(fatal);
- }
- // 2. If byte is end-of-stream and euc-jp lead is 0x00, return
- // finished.
- if (bite === end_of_stream && eucjp_lead === 0x00)
- return finished;
- // 3. If euc-jp lead is 0x8E and byte is in the range 0xA1 to
- // 0xDF, inclusive, set euc-jp lead to 0x00 and return a code
- // point whose value is 0xFF61 − 0xA1 + byte.
- if (eucjp_lead === 0x8E && inRange(bite, 0xA1, 0xDF)) {
- eucjp_lead = 0x00;
- return 0xFF61 - 0xA1 + bite;
- }
- // 4. If euc-jp lead is 0x8F and byte is in the range 0xA1 to
- // 0xFE, inclusive, set the euc-jp jis0212 flag, set euc-jp lead
- // to byte, and return continue.
- if (eucjp_lead === 0x8F && inRange(bite, 0xA1, 0xFE)) {
- eucjp_jis0212_flag = true;
- eucjp_lead = bite;
- return null;
- }
- // 5. If euc-jp lead is not 0x00, let lead be euc-jp lead, set
- // euc-jp lead to 0x00, and run these substeps:
- if (eucjp_lead !== 0x00) {
- var lead = eucjp_lead;
- eucjp_lead = 0x00;
- // 1. Let code point be null.
- var code_point = null;
- // 2. If lead and byte are both in the range 0xA1 to 0xFE,
- // inclusive, set code point to the index code point for (lead
- // − 0xA1) × 94 + byte − 0xA1 in index jis0208 if the euc-jp
- // jis0212 flag is unset and in index jis0212 otherwise.
- if (inRange(lead, 0xA1, 0xFE) && inRange(bite, 0xA1, 0xFE)) {
- code_point = indexCodePointFor(
- (lead - 0xA1) * 94 + (bite - 0xA1),
- index(!eucjp_jis0212_flag ? 'jis0208' : 'jis0212'));
- }
- // 3. Unset the euc-jp jis0212 flag.
- eucjp_jis0212_flag = false;
- // 4. If byte is not in the range 0xA1 to 0xFE, inclusive,
- // prepend byte to stream.
- if (!inRange(bite, 0xA1, 0xFE))
- stream.prepend(bite);
- // 5. If code point is null, return error.
- if (code_point === null)
- return decoderError(fatal);
- // 6. Return a code point whose value is code point.
- return code_point;
- }
- // 6. If byte is an ASCII byte, return a code point whose value
- // is byte.
- if (isASCIIByte(bite))
- return bite;
- // 7. If byte is 0x8E, 0x8F, or in the range 0xA1 to 0xFE,
- // inclusive, set euc-jp lead to byte and return continue.
- if (bite === 0x8E || bite === 0x8F || inRange(bite, 0xA1, 0xFE)) {
- eucjp_lead = bite;
- return null;
- }
- // 8. Return error.
- return decoderError(fatal);
- };
- }
- // 13.1.2 euc-jp encoder
- /**
- * @constructor
- * @implements {Encoder}
- * @param {{fatal: boolean}} options
- */
- function EUCJPEncoder(options) {
- var fatal = options.fatal;
- /**
- * @param {Stream} stream Input stream.
- * @param {number} code_point Next code point read from the stream.
- * @return {(number|!Array.<number>)} Byte(s) to emit.
- */
- this.handler = function(stream, code_point) {
- // 1. If code point is end-of-stream, return finished.
- if (code_point === end_of_stream)
- return finished;
- // 2. If code point is an ASCII code point, return a byte whose
- // value is code point.
- if (isASCIICodePoint(code_point))
- return code_point;
- // 3. If code point is U+00A5, return byte 0x5C.
- if (code_point === 0x00A5)
- return 0x5C;
- // 4. If code point is U+203E, return byte 0x7E.
- if (code_point === 0x203E)
- return 0x7E;
- // 5. If code point is in the range U+FF61 to U+FF9F, inclusive,
- // return two bytes whose values are 0x8E and code point −
- // 0xFF61 + 0xA1.
- if (inRange(code_point, 0xFF61, 0xFF9F))
- return [0x8E, code_point - 0xFF61 + 0xA1];
- // 6. If code point is U+2212, set it to U+FF0D.
- if (code_point === 0x2212)
- code_point = 0xFF0D;
- // 7. Let pointer be the index pointer for code point in index
- // jis0208.
- var pointer = indexPointerFor(code_point, index('jis0208'));
- // 8. If pointer is null, return error with code point.
- if (pointer === null)
- return encoderError(code_point);
- // 9. Let lead be floor(pointer / 94) + 0xA1.
- var lead = floor(pointer / 94) + 0xA1;
- // 10. Let trail be pointer % 94 + 0xA1.
- var trail = pointer % 94 + 0xA1;
- // 11. Return two bytes whose values are lead and trail.
- return [lead, trail];
- };
- }
- /** @param {{fatal: boolean}} options */
- encoders['EUC-JP'] = function(options) {
- return new EUCJPEncoder(options);
- };
- /** @param {{fatal: boolean}} options */
- decoders['EUC-JP'] = function(options) {
- return new EUCJPDecoder(options);
- };
- // 13.2 iso-2022-jp
- // 13.2.1 iso-2022-jp decoder
- /**
- * @constructor
- * @implements {Decoder}
- * @param {{fatal: boolean}} options
- */
- function ISO2022JPDecoder(options) {
- var fatal = options.fatal;
- /** @enum */
- var states = {
- ASCII: 0,
- Roman: 1,
- Katakana: 2,
- LeadByte: 3,
- TrailByte: 4,
- EscapeStart: 5,
- Escape: 6
- };
- // iso-2022-jp's decoder has an associated iso-2022-jp decoder
- // state (initially ASCII), iso-2022-jp decoder output state
- // (initially ASCII), iso-2022-jp lead (initially 0x00), and
- // iso-2022-jp output flag (initially unset).
- var /** @type {number} */ iso2022jp_decoder_state = states.ASCII,
- /** @type {number} */ iso2022jp_decoder_output_state = states.ASCII,
- /** @type {number} */ iso2022jp_lead = 0x00,
- /** @type {boolean} */ iso2022jp_output_flag = false;
- /**
- * @param {Stream} stream The stream of bytes being decoded.
- * @param {number} bite The next byte read from the stream.
- * @return {?(number|!Array.<number>)} The next code point(s)
- * decoded, or null if not enough data exists in the input
- * stream to decode a complete code point.
- */
- this.handler = function(stream, bite) {
- // switching on iso-2022-jp decoder state:
- switch (iso2022jp_decoder_state) {
- default:
- case states.ASCII:
- // ASCII
- // Based on byte:
- // 0x1B
- if (bite === 0x1B) {
- // Set iso-2022-jp decoder state to escape start and return
- // continue.
- iso2022jp_decoder_state = states.EscapeStart;
- return null;
- }
- // 0x00 to 0x7F, excluding 0x0E, 0x0F, and 0x1B
- if (inRange(bite, 0x00, 0x7F) && bite !== 0x0E
- && bite !== 0x0F && bite !== 0x1B) {
- // Unset the iso-2022-jp output flag and return a code point
- // whose value is byte.
- iso2022jp_output_flag = false;
- return bite;
- }
- // end-of-stream
- if (bite === end_of_stream) {
- // Return finished.
- return finished;
- }
- // Otherwise
- // Unset the iso-2022-jp output flag and return error.
- iso2022jp_output_flag = false;
- return decoderError(fatal);
- case states.Roman:
- // Roman
- // Based on byte:
- // 0x1B
- if (bite === 0x1B) {
- // Set iso-2022-jp decoder state to escape start and return
- // continue.
- iso2022jp_decoder_state = states.EscapeStart;
- return null;
- }
- // 0x5C
- if (bite === 0x5C) {
- // Unset the iso-2022-jp output flag and return code point
- // U+00A5.
- iso2022jp_output_flag = false;
- return 0x00A5;
- }
- // 0x7E
- if (bite === 0x7E) {
- // Unset the iso-2022-jp output flag and return code point
- // U+203E.
- iso2022jp_output_flag = false;
- return 0x203E;
- }
- // 0x00 to 0x7F, excluding 0x0E, 0x0F, 0x1B, 0x5C, and 0x7E
- if (inRange(bite, 0x00, 0x7F) && bite !== 0x0E && bite !== 0x0F
- && bite !== 0x1B && bite !== 0x5C && bite !== 0x7E) {
- // Unset the iso-2022-jp output flag and return a code point
- // whose value is byte.
- iso2022jp_output_flag = false;
- return bite;
- }
- // end-of-stream
- if (bite === end_of_stream) {
- // Return finished.
- return finished;
- }
- // Otherwise
- // Unset the iso-2022-jp output flag and return error.
- iso2022jp_output_flag = false;
- return decoderError(fatal);
- case states.Katakana:
- // Katakana
- // Based on byte:
- // 0x1B
- if (bite === 0x1B) {
- // Set iso-2022-jp decoder state to escape start and return
- // continue.
- iso2022jp_decoder_state = states.EscapeStart;
- return null;
- }
- // 0x21 to 0x5F
- if (inRange(bite, 0x21, 0x5F)) {
- // Unset the iso-2022-jp output flag and return a code point
- // whose value is 0xFF61 − 0x21 + byte.
- iso2022jp_output_flag = false;
- return 0xFF61 - 0x21 + bite;
- }
- // end-of-stream
- if (bite === end_of_stream) {
- // Return finished.
- return finished;
- }
- // Otherwise
- // Unset the iso-2022-jp output flag and return error.
- iso2022jp_output_flag = false;
- return decoderError(fatal);
- case states.LeadByte:
- // Lead byte
- // Based on byte:
- // 0x1B
- if (bite === 0x1B) {
- // Set iso-2022-jp decoder state to escape start and return
- // continue.
- iso2022jp_decoder_state = states.EscapeStart;
- return null;
- }
- // 0x21 to 0x7E
- if (inRange(bite, 0x21, 0x7E)) {
- // Unset the iso-2022-jp output flag, set iso-2022-jp lead
- // to byte, iso-2022-jp decoder state to trail byte, and
- // return continue.
- iso2022jp_output_flag = false;
- iso2022jp_lead = bite;
- iso2022jp_decoder_state = states.TrailByte;
- return null;
- }
- // end-of-stream
- if (bite === end_of_stream) {
- // Return finished.
- return finished;
- }
- // Otherwise
- // Unset the iso-2022-jp output flag and return error.
- iso2022jp_output_flag = false;
- return decoderError(fatal);
- case states.TrailByte:
- // Trail byte
- // Based on byte:
- // 0x1B
- if (bite === 0x1B) {
- // Set iso-2022-jp decoder state to escape start and return
- // continue.
- iso2022jp_decoder_state = states.EscapeStart;
- return decoderError(fatal);
- }
- // 0x21 to 0x7E
- if (inRange(bite, 0x21, 0x7E)) {
- // 1. Set the iso-2022-jp decoder state to lead byte.
- iso2022jp_decoder_state = states.LeadByte;
- // 2. Let pointer be (iso-2022-jp lead − 0x21) × 94 + byte − 0x21.
- var pointer = (iso2022jp_lead - 0x21) * 94 + bite - 0x21;
- // 3. Let code point be the index code point for pointer in
- // index jis0208.
- var code_point = indexCodePointFor(pointer, index('jis0208'));
- // 4. If code point is null, return error.
- if (code_point === null)
- return decoderError(fatal);
- // 5. Return a code point whose value is code point.
- return code_point;
- }
- // end-of-stream
- if (bite === end_of_stream) {
- // Set the iso-2022-jp decoder state to lead byte, prepend
- // byte to stream, and return error.
- iso2022jp_decoder_state = states.LeadByte;
- stream.prepend(bite);
- return decoderError(fatal);
- }
- // Otherwise
- // Set iso-2022-jp decoder state to lead byte and return
- // error.
- iso2022jp_decoder_state = states.LeadByte;
- return decoderError(fatal);
- case states.EscapeStart:
- // Escape start
- // 1. If byte is either 0x24 or 0x28, set iso-2022-jp lead to
- // byte, iso-2022-jp decoder state to escape, and return
- // continue.
- if (bite === 0x24 || bite === 0x28) {
- iso2022jp_lead = bite;
- iso2022jp_decoder_state = states.Escape;
- return null;
- }
- // 2. Prepend byte to stream.
- stream.prepend(bite);
- // 3. Unset the iso-2022-jp output flag, set iso-2022-jp
- // decoder state to iso-2022-jp decoder output state, and
- // return error.
- iso2022jp_output_flag = false;
- iso2022jp_decoder_state = iso2022jp_decoder_output_state;
- return decoderError(fatal);
- case states.Escape:
- // Escape
- // 1. Let lead be iso-2022-jp lead and set iso-2022-jp lead to
- // 0x00.
- var lead = iso2022jp_lead;
- iso2022jp_lead = 0x00;
- // 2. Let state be null.
- var state = null;
- // 3. If lead is 0x28 and byte is 0x42, set state to ASCII.
- if (lead === 0x28 && bite === 0x42)
- state = states.ASCII;
- // 4. If lead is 0x28 and byte is 0x4A, set state to Roman.
- if (lead === 0x28 && bite === 0x4A)
- state = states.Roman;
- // 5. If lead is 0x28 and byte is 0x49, set state to Katakana.
- if (lead === 0x28 && bite === 0x49)
- state = states.Katakana;
- // 6. If lead is 0x24 and byte is either 0x40 or 0x42, set
- // state to lead byte.
- if (lead === 0x24 && (bite === 0x40 || bite === 0x42))
- state = states.LeadByte;
- // 7. If state is non-null, run these substeps:
- if (state !== null) {
- // 1. Set iso-2022-jp decoder state and iso-2022-jp decoder
- // output state to states.
- iso2022jp_decoder_state = iso2022jp_decoder_state = state;
- // 2. Let output flag be the iso-2022-jp output flag.
- var output_flag = iso2022jp_output_flag;
- // 3. Set the iso-2022-jp output flag.
- iso2022jp_output_flag = true;
- // 4. Return continue, if output flag is unset, and error
- // otherwise.
- return !output_flag ? null : decoderError(fatal);
- }
- // 8. Prepend lead and byte to stream.
- stream.prepend([lead, bite]);
- // 9. Unset the iso-2022-jp output flag, set iso-2022-jp
- // decoder state to iso-2022-jp decoder output state and
- // return error.
- iso2022jp_output_flag = false;
- iso2022jp_decoder_state = iso2022jp_decoder_output_state;
- return decoderError(fatal);
- }
- };
- }
- // 13.2.2 iso-2022-jp encoder
- /**
- * @constructor
- * @implements {Encoder}
- * @param {{fatal: boolean}} options
- */
- function ISO2022JPEncoder(options) {
- var fatal = options.fatal;
- // iso-2022-jp's encoder has an associated iso-2022-jp encoder
- // state which is one of ASCII, Roman, and jis0208 (initially
- // ASCII).
- /** @enum */
- var states = {
- ASCII: 0,
- Roman: 1,
- jis0208: 2
- };
- var /** @type {number} */ iso2022jp_state = states.ASCII;
- /**
- * @param {Stream} stream Input stream.
- * @param {number} code_point Next code point read from the stream.
- * @return {(number|!Array.<number>)} Byte(s) to emit.
- */
- this.handler = function(stream, code_point) {
- // 1. If code point is end-of-stream and iso-2022-jp encoder
- // state is not ASCII, prepend code point to stream, set
- // iso-2022-jp encoder state to ASCII, and return three bytes
- // 0x1B 0x28 0x42.
- if (code_point === end_of_stream &&
- iso2022jp_state !== states.ASCII) {
- stream.prepend(code_point);
- iso2022jp_state = states.ASCII;
- return [0x1B, 0x28, 0x42];
- }
- // 2. If code point is end-of-stream and iso-2022-jp encoder
- // state is ASCII, return finished.
- if (code_point === end_of_stream && iso2022jp_state === states.ASCII)
- return finished;
- // 3. If ISO-2022-JP encoder state is ASCII or Roman, and code
- // point is U+000E, U+000F, or U+001B, return error with U+FFFD.
- if ((iso2022jp_state === states.ASCII ||
- iso2022jp_state === states.Roman) &&
- (code_point === 0x000E || code_point === 0x000F ||
- code_point === 0x001B)) {
- return encoderError(0xFFFD);
- }
- // 4. If iso-2022-jp encoder state is ASCII and code point is an
- // ASCII code point, return a byte whose value is code point.
- if (iso2022jp_state === states.ASCII &&
- isASCIICodePoint(code_point))
- return code_point;
- // 5. If iso-2022-jp encoder state is Roman and code point is an
- // ASCII code point, excluding U+005C and U+007E, or is U+00A5
- // or U+203E, run these substeps:
- if (iso2022jp_state === states.Roman &&
- ((isASCIICodePoint(code_point) &&
- code_point !== 0x005C && code_point !== 0x007E) ||
- (code_point == 0x00A5 || code_point == 0x203E))) {
- // 1. If code point is an ASCII code point, return a byte
- // whose value is code point.
- if (isASCIICodePoint(code_point))
- return code_point;
- // 2. If code point is U+00A5, return byte 0x5C.
- if (code_point === 0x00A5)
- return 0x5C;
- // 3. If code point is U+203E, return byte 0x7E.
- if (code_point === 0x203E)
- return 0x7E;
- }
- // 6. If code point is an ASCII code point, and iso-2022-jp
- // encoder state is not ASCII, prepend code point to stream, set
- // iso-2022-jp encoder state to ASCII, and return three bytes
- // 0x1B 0x28 0x42.
- if (isASCIICodePoint(code_point) &&
- iso2022jp_state !== states.ASCII) {
- stream.prepend(code_point);
- iso2022jp_state = states.ASCII;
- return [0x1B, 0x28, 0x42];
- }
- // 7. If code point is either U+00A5 or U+203E, and iso-2022-jp
- // encoder state is not Roman, prepend code point to stream, set
- // iso-2022-jp encoder state to Roman, and return three bytes
- // 0x1B 0x28 0x4A.
- if ((code_point === 0x00A5 || code_point === 0x203E) &&
- iso2022jp_state !== states.Roman) {
- stream.prepend(code_point);
- iso2022jp_state = states.Roman;
- return [0x1B, 0x28, 0x4A];
- }
- // 8. If code point is U+2212, set it to U+FF0D.
- if (code_point === 0x2212)
- code_point = 0xFF0D;
- // 9. Let pointer be the index pointer for code point in index
- // jis0208.
- var pointer = indexPointerFor(code_point, index('jis0208'));
- // 10. If pointer is null, return error with code point.
- if (pointer === null)
- return encoderError(code_point);
- // 11. If iso-2022-jp encoder state is not jis0208, prepend code
- // point to stream, set iso-2022-jp encoder state to jis0208,
- // and return three bytes 0x1B 0x24 0x42.
- if (iso2022jp_state !== states.jis0208) {
- stream.prepend(code_point);
- iso2022jp_state = states.jis0208;
- return [0x1B, 0x24, 0x42];
- }
- // 12. Let lead be floor(pointer / 94) + 0x21.
- var lead = floor(pointer / 94) + 0x21;
- // 13. Let trail be pointer % 94 + 0x21.
- var trail = pointer % 94 + 0x21;
- // 14. Return two bytes whose values are lead and trail.
- return [lead, trail];
- };
- }
- /** @param {{fatal: boolean}} options */
- encoders['ISO-2022-JP'] = function(options) {
- return new ISO2022JPEncoder(options);
- };
- /** @param {{fatal: boolean}} options */
- decoders['ISO-2022-JP'] = function(options) {
- return new ISO2022JPDecoder(options);
- };
- // 13.3 Shift_JIS
- // 13.3.1 Shift_JIS decoder
- /**
- * @constructor
- * @implements {Decoder}
- * @param {{fatal: boolean}} options
- */
- function ShiftJISDecoder(options) {
- var fatal = options.fatal;
- // Shift_JIS's decoder has an associated Shift_JIS lead (initially
- // 0x00).
- var /** @type {number} */ Shift_JIS_lead = 0x00;
- /**
- * @param {Stream} stream The stream of bytes being decoded.
- * @param {number} bite The next byte read from the stream.
- * @return {?(number|!Array.<number>)} The next code point(s)
- * decoded, or null if not enough data exists in the input
- * stream to decode a complete code point.
- */
- this.handler = function(stream, bite) {
- // 1. If byte is end-of-stream and Shift_JIS lead is not 0x00,
- // set Shift_JIS lead to 0x00 and return error.
- if (bite === end_of_stream && Shift_JIS_lead !== 0x00) {
- Shift_JIS_lead = 0x00;
- return decoderError(fatal);
- }
- // 2. If byte is end-of-stream and Shift_JIS lead is 0x00,
- // return finished.
- if (bite === end_of_stream && Shift_JIS_lead === 0x00)
- return finished;
- // 3. If Shift_JIS lead is not 0x00, let lead be Shift_JIS lead,
- // let pointer be null, set Shift_JIS lead to 0x00, and then run
- // these substeps:
- if (Shift_JIS_lead !== 0x00) {
- var lead = Shift_JIS_lead;
- var pointer = null;
- Shift_JIS_lead = 0x00;
- // 1. Let offset be 0x40, if byte is less than 0x7F, and 0x41
- // otherwise.
- var offset = (bite < 0x7F) ? 0x40 : 0x41;
- // 2. Let lead offset be 0x81, if lead is less than 0xA0, and
- // 0xC1 otherwise.
- var lead_offset = (lead < 0xA0) ? 0x81 : 0xC1;
- // 3. If byte is in the range 0x40 to 0x7E, inclusive, or 0x80
- // to 0xFC, inclusive, set pointer to (lead − lead offset) ×
- // 188 + byte − offset.
- if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFC))
- pointer = (lead - lead_offset) * 188 + bite - offset;
- // 4. If pointer is in the range 8836 to 10715, inclusive,
- // return a code point whose value is 0xE000 − 8836 + pointer.
- if (inRange(pointer, 8836, 10715))
- return 0xE000 - 8836 + pointer;
- // 5. Let code point be null, if pointer is null, and the
- // index code point for pointer in index jis0208 otherwise.
- var code_point = (pointer === null) ? null :
- indexCodePointFor(pointer, index('jis0208'));
- // 6. If code point is null and byte is an ASCII byte, prepend
- // byte to stream.
- if (code_point === null && isASCIIByte(bite))
- stream.prepend(bite);
- // 7. If code point is null, return error.
- if (code_point === null)
- return decoderError(fatal);
- // 8. Return a code point whose value is code point.
- return code_point;
- }
- // 4. If byte is an ASCII byte or 0x80, return a code point
- // whose value is byte.
- if (isASCIIByte(bite) || bite === 0x80)
- return bite;
- // 5. If byte is in the range 0xA1 to 0xDF, inclusive, return a
- // code point whose value is 0xFF61 − 0xA1 + byte.
- if (inRange(bite, 0xA1, 0xDF))
- return 0xFF61 - 0xA1 + bite;
- // 6. If byte is in the range 0x81 to 0x9F, inclusive, or 0xE0
- // to 0xFC, inclusive, set Shift_JIS lead to byte and return
- // continue.
- if (inRange(bite, 0x81, 0x9F) || inRange(bite, 0xE0, 0xFC)) {
- Shift_JIS_lead = bite;
- return null;
- }
- // 7. Return error.
- return decoderError(fatal);
- };
- }
- // 13.3.2 Shift_JIS encoder
- /**
- * @constructor
- * @implements {Encoder}
- * @param {{fatal: boolean}} options
- */
- function ShiftJISEncoder(options) {
- var fatal = options.fatal;
- /**
- * @param {Stream} stream Input stream.
- * @param {number} code_point Next code point read from the stream.
- * @return {(number|!Array.<number>)} Byte(s) to emit.
- */
- this.handler = function(stream, code_point) {
- // 1. If code point is end-of-stream, return finished.
- if (code_point === end_of_stream)
- return finished;
- // 2. If code point is an ASCII code point or U+0080, return a
- // byte whose value is code point.
- if (isASCIICodePoint(code_point) || code_point === 0x0080)
- return code_point;
- // 3. If code point is U+00A5, return byte 0x5C.
- if (code_point === 0x00A5)
- return 0x5C;
- // 4. If code point is U+203E, return byte 0x7E.
- if (code_point === 0x203E)
- return 0x7E;
- // 5. If code point is in the range U+FF61 to U+FF9F, inclusive,
- // return a byte whose value is code point − 0xFF61 + 0xA1.
- if (inRange(code_point, 0xFF61, 0xFF9F))
- return code_point - 0xFF61 + 0xA1;
- // 6. If code point is U+2212, set it to U+FF0D.
- if (code_point === 0x2212)
- code_point = 0xFF0D;
- // 7. Let pointer be the index Shift_JIS pointer for code point.
- var pointer = indexShiftJISPointerFor(code_point);
- // 8. If pointer is null, return error with code point.
- if (pointer === null)
- return encoderError(code_point);
- // 9. Let lead be floor(pointer / 188).
- var lead = floor(pointer / 188);
- // 10. Let lead offset be 0x81, if lead is less than 0x1F, and
- // 0xC1 otherwise.
- var lead_offset = (lead < 0x1F) ? 0x81 : 0xC1;
- // 11. Let trail be pointer % 188.
- var trail = pointer % 188;
- // 12. Let offset be 0x40, if trail is less than 0x3F, and 0x41
- // otherwise.
- var offset = (trail < 0x3F) ? 0x40 : 0x41;
- // 13. Return two bytes whose values are lead + lead offset and
- // trail + offset.
- return [lead + lead_offset, trail + offset];
- };
- }
- /** @param {{fatal: boolean}} options */
- encoders['Shift_JIS'] = function(options) {
- return new ShiftJISEncoder(options);
- };
- /** @param {{fatal: boolean}} options */
- decoders['Shift_JIS'] = function(options) {
- return new ShiftJISDecoder(options);
- };
- //
- // 14. Legacy multi-byte Korean encodings
- //
- // 14.1 euc-kr
- // 14.1.1 euc-kr decoder
- /**
- * @constructor
- * @implements {Decoder}
- * @param {{fatal: boolean}} options
- */
- function EUCKRDecoder(options) {
- var fatal = options.fatal;
- // euc-kr's decoder has an associated euc-kr lead (initially 0x00).
- var /** @type {number} */ euckr_lead = 0x00;
- /**
- * @param {Stream} stream The stream of bytes being decoded.
- * @param {number} bite The next byte read from the stream.
- * @return {?(number|!Array.<number>)} The next code point(s)
- * decoded, or null if not enough data exists in the input
- * stream to decode a complete code point.
- */
- this.handler = function(stream, bite) {
- // 1. If byte is end-of-stream and euc-kr lead is not 0x00, set
- // euc-kr lead to 0x00 and return error.
- if (bite === end_of_stream && euckr_lead !== 0) {
- euckr_lead = 0x00;
- return decoderError(fatal);
- }
- // 2. If byte is end-of-stream and euc-kr lead is 0x00, return
- // finished.
- if (bite === end_of_stream && euckr_lead === 0)
- return finished;
- // 3. If euc-kr lead is not 0x00, let lead be euc-kr lead, let
- // pointer be null, set euc-kr lead to 0x00, and then run these
- // substeps:
- if (euckr_lead !== 0x00) {
- var lead = euckr_lead;
- var pointer = null;
- euckr_lead = 0x00;
- // 1. If byte is in the range 0x41 to 0xFE, inclusive, set
- // pointer to (lead − 0x81) × 190 + (byte − 0x41).
- if (inRange(bite, 0x41, 0xFE))
- pointer = (lead - 0x81) * 190 + (bite - 0x41);
- // 2. Let code point be null, if pointer is null, and the
- // index code point for pointer in index euc-kr otherwise.
- var code_point = (pointer === null)
- ? null : indexCodePointFor(pointer, index('euc-kr'));
- // 3. If code point is null and byte is an ASCII byte, prepend
- // byte to stream.
- if (pointer === null && isASCIIByte(bite))
- stream.prepend(bite);
- // 4. If code point is null, return error.
- if (code_point === null)
- return decoderError(fatal);
- // 5. Return a code point whose value is code point.
- return code_point;
- }
- // 4. If byte is an ASCII byte, return a code point whose value
- // is byte.
- if (isASCIIByte(bite))
- return bite;
- // 5. If byte is in the range 0x81 to 0xFE, inclusive, set
- // euc-kr lead to byte and return continue.
- if (inRange(bite, 0x81, 0xFE)) {
- euckr_lead = bite;
- return null;
- }
- // 6. Return error.
- return decoderError(fatal);
- };
- }
- // 14.1.2 euc-kr encoder
- /**
- * @constructor
- * @implements {Encoder}
- * @param {{fatal: boolean}} options
- */
- function EUCKREncoder(options) {
- var fatal = options.fatal;
- /**
- * @param {Stream} stream Input stream.
- * @param {number} code_point Next code point read from the stream.
- * @return {(number|!Array.<number>)} Byte(s) to emit.
- */
- this.handler = function(stream, code_point) {
- // 1. If code point is end-of-stream, return finished.
- if (code_point === end_of_stream)
- return finished;
- // 2. If code point is an ASCII code point, return a byte whose
- // value is code point.
- if (isASCIICodePoint(code_point))
- return code_point;
- // 3. Let pointer be the index pointer for code point in index
- // euc-kr.
- var pointer = indexPointerFor(code_point, index('euc-kr'));
- // 4. If pointer is null, return error with code point.
- if (pointer === null)
- return encoderError(code_point);
- // 5. Let lead be floor(pointer / 190) + 0x81.
- var lead = floor(pointer / 190) + 0x81;
- // 6. Let trail be pointer % 190 + 0x41.
- var trail = (pointer % 190) + 0x41;
- // 7. Return two bytes whose values are lead and trail.
- return [lead, trail];
- };
- }
- /** @param {{fatal: boolean}} options */
- encoders['EUC-KR'] = function(options) {
- return new EUCKREncoder(options);
- };
- /** @param {{fatal: boolean}} options */
- decoders['EUC-KR'] = function(options) {
- return new EUCKRDecoder(options);
- };
- //
- // 15. Legacy miscellaneous encodings
- //
- // 15.1 replacement
- // Not needed - API throws RangeError
- // 15.2 Common infrastructure for utf-16be and utf-16le
- /**
- * @param {number} code_unit
- * @param {boolean} utf16be
- * @return {!Array.<number>} bytes
- */
- function convertCodeUnitToBytes(code_unit, utf16be) {
- // 1. Let byte1 be code unit >> 8.
- var byte1 = code_unit >> 8;
- // 2. Let byte2 be code unit & 0x00FF.
- var byte2 = code_unit & 0x00FF;
- // 3. Then return the bytes in order:
- // utf-16be flag is set: byte1, then byte2.
- if (utf16be)
- return [byte1, byte2];
- // utf-16be flag is unset: byte2, then byte1.
- return [byte2, byte1];
- }
- // 15.2.1 shared utf-16 decoder
- /**
- * @constructor
- * @implements {Decoder}
- * @param {boolean} utf16_be True if big-endian, false if little-endian.
- * @param {{fatal: boolean}} options
- */
- function UTF16Decoder(utf16_be, options) {
- var fatal = options.fatal;
- var /** @type {?number} */ utf16_lead_byte = null,
- /** @type {?number} */ utf16_lead_surrogate = null;
- /**
- * @param {Stream} stream The stream of bytes being decoded.
- * @param {number} bite The next byte read from the stream.
- * @return {?(number|!Array.<number>)} The next code point(s)
- * decoded, or null if not enough data exists in the input
- * stream to decode a complete code point.
- */
- this.handler = function(stream, bite) {
- // 1. If byte is end-of-stream and either utf-16 lead byte or
- // utf-16 lead surrogate is not null, set utf-16 lead byte and
- // utf-16 lead surrogate to null, and return error.
- if (bite === end_of_stream && (utf16_lead_byte !== null ||
- utf16_lead_surrogate !== null)) {
- return decoderError(fatal);
- }
- // 2. If byte is end-of-stream and utf-16 lead byte and utf-16
- // lead surrogate are null, return finished.
- if (bite === end_of_stream && utf16_lead_byte === null &&
- utf16_lead_surrogate === null) {
- return finished;
- }
- // 3. If utf-16 lead byte is null, set utf-16 lead byte to byte
- // and return continue.
- if (utf16_lead_byte === null) {
- utf16_lead_byte = bite;
- return null;
- }
- // 4. Let code unit be the result of:
- var code_unit;
- if (utf16_be) {
- // utf-16be decoder flag is set
- // (utf-16 lead byte << 8) + byte.
- code_unit = (utf16_lead_byte << 8) + bite;
- } else {
- // utf-16be decoder flag is unset
- // (byte << 8) + utf-16 lead byte.
- code_unit = (bite << 8) + utf16_lead_byte;
- }
- // Then set utf-16 lead byte to null.
- utf16_lead_byte = null;
- // 5. If utf-16 lead surrogate is not null, let lead surrogate
- // be utf-16 lead surrogate, set utf-16 lead surrogate to null,
- // and then run these substeps:
- if (utf16_lead_surrogate !== null) {
- var lead_surrogate = utf16_lead_surrogate;
- utf16_lead_surrogate = null;
- // 1. If code unit is in the range U+DC00 to U+DFFF,
- // inclusive, return a code point whose value is 0x10000 +
- // ((lead surrogate − 0xD800) << 10) + (code unit − 0xDC00).
- if (inRange(code_unit, 0xDC00, 0xDFFF)) {
- return 0x10000 + (lead_surrogate - 0xD800) * 0x400 +
- (code_unit - 0xDC00);
- }
- // 2. Prepend the sequence resulting of converting code unit
- // to bytes using utf-16be decoder flag to stream and return
- // error.
- stream.prepend(convertCodeUnitToBytes(code_unit, utf16_be));
- return decoderError(fatal);
- }
- // 6. If code unit is in the range U+D800 to U+DBFF, inclusive,
- // set utf-16 lead surrogate to code unit and return continue.
- if (inRange(code_unit, 0xD800, 0xDBFF)) {
- utf16_lead_surrogate = code_unit;
- return null;
- }
- // 7. If code unit is in the range U+DC00 to U+DFFF, inclusive,
- // return error.
- if (inRange(code_unit, 0xDC00, 0xDFFF))
- return decoderError(fatal);
- // 8. Return code point code unit.
- return code_unit;
- };
- }
// 15.2.2 shared utf-16 encoder
/**
 * Encoder used for both utf-16be and utf-16le; the two differ only in
 * the byte-order flag handed to convertCodeUnitToBytes.
 * @constructor
 * @implements {Encoder}
 * @param {boolean} utf16_be True if big-endian, false if little-endian.
 * @param {{fatal: boolean}} options
 */
function UTF16Encoder(utf16_be, options) {
  // Read from options but not consulted anywhere in this encoder.
  var fatal = options.fatal;

  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    // Step 1: end-of-stream yields finished.
    if (code_point === end_of_stream) {
      return finished;
    }

    // Step 2: U+0000..U+FFFF (inclusive) is emitted as one code unit,
    // converted to bytes with the byte-order flag.
    if (inRange(code_point, 0x0000, 0xFFFF)) {
      return convertCodeUnitToBytes(code_point, utf16_be);
    }

    // Steps 3-4: anything else becomes a surrogate pair. The lead unit
    // is ((code point - 0x10000) >> 10) + 0xD800 and the trail unit is
    // ((code point - 0x10000) & 0x3FF) + 0xDC00.
    var offset = code_point - 0x10000;
    var lead_unit = (offset >> 10) + 0xD800;
    var trail_unit = (offset & 0x3FF) + 0xDC00;
    var lead_bytes = convertCodeUnitToBytes(lead_unit, utf16_be);
    var trail_bytes = convertCodeUnitToBytes(trail_unit, utf16_be);

    // Step 5: the lead bytes followed by the trail bytes.
    return lead_bytes.concat(trail_bytes);
  };
}
// 15.3 utf-16be
// 15.3.1 utf-16be decoder
/**
 * Decoder factory stored under the 'UTF-16BE' key of `decoders`.
 * @param {{fatal: boolean}} options
 */
decoders['UTF-16BE'] = function(options) {
  var big_endian = true;
  return new UTF16Decoder(big_endian, options);
};
// 15.3.2 utf-16be encoder
/**
 * Encoder factory stored under the 'UTF-16BE' key of `encoders`.
 * @param {{fatal: boolean}} options
 */
encoders['UTF-16BE'] = function(options) {
  var big_endian = true;
  return new UTF16Encoder(big_endian, options);
};
// 15.4 utf-16le
// 15.4.1 utf-16le decoder
/**
 * Decoder factory stored under the 'UTF-16LE' key of `decoders`.
 * @param {{fatal: boolean}} options
 */
decoders['UTF-16LE'] = function(options) {
  var big_endian = false;
  return new UTF16Decoder(big_endian, options);
};
// 15.4.2 utf-16le encoder
/**
 * Encoder factory stored under the 'UTF-16LE' key of `encoders`.
 * @param {{fatal: boolean}} options
 */
encoders['UTF-16LE'] = function(options) {
  var big_endian = false;
  return new UTF16Encoder(big_endian, options);
};
// 15.5 x-user-defined
// 15.5.1 x-user-defined decoder
/**
 * Decoder for the x-user-defined encoding.
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options
 */
function XUserDefinedDecoder(options) {
  // Read from options but not consulted anywhere in this decoder.
  var fatal = options.fatal;

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The code point decoded from
   *     the byte, or finished when the byte is end-of-stream.
   */
  this.handler = function(stream, bite) {
    // Step 1: end-of-stream yields finished.
    if (bite === end_of_stream) {
      return finished;
    }

    // Step 2: an ASCII byte maps to the code point of the same value.
    // Step 3: any other byte maps to 0xF780 + byte - 0x80.
    return isASCIIByte(bite) ? bite : 0xF780 + bite - 0x80;
  };
}
// 15.5.2 x-user-defined encoder
/**
 * Encoder for the x-user-defined encoding.
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 */
function XUserDefinedEncoder(options) {
  // Read from options but not consulted anywhere in this encoder.
  var fatal = options.fatal;

  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    // Step 1: end-of-stream yields finished.
    if (code_point === end_of_stream) {
      return finished;
    }

    // Step 2: an ASCII code point is emitted as the byte of the same
    // value.
    if (isASCIICodePoint(code_point)) {
      return code_point;
    }

    // Step 4: anything outside U+F780..U+F7FF (inclusive) is an error
    // carrying the offending code point.
    var in_mapped_range = inRange(code_point, 0xF780, 0xF7FF);
    if (!in_mapped_range) {
      return encoderError(code_point);
    }

    // Step 3: U+F780..U+F7FF maps to code point - 0xF780 + 0x80.
    return code_point - 0xF780 + 0x80;
  };
}
/**
 * Encoder factory stored under the 'x-user-defined' key of `encoders`.
 * @param {{fatal: boolean}} options
 */
encoders['x-user-defined'] = function(options) {
  var encoder = new XUserDefinedEncoder(options);
  return encoder;
};
/**
 * Decoder factory stored under the 'x-user-defined' key of `decoders`.
 * @param {{fatal: boolean}} options
 */
decoders['x-user-defined'] = function(options) {
  var decoder = new XUserDefinedDecoder(options);
  return decoder;
};
- if (!global['TextEncoder'])
- global['TextEncoder'] = TextEncoder;
- if (!global['TextDecoder'])
- global['TextDecoder'] = TextDecoder;
- if (typeof module !== "undefined" && module.exports) {
- module.exports = {
- TextEncoder: global['TextEncoder'],
- TextDecoder: global['TextDecoder'],
- EncodingIndexes: global["encoding-indexes"]
- };
- }
- // For strict environments where `this` inside the global scope
- // is `undefined`, take a pure object instead
- }(this || {}));
|