/**
 * rxn-parser - Parse the RXN format
 * @version v1.0.0
 * @link https://github.com/cheminfo/rxn-parser#readme
 * @license MIT
 */
(function (global, factory) {
    typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() :
    typeof define === 'function' && define.amd ? define(factory) :
    (global = typeof globalThis !== 'undefined' ? globalThis : global || self, global.RXNParser = factory());
})(this, (function () { 'use strict';

    /*
        https://tools.ietf.org/html/rfc3629

        UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4

        UTF8-1    = %x00-7F

        UTF8-2    = %xC2-DF UTF8-tail

        UTF8-3    = %xE0 %xA0-BF UTF8-tail
                    %xE1-EC 2( UTF8-tail )
                    %xED %x80-9F UTF8-tail
                    %xEE-EF 2( UTF8-tail )

        UTF8-4    = %xF0 %x90-BF 2( UTF8-tail )
                    %xF1-F3 3( UTF8-tail )
                    %xF4 %x80-8F 2( UTF8-tail )

        UTF8-tail = %x80-BF
    */
    /**
     * Tell whether a byte sequence (Node.js Buffer or Uint8Array) is well-formed
     * UTF-8 according to the RFC 3629 grammar.
     * @param buf - Indexable sequence of byte values exposing a `length`.
     * @returns `false` for a falsy input or as soon as an ill-formed sequence is
     * met, `true` once every byte has been consumed.
     */
    function isUtf8(buf) {
      if (!buf) {
        return false;
      }
      // Inclusive range test, used for lead bytes and restricted second bytes.
      const between = (value, low, high) => value >= low && value <= high;
      // UTF8-tail = %x80-BF, i.e. the two top bits are `10`.
      const isTail = (value) => value >> 6 === 2;

      const size = buf.length;
      let offset = 0;
      while (offset < size) {
        const lead = buf[offset];
        const second = buf[offset + 1];
        const third = buf[offset + 2];
        const fourth = buf[offset + 3];

        let width;
        if (lead <= 0x7f) {
          // UTF8-1 = %x00-7F
          width = 1;
        } else if (between(lead, 0xc2, 0xdf)) {
          // UTF8-2 = %xC2-DF UTF8-tail
          if (!isTail(second)) {
            return false;
          }
          width = 2;
        } else if (
          ((lead === 0xe0 && between(second, 0xa0, 0xbf)) ||
            (lead === 0xed && between(second, 0x80, 0x9f))) &&
          isTail(third)
        ) {
          // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xED %x80-9F UTF8-tail
          width = 3;
        } else if (
          (between(lead, 0xe1, 0xec) || between(lead, 0xee, 0xef)) &&
          isTail(second) &&
          isTail(third)
        ) {
          // UTF8-3 = %xE1-EC 2( UTF8-tail ) / %xEE-EF 2( UTF8-tail )
          width = 3;
        } else if (
          ((lead === 0xf0 && between(second, 0x90, 0xbf)) ||
            (between(lead, 0xf1, 0xf3) && isTail(second)) ||
            (lead === 0xf4 && between(second, 0x80, 0x8f))) &&
          isTail(third) &&
          isTail(fourth)
        ) {
          // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail )
          //          %xF1-F3 3( UTF8-tail )
          //          %xF4 %x80-8F 2( UTF8-tail )
          width = 4;
        } else {
          return false;
        }
        offset += width;
      }
      return true;
    }

    /**
     * Return the input as a string, decoding binary input with TextDecoder.
     * @param blob - A string (returned untouched), an ArrayBuffer or an ArrayBuffer view.
     * @param options - Optional settings; `options.encoding` is the label handed to
     * TextDecoder. When it is undefined the encoding is obtained from guessEncoding.
     * @returns The string itself or the decoded text.
     * @throws {TypeError} When blob is neither a string, an ArrayBuffer nor a view.
     */
    function ensureString(blob, options = {}) {
      if (typeof blob === 'string') return blob;

      const isBinary = ArrayBuffer.isView(blob) || blob instanceof ArrayBuffer;
      if (!isBinary) {
        throw new TypeError(`blob must be a string, ArrayBuffer or ArrayBufferView`);
      }

      // Only guess when the caller did not choose an encoding.
      const label = options.encoding === undefined ? guessEncoding(blob) : options.encoding;
      return new TextDecoder(label).decode(blob);
    }
    /**
     * Guess the text encoding of binary data.
     *
     * A UTF-16 byte order mark in the first two bytes wins; otherwise the bytes
     * are checked with isUtf8 and anything that is not valid UTF-8 is reported
     * as 'latin1'.
     * @param blob - An ArrayBuffer or an ArrayBuffer view.
     * @returns 'utf-16be', 'utf-16le', 'utf-8' or 'latin1'.
     */
    function guessEncoding(blob) {
      // Always work on a byte view so that the result does not depend on the
      // element size of the view that was received.
      const uint8 = ArrayBuffer.isView(blob)
        ? new Uint8Array(blob.buffer, blob.byteOffset, blob.byteLength)
        : new Uint8Array(blob);
      if (uint8.length >= 2) {
        if (uint8[0] === 0xfe && uint8[1] === 0xff) {
          return 'utf-16be';
        }
        if (uint8[0] === 0xff && uint8[1] === 0xfe) {
          return 'utf-16le';
        }
      }
      // isUtf8 walks its argument through `length` and numeric indexes. A raw
      // ArrayBuffer or DataView has no `length` and would always be accepted as
      // UTF-8, so the byte view is checked rather than the original blob.
      if (!isUtf8(uint8)) return 'latin1';
      return 'utf-8';
    }

    /**
     * Parse an RXN file and split it into its reagent and product molfiles.
     *
     * The text is cut on the `$MOL` delimiter lines. The part before the first
     * delimiter is the header: it must start with `$RXN` and its fifth line holds
     * the number of reagents (columns 1-3) and products (columns 4-6).
     * @param {import('cheminfo-types').TextData} rxn - RXN content; converted to a
     * string with ensureString.
     * @returns {{reagents: string[], products: string[]}} The molfile text of every
     * reagent and product, in file order.
     * @throws {Error} When the file is empty, does not start with `$RXN`, has a
     * header shorter than 5 lines, or when the announced counts do not match the
     * number of `$MOL` sections.
     */
    function parse(rxn) {
      const text = ensureString(rxn);
      // String.prototype.split never returns an empty array, so emptiness has to
      // be detected on the text itself.
      if (text.trim() === '') {
        throw new Error('file looks empty');
      }

      // Detect the line ending from the first 1000 characters so the split can
      // use a plain string delimiter instead of a regular expression.
      const sample = text.slice(0, 1000);
      let eol = '\n';
      if (sample.includes('\r\n')) {
        eol = '\r\n';
      } else if (sample.includes('\r')) {
        eol = '\r';
      }

      const rxnParts = text.split(`${eol}$MOL${eol}`);
      const header = rxnParts[0];
      if (!header.startsWith('$RXN')) {
        throw new Error('file does not start with $RXN');
      }
      const headerLines = header.split(eol);
      if (headerLines.length < 5) {
        throw new Error('incorrect number of lines in header');
      }

      // `>> 0` coerces each 3-character field to a 32-bit integer; blank or
      // non-numeric fields become 0.
      const countLine = headerLines[4];
      let numberReagents = countLine.slice(0, 3) >> 0;
      const numberProducts = countLine.slice(3, 6) >> 0;

      // Hack for JSME: when a third count is present, the number of reagents is
      // derived from the first line of the first molfile instead (presumably a
      // reaction string written by JSME - confirm against JSME output).
      const thirdNumber = countLine.slice(6, 9) >> 0;
      if (thirdNumber && rxnParts[1]) {
        const [firstLine] = rxnParts[1].split(eol);
        if (firstLine) {
          numberReagents = firstLine.trim().replace(/>[^>]*$/, '').split(/[.>]/).length;
        }
      }

      const molfiles = rxnParts.slice(1);
      if (numberReagents + numberProducts !== molfiles.length) {
        throw new Error('not the correct number of molecules');
      }

      const reagents = [];
      const products = [];
      molfiles.forEach((molfile, index) => {
        // The first `numberReagents` sections are reagents, the rest products.
        if (index < numberReagents) {
          reagents.push(molfile);
        } else {
          products.push(molfile);
        }
      });
      return { reagents, products };
    }

    return parse;

}));
//# sourceMappingURL=rxn-parser.js.map
