/**
 * sdf-parser - SDF parser
 * @version v5.0.2
 * @link https://github.com/cheminfo/sdf-parser
 * @license MIT
 */
(function (global, factory) {
    typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) :
    typeof define === 'function' && define.amd ? define(['exports'], factory) :
    (global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.SDFParser = {}));
})(this, (function (exports) { 'use strict';

    /*
        https://tools.ietf.org/html/rfc3629

        UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4

        UTF8-1    = %x00-7F

        UTF8-2    = %xC2-DF UTF8-tail

        UTF8-3    = %xE0 %xA0-BF UTF8-tail
                    %xE1-EC 2( UTF8-tail )
                    %xED %x80-9F UTF8-tail
                    %xEE-EF 2( UTF8-tail )

        UTF8-4    = %xF0 %x90-BF 2( UTF8-tail )
                    %xF1-F3 3( UTF8-tail )
                    %xF4 %x80-8F 2( UTF8-tail )

        UTF8-tail = %x80-BF
    */

    /**
     * Tell whether a sequence of bytes (Node.js Buffer or Uint8Array) is
     * well-formed UTF-8 according to the RFC 3629 grammar.
     *
     * @param {Uint8Array} buf Bytes to validate.
     * @returns {boolean} `false` for a falsy argument or as soon as an invalid
     *   sequence is met, `true` when every byte belongs to a valid sequence
     *   (which includes the empty input).
     */
    function isUtf8(buf) {
      if (!buf) {
        return false;
      }

      // UTF8-tail = %x80-BF, i.e. the two high bits are `10`.
      const isTail = byte => byte >> 6 === 2;

      const inRange = (byte, low, high) => byte >= low && byte <= high;

      const total = buf.length;
      let pos = 0;

      while (pos < total) {
        const lead = buf[pos];
        const second = buf[pos + 1];

        // UTF8-1 = %x00-7F
        if (lead <= 0x7F) {
          pos += 1;
          continue;
        }

        // UTF8-2 = %xC2-DF UTF8-tail
        if (inRange(lead, 0xC2, 0xDF)) {
          if (!isTail(second)) {
            return false;
          }

          pos += 2;
          continue;
        }

        // UTF8-3 = %xE0 %xA0-BF UTF8-tail
        // UTF8-3 = %xED %x80-9F UTF8-tail
        const restrictedTriple =
          lead === 0xE0 && inRange(second, 0xA0, 0xBF) ||
          lead === 0xED && inRange(second, 0x80, 0x9F);

        if (restrictedTriple && isTail(buf[pos + 2])) {
          pos += 3;
          continue;
        }

        // UTF8-3 = %xE1-EC 2( UTF8-tail )
        // UTF8-3 = %xEE-EF 2( UTF8-tail )
        const plainTripleLead = inRange(lead, 0xE1, 0xEC) || inRange(lead, 0xEE, 0xEF);

        if (plainTripleLead && isTail(second) && isTail(buf[pos + 2])) {
          pos += 3;
          continue;
        }

        // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail )
        //          %xF1-F3 3( UTF8-tail )
        //          %xF4 %x80-8F 2( UTF8-tail )
        const quadStart =
          lead === 0xF0 && inRange(second, 0x90, 0xBF) ||
          inRange(lead, 0xF1, 0xF3) && isTail(second) ||
          lead === 0xF4 && inRange(second, 0x80, 0x8F);

        if (quadStart && isTail(buf[pos + 2]) && isTail(buf[pos + 3])) {
          pos += 4;
          continue;
        }

        // Anything else (stray tail, overlong form, truncated sequence, ...)
        return false;
      }

      return true;
    }

    /**
     * Return `blob` as a string. Strings are returned untouched; an ArrayBuffer
     * or any ArrayBuffer view is decoded with TextDecoder.
     * @param blob Data to convert: string, ArrayBuffer or ArrayBufferView.
     * @param options
     * @param options.encoding Encoding label handed to TextDecoder. When left
     *   undefined the encoding is obtained from guessEncoding(blob).
     * @returns The data as a string.
     * @throws {TypeError} When blob is neither a string nor binary data.
     */

    function ensureString(blob, options = {}) {
      if (typeof blob === 'string') {
        return blob;
      }

      const isBinary = ArrayBuffer.isView(blob) || blob instanceof ArrayBuffer;

      if (!isBinary) {
        throw new TypeError(`blob must be a string, ArrayBuffer or ArrayBufferView`);
      }

      // Only guess when the caller did not name an encoding.
      const encoding = options.encoding === undefined ? guessEncoding(blob) : options.encoding;
      return new TextDecoder(encoding).decode(blob);
    }

    /**
     * Guess the text encoding of binary data.
     *
     * A UTF-16 byte-order mark in the first two bytes selects 'utf-16be' or
     * 'utf-16le'. Otherwise the bytes are checked with isUtf8(): valid UTF-8
     * gives 'utf-8', anything else falls back to 'latin1'.
     *
     * @param {ArrayBuffer|ArrayBufferView} blob Binary data to inspect.
     * @returns {string} One of 'utf-16be', 'utf-16le', 'utf-8' or 'latin1'.
     */
    function guessEncoding(blob) {
      // Always work on a byte-wise view, whatever the kind of buffer or view received.
      const uint8 = ArrayBuffer.isView(blob) ? new Uint8Array(blob.buffer, blob.byteOffset, blob.byteLength) : new Uint8Array(blob);

      if (uint8.length >= 2) {
        if (uint8[0] === 0xfe && uint8[1] === 0xff) {
          return 'utf-16be';
        }

        if (uint8[0] === 0xff && uint8[1] === 0xfe) {
          return 'utf-16le';
        }
      }

      // Validate the byte view, not the raw argument: an ArrayBuffer or a
      // DataView has no `length`, so isUtf8() would accept it without reading a
      // single byte, and wider typed arrays would be checked per element
      // instead of per byte.
      return isUtf8(uint8) ? 'utf-8' : 'latin1';
    }

    /**
     * Locate the [start, end] offsets of the entries of `string` that are
     * terminated by `substring`.
     *
     * After each terminator the rest of its line is skipped: the next entry
     * starts right after the first `eol` found behind the terminator. When no
     * such `eol` exists the scan stops. When no further terminator exists the
     * remaining text (possibly empty) is reported as a last entry.
     *
     * @param {string} string Text to scan.
     * @param {string} substring Terminator marking the end of an entry.
     * @param {string} eol End-of-line sequence.
     * @returns {Array<[number, number]>} Offsets usable with String#substring.
     */
    function getEntriesBoundaries(string, substring, eol) {
      const boundaries = [];
      let cursor = 0;

      for (;;) {
        const terminator = string.indexOf(substring, cursor);

        if (terminator === -1) {
          // No more terminator: everything left is the trailing entry.
          boundaries.push([cursor, string.length]);
          return boundaries;
        }

        boundaries.push([cursor, terminator]);
        const lineEnd = string.indexOf(eol, terminator + substring.length);

        if (lineEnd === -1) {
          // The terminator sits on the very last line.
          return boundaries;
        }

        cursor = lineEnd + eol.length;
      }
    }

    /**
     * Read an own property of `record`; inherited properties (for instance
     * `constructor` or `toString` on a plain object) are reported as undefined.
     */
    function readOwnField(record, key) {
      return Object.prototype.hasOwnProperty.call(record, key) ? record[key] : undefined;
    }

    /**
     * Store `value` as an own data property of `record`, including for the key
     * `__proto__`, for which a plain assignment would go through the prototype
     * setter instead of creating a property.
     */
    function writeOwnField(record, key, value) {
      if (key === '__proto__') {
        Object.defineProperty(record, key, {
          value,
          writable: true,
          enumerable: true,
          configurable: true
        });
      } else {
        record[key] = value;
      }
    }

    /**
     * Tell whether a field value still allows its label to be treated as numeric:
     * a finite number, or a string that converts to a finite number and does not
     * start with a zero followed by a digit (such values are kept as text).
     */
    function canStayNumeric(value) {
      if (typeof value === 'number') {
        return Number.isFinite(value);
      }

      if (typeof value !== 'string') {
        return false;
      }

      return Number.isFinite(Number(value)) && !/^0[0-9]/.test(value);
    }

    /**
     *  Parse a SDF file
     * @param {string|ArrayBuffer|Uint8Array} sdf SDF file to parse
     * @param {any} [options={}]
     * @param {array<string>} [options.include] List of fields to include
     * @param {array<string>} [options.exclude] List of fields to exclude
     * @param {function} [options.filter] Called with each parsed molecule; the molecule is kept only when the result is truthy
     * @param {boolean} [options.dynamicTyping=true] Dynamically type the data: a field whose values all look numeric is converted to numbers
     * @param {object} [options.modifiers] Object containing callbacks to apply on some specific fields. A callback receives the field value and its result replaces it; a null or undefined result removes the field
     * @param {object} [options.forEach] Object containing callbacks per field; they are attached to the statistics entry of the field and are not called by this function
     * @param {boolean} [options.mixedEOL=false] Set to true if you know there is a mixture between \r\n and \n
     * @returns {{time: number, molecules: Array<object>, labels: Array<string>, statistics: Array<object>}}
     *   `time` is the parsing duration in ms, `molecules` holds one object per
     *   entry (`molfile` plus the kept fields), `labels` lists every field name
     *   met and `statistics` holds per field `label`, `counter`, `isNumeric`,
     *   `keep`, `always` and, for numeric fields, `minValue` / `maxValue`.
     * @throws {TypeError} If `sdf` is not a string, ArrayBuffer or ArrayBufferView (thrown by ensureString)
     */

    function parse(sdf, options = {}) {
      const {
        include,
        exclude,
        filter,
        modifiers = {},
        forEach = {},
        dynamicTyping = true
      } = options;
      // ensureString() either returns a string or throws a TypeError.
      let text = ensureString(sdf);
      let eol = '\n';

      if (options.mixedEOL) {
        // Normalise \r\n and lone \r to \n.
        text = text.replace(/\r\n?/g, '\n');
      } else {
        // we will find the delimiter in order to be much faster and not use regular expression
        const header = text.slice(0, 1000);

        if (header.indexOf('\r\n') > -1) {
          eol = '\r\n';
        } else if (header.indexOf('\r') > -1) {
          eol = '\r';
        }
      }

      const entriesBoundaries = getEntriesBoundaries(text, `${eol}$$$$`, eol);
      const molecules = [];
      // Field names come from the file: a prototype-less dictionary guarantees
      // that names such as `__proto__` or `constructor` are ordinary keys.
      const labels = Object.create(null);
      const start = Date.now();

      for (const [from, to] of entriesBoundaries) {
        const parts = text.substring(from, to).split(`${eol}>`);

        // Entries whose molfile part is 5 characters or less are skipped.
        if (parts[0].length <= 5) {
          continue;
        }

        const molecule = {};
        const currentLabels = [];
        molecule.molfile = parts[0] + eol;

        for (let j = 1; j < parts.length; j++) {
          const lines = parts[j].split(eol);
          const titleLine = lines[0];
          const label = titleLine.substring(titleLine.indexOf('<') + 1, titleLine.indexOf('>'));
          currentLabels.push(label);

          let info = labels[label];

          if (!info) {
            info = {
              counter: 0,
              isNumeric: dynamicTyping,
              keep: false
            };
            labels[label] = info;

            if ((!exclude || exclude.indexOf(label) === -1) && (!include || include.indexOf(label) > -1)) {
              info.keep = true;

              // Own-property checks: a field called `constructor` must not
              // pick up Object.prototype.constructor as its callback.
              if (Object.prototype.hasOwnProperty.call(modifiers, label) && modifiers[label]) {
                info.modifier = modifiers[label];
              }

              if (Object.prototype.hasOwnProperty.call(forEach, label) && forEach[label]) {
                info.forEach = forEach[label];
              }
            }
          }

          if (!info.keep) {
            continue;
          }

          // First line is the field title and the last one is not part of the
          // value; the lines in between are joined with the detected EOL.
          let value = readOwnField(molecule, label);

          for (let k = 1; k < lines.length - 1; k++) {
            value = value ? value + eol + lines[k] : lines[k];
          }

          if (lines.length > 2) {
            writeOwnField(molecule, label, value);
          }

          if (info.modifier) {
            const modifiedValue = info.modifier(value);

            if (modifiedValue === undefined || modifiedValue === null) {
              delete molecule[label];
              value = undefined;
            } else {
              value = modifiedValue;
              writeOwnField(molecule, label, value);
            }
          }

          // A modifier may return a non-string, hence the type-aware check.
          if (info.isNumeric && !canStayNumeric(value)) {
            info.isNumeric = false;
          }
        }

        if (!filter || filter(molecule)) {
          molecules.push(molecule); // only now we can increase the counter

          for (const label of currentLabels) {
            labels[label].counter++;
          }
        }
      }

      // all numeric fields should be converted to numbers
      for (const label in labels) {
        const info = labels[label];

        if (!info.isNumeric) {
          continue;
        }

        info.minValue = Infinity;
        info.maxValue = -Infinity;

        for (const molecule of molecules) {
          const raw = readOwnField(molecule, label);

          if (raw) {
            const value = parseFloat(raw);
            writeOwnField(molecule, label, value);

            if (value > info.maxValue) {
              info.maxValue = value;
            }

            if (value < info.minValue) {
              info.minValue = value;
            }
          }
        }
      }

      const statistics = [];

      for (const key in labels) {
        const statistic = labels[key];
        // we check that a label is in all the records
        statistic.always = statistic.counter === molecules.length;
        statistic.label = key;
        statistics.push(statistic);
      }

      return {
        time: Date.now() - start,
        molecules,
        labels: Object.keys(labels),
        statistics
      };
    }

    // Public API of the bundle: `parse` is the only function attached to `exports`.
    exports.parse = parse;

    // Define the `__esModule` flag on the exports object (conventionally read by
    // ES-module interop helpers of bundlers/transpilers).
    Object.defineProperty(exports, '__esModule', { value: true });

}));
//# sourceMappingURL=sdf-parser.js.map
