/**
 * sdf-parser - SDF parser
 * @version v7.0.3
 * @link https://github.com/cheminfo/sdf-parser
 * @license MIT
 */
(function (global, factory) {
    typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) :
    typeof define === 'function' && define.amd ? define(['exports'], factory) :
    (global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.SDFParser = {}));
})(this, (function (exports) { 'use strict';

    /*
        https://tools.ietf.org/html/rfc3629

        UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4

        UTF8-1    = %x00-7F

        UTF8-2    = %xC2-DF UTF8-tail

        UTF8-3    = %xE0 %xA0-BF UTF8-tail
                    %xE1-EC 2( UTF8-tail )
                    %xED %x80-9F UTF8-tail
                    %xEE-EF 2( UTF8-tail )

        UTF8-4    = %xF0 %x90-BF 2( UTF8-tail )
                    %xF1-F3 3( UTF8-tail )
                    %xF4 %x80-8F 2( UTF8-tail )

        UTF8-tail = %x80-BF
    */
    /**
     * Tell whether a byte sequence is well-formed UTF-8 according to the
     * RFC 3629 grammar quoted above.
     *
     * Reads past the end of the input yield `undefined`, which fails every range
     * test below, so a truncated multi-byte sequence is reported as invalid.
     *
     * @param {Uint8Array|Buffer} buf - bytes to inspect (indexed, with a `length`)
     * @returns {boolean} `false` for a falsy input or any malformed sequence,
     *   `true` otherwise (including for an empty input)
     */
    function isUtf8(buf) {
      if (!buf) return false;

      // UTF8-tail = %x80-BF, i.e. the two top bits are `10`
      const isTail = byte => byte >> 6 === 2;
      const inRange = (byte, low, high) => byte >= low && byte <= high;

      const len = buf.length;
      let i = 0;
      while (i < len) {
        const lead = buf[i];

        // UTF8-1: plain ASCII
        if (lead <= 0x7F) {
          i += 1;
          continue;
        }

        // UTF8-2: %xC2-DF followed by one tail byte
        if (inRange(lead, 0xC2, 0xDF)) {
          if (!isTail(buf[i + 1])) return false;
          i += 2;
          continue;
        }

        const second = buf[i + 1];

        // UTF8-3: the allowed range of the second byte depends on the lead byte
        // (E0 excludes overlong forms, ED excludes UTF-16 surrogates)
        const threeByteStart =
          (lead === 0xE0 && inRange(second, 0xA0, 0xBF)) ||
          (lead === 0xED && inRange(second, 0x80, 0x9F)) ||
          ((inRange(lead, 0xE1, 0xEC) || inRange(lead, 0xEE, 0xEF)) && isTail(second));
        if (threeByteStart && isTail(buf[i + 2])) {
          i += 3;
          continue;
        }

        // UTF8-4: F0 excludes overlong forms, F4 caps the range at U+10FFFF
        const fourByteStart =
          (lead === 0xF0 && inRange(second, 0x90, 0xBF)) ||
          (inRange(lead, 0xF1, 0xF3) && isTail(second)) ||
          (lead === 0xF4 && inRange(second, 0x80, 0x8F));
        if (fourByteStart && isTail(buf[i + 2]) && isTail(buf[i + 3])) {
          i += 4;
          continue;
        }

        return false;
      }
      return true;
    }

    /**
     * Return the input as a string, decoding binary data with TextDecoder.
     *
     * Strings are returned untouched. An ArrayBuffer or any ArrayBuffer view is
     * decoded with `options.encoding`; when that option is undefined the
     * encoding is chosen by `guessEncoding`.
     *
     * @param {string|ArrayBuffer|ArrayBufferView} blob - data to convert
     * @param {object} [options={}]
     * @param {string} [options.encoding] - TextDecoder label to use for binary data
     * @returns {string}
     * @throws {TypeError} if `blob` is neither a string nor binary data
     */
    function ensureString(blob, options = {}) {
      if (typeof blob === 'string') return blob;

      const isBinary = blob instanceof ArrayBuffer || ArrayBuffer.isView(blob);
      if (!isBinary) {
        throw new TypeError(`blob must be a string, ArrayBuffer or ArrayBufferView`);
      }

      // only sniff the bytes when the caller did not impose an encoding
      const encoding = options.encoding === undefined ? guessEncoding(blob) : options.encoding;
      return new TextDecoder(encoding).decode(blob);
    }
    /**
     * Guess the text encoding of binary data.
     *
     * A UTF-16 byte order mark wins; otherwise the bytes are validated as UTF-8
     * and anything that is not valid UTF-8 is reported as latin1.
     *
     * @param {ArrayBuffer|ArrayBufferView} blob - binary data to inspect
     * @returns {'utf-16be'|'utf-16le'|'utf-8'|'latin1'} label usable with TextDecoder
     */
    function guessEncoding(blob) {
      // Always look at raw bytes: a view may have multi-byte elements and an
      // ArrayBuffer has neither indices nor a `length`.
      const uint8 = ArrayBuffer.isView(blob) ? new Uint8Array(blob.buffer, blob.byteOffset, blob.byteLength) : new Uint8Array(blob);
      if (uint8.length >= 2) {
        if (uint8[0] === 0xfe && uint8[1] === 0xff) {
          return 'utf-16be';
        }
        if (uint8[0] === 0xff && uint8[1] === 0xfe) {
          return 'utf-16le';
        }
      }
      // Validate the byte view, not `blob` itself: isUtf8 walks `buf.length`
      // entries, so a bare ArrayBuffer would always be accepted as UTF-8.
      return isUtf8(uint8) ? 'utf-8' : 'latin1';
    }

    /**
     * Compute the `[start, end]` offsets of the entries of a delimited text.
     *
     * An entry runs from the current position up to the next occurrence of
     * `substring`. The following entry starts right after the first `eol` found
     * past that occurrence. When no further `substring` exists, the rest of the
     * text is reported as a last entry; when a `substring` is not followed by any
     * `eol`, the scan stops there.
     *
     * @param {string} string - text to scan
     * @param {string} substring - entry delimiter
     * @param {string} eol - end of line sequence that terminates the delimiter line
     * @returns {Array<[number, number]>} offsets usable with `string.slice(start, end)`
     */
    function getEntriesBoundaries(string, substring, eol) {
      const boundaries = [];
      let cursor = 0;
      for (;;) {
        const delimiterAt = string.indexOf(substring, cursor);
        if (delimiterAt === -1) {
          // no more delimiter: whatever is left is the final entry
          boundaries.push([cursor, string.length]);
          return boundaries;
        }
        boundaries.push([cursor, delimiterAt]);
        const lineEndAt = string.indexOf(eol, delimiterAt + substring.length);
        if (lineEndAt === -1) {
          // the delimiter line is the last line of the text
          return boundaries;
        }
        cursor = lineEndAt + eol.length;
      }
    }

    /**
     * Parse one SDF record: the molfile followed by its `> <label>` data items.
     *
     * Besides building the molecule, this maintains the shared per-label
     * bookkeeping: a label seen for the first time gets an entry in `labels`,
     * every label met in this record is appended to `currentLabels`, and a label
     * stops being considered numeric as soon as one of its values is not a
     * finite number or starts with a zero followed by a digit.
     *
     * @param {string} sdfPart - text of a single record, without the `$$$$` line
     * @param {object} labels - per-label bookkeeping shared by all records (mutated)
     * @param {string[]} currentLabels - receives the labels found in this record (mutated)
     * @param {object} options
     * @param {string} options.eol - end of line sequence used in `sdfPart`
     * @param {boolean} options.dynamicTyping - initial value of `isNumeric` for new labels
     * @param {object} options.modifiers - callbacks, by label, applied to the raw value
     * @param {object} options.forEach - values, by label, copied onto the label entry
     * @param {string[]} [options.include] - when present, only these labels are kept
     * @param {string[]} [options.exclude] - labels that are never kept
     * @returns {object|undefined} the molecule, or undefined when the part before
     *   the first data item is 5 characters long or less
     */
    function getMolecule$1(sdfPart, labels, currentLabels, options) {
      const { eol } = options;
      const parts = sdfPart.split(`${eol}>`);
      if (parts.length === 0 || parts[0].length <= 5) return;
      const molecule = {};
      molecule.molfile = parts[0] + eol;
      for (let j = 1; j < parts.length; j++) {
        const lines = parts[j].split(eol);
        // the header line looks like `  <label>` (the leading `>` was consumed by the split)
        const header = lines[0];
        const label = header.slice(header.indexOf('<') + 1, header.indexOf('>'));
        currentLabels.push(label);
        if (!labels[label]) {
          labels[label] = {
            counter: 0,
            isNumeric: options.dynamicTyping,
            keep: false
          };
          const excluded = options.exclude && options.exclude.includes(label);
          const included = !options.include || options.include.includes(label);
          if (!excluded && included) {
            labels[label].keep = true;
            if (options.modifiers[label]) {
              labels[label].modifier = options.modifiers[label];
            }
            if (options.forEach[label]) {
              labels[label].forEach = options.forEach[label];
            }
          }
        }
        const info = labels[label];
        if (!info.keep) continue;
        // the last line of a data item is its closing blank line and is dropped
        for (let k = 1; k < lines.length - 1; k++) {
          if (molecule[label]) {
            molecule[label] += eol + lines[k];
          } else {
            molecule[label] = lines[k];
          }
        }
        if (info.modifier) {
          const modifiedValue = info.modifier(molecule[label]);
          if (modifiedValue === undefined || modifiedValue === null) {
            delete molecule[label];
          } else {
            molecule[label] = modifiedValue;
          }
        }
        if (info.isNumeric) {
          const value = molecule[label];
          // A modifier may have replaced the text by a number or an object, so
          // the leading-zero test (e.g. "007" must stay text) is limited to strings.
          const hasLeadingZero = typeof value === 'string' && /^0[0-9]/.test(value);
          if (!Number.isFinite(Number(value)) || hasLeadingZero) {
            info.isNumeric = false;
          }
        }
      }
      return molecule;
    }

    /**
     * Parse a SDF file.
     *
     * @param {string|ArrayBuffer|Uint8Array} sdf - SDF file to parse
     * @param {object} [options={}] - never mutated, a copy is used internally
     * @param {string[]} [options.include] - List of fields to include
     * @param {string[]} [options.exclude] - List of fields to exclude
     * @param {Function} [options.filter] - Callback allowing to filter the molecules
     * @param {boolean} [options.dynamicTyping=true] - Dynamically type the data
     * @param {object} [options.modifiers={}] - Object containing callbacks to apply on some specific fields
     * @param {object} [options.forEach={}] - Per-field values handed to the record parser
     * @param {boolean} [options.mixedEOL=false] - Set to true if you know there is a mixture between \r\n and \n
     * @param {string} [options.eol] - Specify the end of line character. Default will be the one found in the file
     * @returns {{time: number, molecules: object[], labels: string[], statistics: object[]}}
     *   the kept molecules, the labels met, one statistics object per label and
     *   the time spent (in ms) once the records have been located
     * @throws {TypeError} if `sdf` is neither a string nor binary data
     */
    function parse(sdf, options = {}) {
      const {
        modifiers = {},
        forEach = {},
        dynamicTyping = true
      } = options;
      const settings = {
        ...options,
        modifiers,
        forEach,
        dynamicTyping
      };
      // ensureString either returns a string or throws a TypeError
      let text = ensureString(sdf);
      if (settings.eol === undefined) {
        settings.eol = '\n';
        if (settings.mixedEOL) {
          text = text.replaceAll('\r\n', '\n');
          text = text.replaceAll('\r', '\n');
        } else {
          // Look for the line ending in the first 1000 characters only, with a
          // plain substring search (a Set of characters can never contain the
          // two-character sequence '\r\n').
          const header = text.slice(0, 1000);
          if (header.includes('\r\n')) {
            settings.eol = '\r\n';
          } else if (header.includes('\r')) {
            settings.eol = '\r';
          }
        }
      }
      const entriesBoundaries = getEntriesBoundaries(text, `${settings.eol}$$$$`, settings.eol);
      const molecules = [];
      // Label names come from the file: a prototype-less dictionary keeps names
      // such as "__proto__" or "constructor" from resolving to inherited members.
      const labels = Object.create(null);
      const start = Date.now();
      for (const [from, to] of entriesBoundaries) {
        const currentLabels = [];
        const molecule = getMolecule$1(text.slice(from, to), labels, currentLabels, settings);
        if (!molecule) continue;
        if (!settings.filter || settings.filter(molecule)) {
          molecules.push(molecule);
          // only now we can increase the counter
          for (const label of currentLabels) {
            labels[label].counter++;
          }
        }
      }
      // all numeric fields should be converted to numbers
      for (const label in labels) {
        const currentLabel = labels[label];
        if (!currentLabel.isNumeric) continue;
        currentLabel.minValue = Infinity;
        currentLabel.maxValue = -Infinity;
        for (const molecule of molecules) {
          if (molecule[label]) {
            const value = Number.parseFloat(molecule[label]);
            molecule[label] = value;
            if (value > currentLabel.maxValue) {
              currentLabel.maxValue = value;
            }
            if (value < currentLabel.minValue) {
              currentLabel.minValue = value;
            }
          }
        }
      }

      const statistics = [];
      for (const key in labels) {
        const statistic = labels[key];
        // a label is "always" present when every kept molecule carries it
        statistic.always = statistic.counter === molecules.length;
        statistic.label = key;
        statistics.push(statistic);
      }
      return {
        time: Date.now() - start,
        molecules,
        labels: Object.keys(labels),
        statistics
      };
    }

    /**
     * Convert a textual value to a boolean or a number when it looks like one.
     *
     * - "true" / "false" (any letter case) become booleans;
     * - text that `Number()` can convert becomes a number, except text that only
     *   converts to 0 without containing the digit 0 (empty or blank text);
     * - everything else is returned unchanged, including values that are not
     *   strings (e.g. `undefined` for a field without value).
     *
     * @param {string} value - text to convert
     * @returns {string|number|boolean|*} the typed value
     */
    function parseString(value) {
      // nothing to interpret: callers may hand over a missing field
      if (typeof value !== 'string') return value;
      if (value.length === 4 || value.length === 5) {
        const lowercase = value.toLowerCase();
        if (lowercase === 'true') return true;
        if (lowercase === 'false') return false;
      }
      const number = Number(value);
      // Number('') and Number('  ') are 0: keep such text as it is
      if (number === 0 && !value.includes('0')) {
        return value;
      }
      if (!Number.isNaN(number)) return number;
      return value;
    }

    /**
     * Transform stream cutting a stream of SDF text into one string per record.
     *
     * Incoming chunks are appended to an internal buffer. Each time a `$$$$`
     * delimiter followed by a `\n` is available, the text before the delimiter
     * is emitted without its final line terminator (`\n` or `\r\n`). Whatever is
     * left when the stream closes is emitted as a last record.
     */
    class MolfileStream extends TransformStream {
      #buffer = '';
      constructor() {
        // Offset at which a record ends when its delimiter starts at
        // `delimiterIndex`: the line terminator in front of the delimiter is
        // excluded because getMolecule splits on eol + '>'. Never goes below
        // `begin`, so a delimiter at the very start yields an empty record.
        const recordEnd = (text, begin, delimiterIndex) => {
          let end = delimiterIndex;
          if (end > begin && text[end - 1] === '\n') {
            end--;
            if (end > begin && text[end - 1] === '\r') end--;
          }
          return end;
        };
        super({
          transform: (chunk, controller) => {
            this.#buffer += chunk;
            let begin = 0;
            let index = 0;
            while ((index = this.#buffer.indexOf('$$$$', index)) !== -1) {
              // the delimiter line must be complete ('\n' received), otherwise
              // we wait for the next chunk
              const endOfDelimiter = this.#buffer.indexOf('\n', index);
              if (endOfDelimiter === -1) break;
              controller.enqueue(this.#buffer.slice(begin, recordEnd(this.#buffer, begin, index)));
              // '\n' is the last character of both '\n' and '\r\n', so the next
              // record always starts right after it
              begin = endOfDelimiter + 1;
              index = begin;
            }
            this.#buffer = this.#buffer.slice(begin);
          },
          flush: controller => {
            let rest = this.#buffer;
            // a delimiter still in the buffer is one that was never followed by
            // '\n' (file without final newline): it is not part of the record
            const index = rest.indexOf('$$$$');
            if (index !== -1) {
              rest = rest.slice(0, recordEnd(rest, 0, index));
            }
            if (rest) {
              controller.enqueue(rest);
            }
          }
        });
      }
    }

    /**
     * Parse a SDF file as an async iterator, one molecule at a time.
     *
     * The text stream is cut into records by MolfileStream (which finds the
     * line endings of the delimiter lines by itself); records shorter than 20
     * characters and records in which no molfile is found are skipped.
     *
     * @param {ReadableStream} readStream - SDF file to parse, as a stream of text
     * @param {object} [options={}] - iterator options
     * @param {Function} [options.filter] - Callback allowing to filter the molecules
     * @param {string} [options.eol='\n'] - End of line character
     * @param {boolean} [options.dynamicTyping=true] - Dynamically type the data
     * @yields {object} - Molecule object
     */
    async function* iterator(readStream, options = {}) {
      const {
        eol = '\n',
        dynamicTyping = true
      } = options;
      const moleculeStream = readStream.pipeThrough(new MolfileStream());
      for await (const entry of moleculeStream) {
        if (entry.length < 20) continue;
        const molecule = getMolecule(entry, {
          eol,
          dynamicTyping
        });
        // getMolecule returns undefined when the record holds no molfile:
        // neither the filter nor the consumer should ever see that
        if (!molecule) continue;
        if (!options.filter || options.filter(molecule)) {
          yield molecule;
        }
      }
    }

    /**
     * Convert a SDF record to an object holding the molfile and one property
     * per `> <label>` data item.
     *
     * @param {string} sdfPart - text of one record, without the `$$$$` line
     * @param {object} options - options
     * @param {string} options.eol - end of line character
     * @param {boolean} options.dynamicTyping - Dynamically type the data (create numbers and booleans)
     * @returns {object|undefined} the molecule, or undefined when the part before
     *   the first data item is 5 characters long or less
     */
    function getMolecule(sdfPart, options) {
      const {
        eol,
        dynamicTyping
      } = options;
      const parts = sdfPart.split(`${eol}>`);
      if (parts.length === 0 || parts[0].length <= 5) return;
      const molecule = {};
      molecule.molfile = parts[0] + eol;
      for (let j = 1; j < parts.length; j++) {
        const lines = parts[j].split(eol);
        // the header line looks like `  <label>` (the leading `>` was consumed by the split)
        const header = lines[0];
        const label = header.slice(header.indexOf('<') + 1, header.indexOf('>'));
        // the last line of a data item is its closing blank line and is dropped
        for (let k = 1; k < lines.length - 1; k++) {
          if (molecule[label]) {
            molecule[label] += eol + lines[k];
          } else {
            molecule[label] = lines[k];
          }
        }
        // A data item without any value line leaves the property unset, and
        // parseString reads `value.length`: only text is handed over.
        if (dynamicTyping && typeof molecule[label] === 'string') {
          molecule[label] = parseString(molecule[label]);
        }
      }
      return molecule;
    }

    // Public API of the bundle: the record-splitting transform stream, the
    // streaming (async iterator) parser and the in-memory parser.
    exports.MolfileStream = MolfileStream;
    exports.iterator = iterator;
    exports.parse = parse;

}));
//# sourceMappingURL=sdf-parser.js.map
