/*
 * The Original Code is Mozilla Universal charset detector code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 2001
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   António Afonso (antonio.afonso gmail.com) - port to JavaScript
 *   Mark Pilgrim - port to Python
 *   Shy Shalom - original C code
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301  USA
 */

var constants = require('./constants');
// Base prober. Every method is attached directly to the receiver, so the
// function can be used with `new` or applied to an already existing object.
// Most methods here are neutral defaults (null name, zero confidence, no-op
// feed); presumably concrete probers replace them -- confirm in subclasses.
function CharSetProber() {
  // Puts the prober into the `constants.detecting` state.
  this.reset = function () {
    this._mState = constants.detecting;
  };

  // Default: no charset name is reported.
  this.getCharsetName = function () {
    return null;
  };

  // Has no default implementation; always throws.
  this.getSupportedCharsetNames = function () {
    throw new Error("Unimplemented method getSupportedCharsetNames()");
  };

  // Default: ignores the buffer and returns nothing.
  this.feed = function (aBuf) {};

  // Returns whatever state was last stored in `_mState` (undefined until
  // something, e.g. reset(), assigns it).
  this.getState = function () {
    return this._mState;
  };

  // Default: zero confidence.
  this.getConfidence = function () {
    return 0;
  };

  // Returns aBuf with every run of characters in the range \x00-\x7F
  // collapsed into a single space, leaving only the characters above that
  // range (separated by spaces).
  this.filterHighBitOnly = function (aBuf) {
    return aBuf.replace(/[\x00-\x7F]+/g, " ");
  };

  // Returns aBuf with every run of the letters A-Z / a-z collapsed into a
  // single space; all other characters are kept as they are.
  this.filterWithoutEnglishLetters = function (aBuf) {
    return aBuf.replace(/[A-Za-z]+/g, " ");
  };

  // Returns a copy of aBuf with the spans delimited by '<' and '>' removed.
  //   - Text found in front of an opening '<' is emitted followed by one
  //     space.
  //   - '<' directly followed by '?' and '>' directly preceded by '?' (PHP
  //     style tags) are not treated as delimiters and stay in the text.
  //   - If the buffer ends while still inside a tag, the unfinished tag is
  //     dropped; otherwise the remaining tail is emitted unchanged.
  // NOTE(review): a '>' met outside of any tag also moves the segment start,
  // so the not-yet-emitted text in front of it is discarded (for example
  // "abc > def" yields " def"). Possibly intended for buffers that begin in
  // the middle of a tag -- confirm against the callers.
  this.removeXmlTags = function (aBuf) {
    var pieces = [];
    var insideTag = false;
    var segmentStart = 0;
    var pos = 0;

    while (pos < aBuf.length) {
      var ch = aBuf[pos];
      var closesTag = ch === '>' && aBuf[pos - 1] !== '?';
      var opensTag = !closesTag && ch === '<' && aBuf[pos + 1] !== '?';

      if (closesTag) {
        insideTag = false;
        segmentStart = pos + 1;
      } else if (opensTag) {
        // Only flush when there is pending text and we are not already
        // inside a tag (a nested '<' must not emit tag content).
        if (!insideTag && pos > segmentStart) {
          pieces.push(aBuf.substring(segmentStart, pos), ' ');
        }
        insideTag = true;
      }

      pos += 1;
    }

    if (!insideTag) {
      pieces.push(aBuf.substring(segmentStart));
    }
    return pieces.join('');
  };
}
module.exports = CharSetProber;