import ptJson from "./pt-term-freq-idf.json";

// Per-language term tables, keyed by language code. Summarizer reads
// freqTerms[lang] and uses it as its IDF lookup; only "pt" is registered here.
// NOTE(review): the JSON is presumably a flat token -> IDF weight map (values
// are multiplied as numbers in Summarizer.tfidf) — confirm against the data file.
const freqTerms = {
  pt: ptJson,
};

/**
 * Extractive summarizer: splits a text into sentences, scores each sentence by
 * the summed tf-idf weight of its tokens and keeps the highest-scoring
 * sentences in their original order.
 */
class Summarizer {
  /**
   * Builds the summary eagerly; the result is available as `this.summary`.
   *
   * @param {string} lang - Key into `freqTerms` selecting the IDF table.
   * @param {string} text - Text to summarize.
   * @param {number} number_sentences - Sentences to keep (rounded to an integer).
   */
  constructor(lang, text, number_sentences) {
    // IDF table for the language; undefined when freqTerms has no entry for it.
    this.term_idf = freqTerms[lang];
    this.lang = lang;
    this.text = text;
    this.number_sentences = Math.round(number_sentences);
    // Array of sentences, each sentence being an array of raw tokens.
    this.sentokens = Summarizer.sentenizer_tokenizer(text);
    // Same shape as sentokens, holding the tf-idf weight of each token.
    this.tfidf_sentokens = [];
    this.calc_tfidf_sentokens();
    // One aggregated score per sentence (sum of its token weights).
    this.tfidf_sentence = [];
    this.sum_tfidf();
    this.summarize();
  }

  /**
   * Changes the summary length and recomputes `this.summary`.
   *
   * @param {number} n - Sentences to keep; rounded exactly like the constructor argument.
   */
  change_number_sentences(n) {
    this.number_sentences = Math.round(n);
    this.summarize();
  }

  // Splitter static methods

  /**
   * Splits a text into trimmed, non-empty sentences.
   *
   * @param {string} text - Text to split.
   * @returns {string[]} Sentences in order; an empty array when `text` is not a
   *   string or is shorter than three characters.
   */
  static sentenizer(text) {
    if (typeof text !== "string" || text.length < 3) return [];
    // A line break not preceded by "!" or "?" becomes ". " so it ends a sentence.
    const normalized = text.replace(/([^!?])\n/g, "$1. ");
    // match() yields null when nothing matches; treat that as "no sentences".
    const pieces = normalized.match(/([^.?!]+)[.!?]+['"`’”]*|.+/g) || [];
    return pieces.map((piece) => piece.trim()).filter(Boolean);
  }

  /**
   * Splits a sentence into whitespace-delimited tokens.
   *
   * @param {string} text - Sentence to split.
   * @returns {string[]} Tokens in order; an empty array when there are none.
   */
  static tokenizer(text) {
    // Tokens contain no whitespace by construction, so no trimming is needed.
    return text.match(/[^\s]+/g) || [];
  }

  /**
   * Splits a text into sentences and each sentence into tokens.
   *
   * @param {string} text - Text to split.
   * @returns {string[][]} One token array per sentence.
   */
  static sentenizer_tokenizer(text) {
    return Summarizer.sentenizer(text).map((sentence) =>
      Summarizer.tokenizer(sentence)
    );
  }

  // tfidf methods

  /**
   * Weight of a single canonized token.
   *
   * @param {string} token - Canonized (lower-cased, punctuation-free) token.
   * @param {number} freq - Occurrences of the token in the whole text.
   * @param {number} number_tokens - Normalizer for the term frequency.
   * @returns {number} 0 for tokens of two characters or fewer, 1 for tokens
   *   missing from the IDF table, otherwise `(freq / number_tokens) * idf`.
   */
  tfidf(token, freq, number_tokens) {
    if (token.length <= 2) return 0;
    // Own-property check: a token such as "constructor" must not resolve to a
    // member inherited from Object.prototype.
    const idf = Object.prototype.hasOwnProperty.call(this.term_idf, token)
      ? this.term_idf[token]
      : undefined;
    if (idf == null) return 1;
    return (freq / number_tokens) * idf;
  }

  /**
   * Fills `this.tfidf_sentokens` with the weight of every token in
   * `this.sentokens`. Frequencies are counted over the whole text and
   * normalized by the number of distinct canonized tokens.
   */
  calc_tfidf_sentokens() {
    const punctuation = /[.,\/#!\?$%\^&\*;:{}=\-_`~()]/g;
    // Canonize the tokens: lower-case and strip punctuation.
    const cleaned = this.sentokens.map((sentence) =>
      sentence.map((token) => token.toLowerCase().replace(punctuation, ""))
    );

    // A Map keeps arbitrary words (e.g. "constructor") from colliding with
    // inherited object members while counting.
    const counts = new Map();
    for (const sentence of cleaned) {
      for (const token of sentence) {
        counts.set(token, (counts.get(token) || 0) + 1);
      }
    }

    const distinct_tokens = counts.size;
    this.tfidf_sentokens = cleaned.map((sentence) =>
      sentence.map((token) =>
        this.tfidf(token, counts.get(token), distinct_tokens)
      )
    );
  }

  /**
   * Alternative to `sum_tfidf` (currently unused): stores the mean token weight
   * of each sentence in `this.tfidf_sentence`. A sentence without tokens scores
   * 0 so that indices stay aligned with `this.sentokens`.
   */
  average_tfidf() {
    this.tfidf_sentence = this.tfidf_sentokens.map((weights) => {
      if (weights.length === 0) return 0;
      return weights.reduce((acc, w) => acc + w, 0) / weights.length;
    });
  }

  /**
   * Stores the summed token weight of each sentence in `this.tfidf_sentence`.
   */
  sum_tfidf() {
    this.tfidf_sentence = this.tfidf_sentokens.map((weights) =>
      weights.reduce((acc, w) => acc + w, 0)
    );
  }

  /**
   * Index of the smallest value in an array of numbers.
   *
   * @param {number[]} a - Values to scan.
   * @returns {number} Index of the first minimum; 0 for an empty array.
   */
  minpos_array(a) {
    let min = Infinity;
    let min_pos = 0;
    for (let i = 0; i < a.length; i++) {
      if (a[i] < min) {
        min = a[i];
        min_pos = i;
      }
    }
    return min_pos;
  }

  /**
   * Drops the lowest-scoring sentences until at most `this.number_sentences`
   * remain, then joins the survivors (in original order) into `this.summary`.
   */
  summarize() {
    this.summary_sentences = [...this.sentokens];
    this.tfidf_sentence_copy = [...this.tfidf_sentence];
    // Clamp so a negative target cannot keep the loop running on an empty list.
    const target = Math.max(0, this.number_sentences);
    while (this.summary_sentences.length > target) {
      const lowest = this.minpos_array(this.tfidf_sentence_copy);
      this.summary_sentences.splice(lowest, 1);
      this.tfidf_sentence_copy.splice(lowest, 1);
    }
    this.summary = this.summary_sentences
      .map((tokens) => tokens.join(" "))
      .join(" ");
  }
}

/**
 * Summarizes a text by keeping a percentage of its sentences.
 *
 * @param {string} text - Text to summarize.
 * @param {number} [perSumm] - Percentage of sentences to keep; falsy values fall back to 20.
 * @param {string} [language] - Language code of the IDF table; falsy values fall back to "pt".
 * @returns {string|undefined} The summary, or undefined when the text is not a
 *   string or the requested percentage amounts to less than one sentence.
 */
function summarize(text, perSumm, language) {
  if (typeof text !== "string") return undefined;
  const percent = perSumm || 20;
  const lang = language || "pt";
  // sentenizer may hand back a non-array for empty/very short text; count that
  // as zero sentences instead of letting NaN slip past the guard below.
  const sentences = Summarizer.sentenizer(text);
  const sentence_count = Array.isArray(sentences) ? sentences.length : 0;
  const target_sentences = (percent * sentence_count) / 100;
  if (target_sentences < 1) return undefined;
  return new Summarizer(lang, text, target_sentences).summary;
}

export default summarize;
