package ilc
package examples
package bench

import scala.util.Random
import org.scalameter.api.Gen
import ilc.feature.abelianMaps.Library._
import ilc.feature.abelianGroups.Library._
import ilc.feature.bags.Library._

class HistogramBenchmark extends OnlyDerivativeBenchmark(
  new WordCountBenchData(HistogramGenerated) {
    override def base = 1 //Should be around the break-even point.
    override def last = FastBenchmarksFlag.choose(512, 4096)
    override def step = 2
    override def sizes: Gen[Int] = Gen.exponential("n")(base, last, step)
  }) {
  override def iters: Int = 10
}

class HistogramRecomputeBenchmark extends OnlyRecomputationBenchmark(
  new WordCountBenchData(HistogramGenerated) {
    override def base = 1
    override def last = FastBenchmarksFlag.choose(512, 4096)
    override def step = 2
    override def sizes: Gen[Int] =
      Gen.exponential("n")(base, last, step)
      // exponential doesn't support base = last (it produces n = 0 during
      // warmup). Great! Use single instead:
      //Gen.single("n")(last)
  })

object HistogramVerification extends BenchmarkVerification(
  new WordCountBenchData(HistogramGenerated) {
    override def base = 2
    override def last = 2
    override def step = 10
  })


class WordCountBenchData(val example: ExampleGenerated {
  type InputType = AbelianMap[Int, Bag[Int]]
  type DeltaInputType =
    Either[(AbelianGroup[InputType], InputType), InputType]

  type OutputType = AbelianMap[Int, Int]
  type DeltaOutputType =
    Either[(AbelianGroup[OutputType], OutputType), OutputType]
})
extends BenchData
{
  import example._

  val vocabularySize = 1000

  /** word limit in UCLA personal statement for undergrad admission */
  val expectedDocumentSize = 1000

  /**
    * Returns a pseudorandom, uniformly distributed int value between from
    * (inclusive) and to (inclusive).
    */
  def rand(from: Int, to: Int) = from + Random.nextInt(to - from + 1)

  def rand1(n: Int): Int = rand(1, n)

  def randomWord: Int = rand1(vocabularySize)

  def randomWords(wordCount: Int): Seq[Int] =
    Seq.fill(wordCount)(randomWord)

  /** Put `elements` randomly in a `numberOfGroups`. */
  def randomGroups(elements: Seq[Int], numberOfGroups: Int):
      Seq[List[Int]] =
    {
      val groups: Array[List[Int]] = Array.fill(numberOfGroups)(Nil)
      val maxGroupIndex = numberOfGroups - 1
      elements foreach { element =>
        val i = rand(0, maxGroupIndex)
        groups(i) = element :: groups(i)
      }
      groups
    }

  def changeDescriptions: Gen[String] =
    Gen.enumeration("change")("random changes")

  /** Given `n`, create a hash trie of `n` documents with
    *
    *   n * expectedDocumentSize
    *
    * words in total. Words are put into documents at random
    * in the manner of the balls-and-bins situation.
    */
  def inputOfSize(n: Int): InputType = {
    val numberOfDocuments = n
    val words = randomWords(getTotalWords(n))
    val documents = randomGroups(words, numberOfDocuments) map { group =>
      Bag(group: _*)
    }
    AbelianMap((0 until numberOfDocuments) map {i => (i, documents(i))}: _*)
  }

  def getTotalWords(n: Int): Int = n * expectedDocumentSize

  def halfChance: Boolean = if (rand(0, 1) == 0) true else false

  /** Generate a random insertion or a random deletion. */
  def lookupChange(desc: String,
                   n: Int,
                   input: InputType,
                   output: OutputType):
      DeltaInputType =
  {
    val totalWords = getTotalWords(n)
    val changeGroupElement: InputType =
      if (halfChance)
        generateInsertion(input, totalWords)
      else
        generateDeletion(input, totalWords)
    Left((
      LiftedMapGroup[Int, Bag[Int]](FreeAbelianGroup()),
      changeGroupElement))
  }

  sealed trait CountingWords
  case class FoundTheWord(word: Int) extends CountingWords
  case class CountedWholeDocument(wordCount: Int) extends CountingWords

  def ithWord(document: Bag[Int], i: Int): CountingWords = {
    var wordCount = 0
    val iterator = document.iterator
    while (iterator.hasNext) {
      val (word, multiplicity) = iterator.next
      wordCount += multiplicity
      if (wordCount > i)
        return FoundTheWord(word)
    }
    CountedWholeDocument(wordCount)
  }

  def ithWordInDoc(input: InputType, i: Int): (Int, Int) = {
    def loop(i: Int, iterator: Iterator[(Int, Bag[Int])]): (Int, Int) = {
      val (docID, document) = iterator.next
      ithWord(document, i) match {
        case FoundTheWord(word) =>
          (word, docID)
        case CountedWholeDocument(wordCount) =>
          loop(i - wordCount, iterator)
      }
    }
    loop(i, input.iterator)
  }

  def generateDeletion(input: InputType, totalWords: Int): InputType = {
    val (word, docID) = ithWordInDoc(input, rand(0, totalWords - 1))
    AbelianMap(docID -> bagNegate(Bag(word)))
  }

  def generateInsertion(input: InputType, totalWords: Int): InputType = {
    val numberOfDocuments = input.size
    val insertedWord = randomWord
    val locationOfInsertion = rand(- numberOfDocuments, totalWords - 1)
    // negative locations represent space between documents.
    // if a word is inserted there, then a new document is created.
    if (locationOfInsertion < 0) {
      val newDocID = numberOfDocuments
      assert(! (input contains newDocID))
      AbelianMap(newDocID -> Bag(insertedWord))
    }
    else {
      val (word, docID) = ithWordInDoc(input, locationOfInsertion)
      // triggers scala 2.10.2 bug in merging HashMaps (Bags):
      // if multiplicity is 1, sometimes the collision handler
      // is invoked on null.
      AbelianMap(docID -> Bag(insertedWord))
    }
  }
}