lucene/gradle/generation/snowball.gradle

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.tools.ant.taskdefs.condition.Os

// Location of the resources that accompany this build script; the snowball patch file
// and shell script further down are resolved relative to it.
// NOTE(review): scriptResources is defined elsewhere in the build — confirm what it returns.
def resources = scriptResources(buildscript)

// Presumably supplies the Download task type used by the download* tasks below — confirm.
apply plugin: "de.undercouch.download"

configure(project(":lucene:analysis:common")) {
  // Pinned upstream revisions and the working locations derived from them.
  ext {
    // git commit hash of source code https://github.com/snowballstem/snowball/
    snowballStemmerCommit = "d8cf01ddf37a9c74a78ada44531c08f7952f2a39"
    // git commit hash of stopwords https://github.com/snowballstem/snowball-website
    snowballWebsiteCommit = "ee7cee9bc52f22802f21e94f42d887b0dfa7d2a8"
    // git commit hash of test data https://github.com/snowballstem/snowball-data
    snowballDataCommit    = "35461050d8f81e8aeac26e38f8a8dbf1afb82721"

    // Root working directory for everything snowball-related, under this project's build dir.
    snowballWorkDir    = file("${buildDir}/snowball")

    // One directory per upstream repository. The commit hash is part of each name, so
    // changing a hash above yields a different directory (and a different "<dir>.zip"
    // download target in the download tasks).
    snowballStemmerDir = file("${snowballWorkDir}/stemmers-${snowballStemmerCommit}")
    snowballWebsiteDir = file("${snowballWorkDir}/website-${snowballWebsiteCommit}")
    snowballDataDir    = file("${snowballWorkDir}/data-${snowballDataCommit}")

    // Patch applied to the unpacked stemmer sources, and the bash script that is
    // executed to perform the regeneration.
    snowballPatchFile  = file("${resources}/snowball.patch")
    snowballScript     = file("${resources}/snowball.sh")
  }

  // Unpacks zipFile into targetDir, dropping the first path segment of every file entry
  // so that the archive's single top-level folder does not show up in targetDir.
  //
  // zipFile   - the archive to read (anything accepted by zipTree).
  // targetDir - destination directory. project.sync is used, so targetDir ends up
  //             containing only what was unpacked from the archive.
  def unpackFromZip = { zipFile, targetDir ->
    project.sync {
      from(zipTree(zipFile), {
        // eachFile is only invoked for files, so directory entries would keep their
        // original (un-stripped) path and leave an empty copy of the archive's folder
        // tree inside targetDir. Skip them: directories that hold files are still
        // created as a side effect of copying those files.
        includeEmptyDirs = false

        eachFile { fcd ->
          // Re-root the file one level up by discarding the leading segment.
          fcd.relativePath = new RelativePath(true, fcd.relativePath.segments.drop(1))
        }
      })
      into targetDir
    }
  }

  // Downloads the snowball stemmer sources at the pinned commit (or uses the cached copy),
  // then unpacks them and applies the local patch.
  task downloadSnowballStemmers(type: Download) {
    // The patch is applied in doLast below, so it is declared as an input of this task.
    inputs.file(snowballPatchFile)

    src "https://github.com/snowballstem/snowball/archive/${snowballStemmerCommit}.zip"
    // Keep the archive location in a named local, exactly like the website and data
    // download tasks do, instead of reading it back from the task's "dest" property.
    def snowballStemmerZip = file("${snowballStemmerDir}.zip")
    dest snowballStemmerZip
    overwrite false
    tempAndMove true

    doLast {
      unpackFromZip(snowballStemmerZip, snowballStemmerDir)
      // strip: "1" drops the first path component of the file names in the patch;
      // failonerror makes a patch that does not apply fail the build.
      ant.patch(patchfile: snowballPatchFile, dir: snowballStemmerDir, strip: "1", failonerror: true)
    }
  }

  // Fetches the snowball website sources at the pinned commit (or reuses the cached
  // archive) and unpacks them into snowballWebsiteDir.
  task downloadSnowballWebsite(type: Download) {
    def websiteArchive = file("${snowballWebsiteDir}.zip")

    src "https://github.com/snowballstem/snowball-website/archive/${snowballWebsiteCommit}.zip"
    dest websiteArchive
    tempAndMove true
    overwrite false

    doLast {
      unpackFromZip(websiteArchive, snowballWebsiteDir)
    }
  }

  // Fetches the snowball test data at the pinned commit (or reuses the cached
  // archive) and unpacks it into snowballDataDir.
  task downloadSnowballData(type: Download) {
    def dataArchive = file("${snowballDataDir}.zip")

    src "https://github.com/snowballstem/snowball-data/archive/${snowballDataCommit}.zip"
    dest dataArchive
    tempAndMove true
    overwrite false

    doLast {
      unpackFromZip(dataArchive, snowballDataDir)
    }
  }

  // Regenerates stemmers, base stemming subclasses, test data and stopwords by running
  // the snowball shell script with bash.
  task snowballInternal() {
    group "generation"
    description "Regenerate snowball stemmers."

    // Files tracked as inputs of this task: selected stemmer sources, stopword lists
    // and zipped test data, all relative to this project.
    inputs.files(
        fileTree(
            dir: "src/java/org/tartarus/snowball",
            include: [
                "Among.java",
                "SnowballStemmer.java",
                "SnowballProgram.java",
                "ext/*Stemmer.java"
            ]),
        fileTree(
            dir: "src/resources/org/apache/lucene/analysis/snowball",
            include: "*_stop.txt"),
        fileTree(
            dir: "src/test/org/apache/lucene/analysis/snowball",
            include: "*.zip"))

    def runningOnWindows = Os.isFamily(Os.FAMILY_WINDOWS)

    if (!runningOnWindows) {
      // The script works on the three unpacked upstream checkouts.
      dependsOn downloadSnowballStemmers, downloadSnowballWebsite, downloadSnowballData

      doFirst {
        project.quietExec {
          executable "bash"
          args = [snowballScript, snowballStemmerDir, snowballWebsiteDir, snowballDataDir, projectDir]
        }
      }
    } else {
      // No download dependencies are wired up on Windows: the task only reports the
      // problem and fails the build, so that checksums are not regenerated.
      doFirst {
        throw new GradleException("Snowball generation does not work on Windows (patch and bash must be available).")
      }
    }
  }

  // Make "regenerate" depend on whatever wrapWithPersistentChecksums returns for
  // snowballInternal.
  // NOTE(review): wrapWithPersistentChecksums is defined elsewhere in the build — confirm
  // the exact meaning of the andThenTasks / ignoreWithSource options there.
  def snowballChecksumOptions = [
      andThenTasks: ["spotlessJava", "spotlessJavaApply"],
      ignoreWithSource: [downloadSnowballStemmers, downloadSnowballWebsite, downloadSnowballData]
  ]
  regenerate.dependsOn wrapWithPersistentChecksums(snowballInternal, snowballChecksumOptions)
}