mirror of https://github.com/apache/lucene.git
SOLR-1499: Added SolrEntityProcessor that imports data from another Solr core or instance based on a specified query.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1212394 13f79535-47bb-0310-9956-ffa450edef68
parent c59a66679a
commit 5e8c223baa
@@ -12,6 +12,13 @@ $Id$

(No Changes)

================== 3.6.0 ==================

New Features
----------------------

* SOLR-1499: Added SolrEntityProcessor that imports data from another Solr core or instance based on a specified query.
  (Lance Norskog, Erik Hatcher, Pulkit Singhal, Ahmet Arslan, Luca Cavanna, Martijn van Groningen)

================== 3.5.0 ==================

Bug Fixes
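For reference, a minimal data-config sketch wiring up the new processor (the attribute names are the ones SolrEntityProcessor reads below; the source URL, query, and field list are illustrative, taken from the test configurations in this commit):

<dataConfig>
  <document>
    <entity name="se" processor="SolrEntityProcessor"
            url="http://localhost:8983/solr"
            query="*:*" rows="50" fields="id,desc"
            format="javabin"/>
  </document>
</dataConfig>

Each fetched Solr document is yielded as one DIH row, so the usual <field column="..."/> mappings apply to the source document's field names.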
@@ -0,0 +1,254 @@
package org.apache.solr.handler.dataimport;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * <p>
 * An implementation of {@link EntityProcessor} which fetches values from a
 * separate Solr implementation using the SolrJ client library. Yields a row
 * per Solr document.
 * </p>
 * <p>
 * Limitations:
 * all configuration is evaluated at the beginning;
 * only one query is walked.
 * </p>
 */
public class SolrEntityProcessor extends EntityProcessorBase {

  private static final Logger LOG = LoggerFactory.getLogger(SolrEntityProcessor.class);

  public static final String SOLR_SERVER = "url";
  public static final String QUERY = "query";
  /**
   * (format="javabin|xml") default is javabin
   */
  public static final String FORMAT = "format";
  public static final String ROWS = "rows";
  public static final String FIELDS = "fields";
  public static final String FQ = "fq";
  public static final String TIMEOUT = "timeout";

  public static final int TIMEOUT_SECS = 5 * 60; // 5 minutes
  public static final int ROWS_DEFAULT = 50;

  private SolrServer solrServer = null;
  private String queryString;
  private int rows = ROWS_DEFAULT;
  private String[] filterQueries;
  private String[] fields;
  private int timeout = TIMEOUT_SECS;

  private boolean initDone = false;

  @Override
  protected void firstInit(Context context) {
    super.firstInit(context);

    try {
      String serverPath = context.getResolvedEntityAttribute(SOLR_SERVER);
      if (serverPath == null) {
        throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
            "SolrEntityProcessor: parameter 'url' is required");
      }
      HttpClient client = new HttpClient(
          new MultiThreadedHttpConnectionManager());
      URL url = new URL(serverPath);

      if ("xml".equals(context.getResolvedEntityAttribute(FORMAT))) {
        solrServer = new CommonsHttpSolrServer(url, client,
            new XMLResponseParser(), false);
        LOG.info("using XMLResponseParser");
      } else {
        solrServer = new CommonsHttpSolrServer(url, client);
        LOG.info("using BinaryResponseParser");
      }

    } catch (MalformedURLException e) {
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE, e);
    }

    this.queryString = context.getResolvedEntityAttribute(QUERY);
    if (this.queryString == null) {
      throw new DataImportHandlerException(
          DataImportHandlerException.SEVERE,
          "SolrEntityProcessor: parameter 'query' is required"
      );
    }

    String rowsP = context.getResolvedEntityAttribute(ROWS);
    if (rowsP != null) {
      rows = Integer.parseInt(rowsP);
    }

    String fqAsString = context.getResolvedEntityAttribute(FQ);
    if (fqAsString != null) {
      this.filterQueries = fqAsString.split(",");
    }

    String fieldsAsString = context.getResolvedEntityAttribute(FIELDS);
    if (fieldsAsString != null) {
      this.fields = fieldsAsString.split(",");
    }

    String timeoutAsString = context.getResolvedEntityAttribute(TIMEOUT);
    if (timeoutAsString != null) {
      this.timeout = Integer.parseInt(timeoutAsString);
    }
  }

  @Override
  public Map<String,Object> nextRow() {
    buildIterator();
    return getNext();
  }

  /**
   * The following method changes the rowIterator mutable field. It requires
   * external synchronization. In fact, when used in a multi-threaded setup,
   * the nextRow() method is called from a synchronized block
   * {@link ThreadedEntityProcessorWrapper#nextRow()}, so this is taken care of.
   */
  private void buildIterator() {
    if (rowIterator == null) {
      // We could use an AtomicBoolean, but there's no need since this method
      // would require external synchronization anyway
      if (!initDone) {
        initDone = true;
        SolrDocumentList solrDocumentList = doQuery(0);
        if (solrDocumentList != null) {
          rowIterator = new SolrDocumentListIterator(solrDocumentList);
        }
      }
      return;
    }

    SolrDocumentListIterator documentListIterator = (SolrDocumentListIterator) rowIterator;
    if (!documentListIterator.hasNext() && documentListIterator.hasMoreRows()) {
      SolrDocumentList solrDocumentList = doQuery(documentListIterator
          .getStart() + documentListIterator.getSize());
      if (solrDocumentList != null) {
        rowIterator = new SolrDocumentListIterator(solrDocumentList);
      }
    }

  }

  protected SolrDocumentList doQuery(int start) {
    SolrQuery solrQuery = new SolrQuery(queryString);
    solrQuery.setRows(rows);
    solrQuery.setStart(start);
    if (fields != null) {
      for (String field : fields) {
        solrQuery.addField(field);
      }
    }
    solrQuery.setFilterQueries(filterQueries);
    solrQuery.setTimeAllowed(timeout * 1000);

    QueryResponse response = null;
    try {
      response = solrServer.query(solrQuery);
    } catch (SolrServerException e) {
      if (ABORT.equals(onError)) {
        wrapAndThrow(SEVERE, e);
      } else if (SKIP.equals(onError)) {
        wrapAndThrow(DataImportHandlerException.SKIP_ROW, e);
      }
    }

    return response == null ? null : response.getResults();
  }

  private static class SolrDocumentListIterator implements Iterator<Map<String,Object>> {

    private final int start;
    private final int size;
    private final long numFound;
    private final Iterator<SolrDocument> solrDocumentIterator;

    public SolrDocumentListIterator(SolrDocumentList solrDocumentList) {
      this.solrDocumentIterator = solrDocumentList.iterator();
      this.numFound = solrDocumentList.getNumFound();
      // SolrQuery has the start field of type int while SolrDocumentList has
      // it of type long. We always query with an int, so we can't receive a
      // long as output. That's the reason why the following cast is safe.
      this.start = (int) solrDocumentList.getStart();
      this.size = solrDocumentList.size();
    }

    @Override
    public boolean hasNext() {
      return solrDocumentIterator.hasNext();
    }

    @Override
    public Map<String,Object> next() {
      SolrDocument solrDocument = solrDocumentIterator.next();

      HashMap<String,Object> map = new HashMap<String,Object>();
      Collection<String> fields = solrDocument.getFieldNames();
      for (String field : fields) {
        Object fieldValue = solrDocument.getFieldValue(field);
        map.put(field, fieldValue);
      }
      return map;
    }

    public int getStart() {
      return start;
    }

    public int getSize() {
      return size;
    }

    public boolean hasMoreRows() {
      return numFound > start + size;
    }

    @Override
    public void remove() {
      throw new UnsupportedOperationException();
    }
  }

}
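A note on the remaining attributes, with an illustrative (not authoritative) entity sketch: comma-separated fq values become individual filter queries, timeout is given in seconds and converted to milliseconds for setTimeAllowed(), and format="xml" switches from the default javabin parser to XMLResponseParser. The filter-query values below are examples against the schema added later in this commit:

<entity name="se" processor="SolrEntityProcessor"
        url="http://localhost:8983/solr" query="*:*"
        fq="inStock:true,popularity:[5 TO *]"
        timeout="300" format="xml"/>

Paging follows from doQuery(start): the processor re-queries with start advanced by the page size until numFound is exhausted, so rows="10" against 44 matching documents issues 5 queries (start = 0, 10, 20, 30, 40), exactly what testNumDocsGreaterThanRows below asserts.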
@@ -0,0 +1,64 @@
package org.apache.solr.handler.dataimport;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;

public class MockSolrEntityProcessor extends SolrEntityProcessor {

  private final String[][][] docsData;
  private final int rows;
  private int queryCount = 0;

  public MockSolrEntityProcessor(String[][][] docsData) {
    this(docsData, ROWS_DEFAULT);
  }

  public MockSolrEntityProcessor(String[][][] docsData, int rows) {
    this.docsData = docsData;
    this.rows = rows;
  }

  @Override
  protected SolrDocumentList doQuery(int start) {
    queryCount++;
    return getDocs(start, rows);
  }

  private SolrDocumentList getDocs(int start, int rows) {
    SolrDocumentList docs = new SolrDocumentList();
    docs.setNumFound(docsData.length);
    docs.setStart(start);

    int endIndex = start + rows;
    int end = docsData.length < endIndex ? docsData.length : endIndex;
    for (int i = start; i < end; i++) {
      SolrDocument doc = new SolrDocument();
      for (String[] fields : docsData[i]) {
        doc.addField(fields[0], fields[1]);
      }
      docs.add(doc);
    }
    return docs;
  }

  public int getQueryCount() {
    return queryCount;
  }
}
@@ -0,0 +1,334 @@
package org.apache.solr.handler.dataimport;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.io.FileUtils;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.common.SolrInputDocument;
import org.junit.After;
import org.junit.Before;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * End-to-end test of SolrEntityProcessor. "Real" test using embedded Solr.
 */
public class TestSolrEntityProcessorEndToEnd extends AbstractDataImportHandlerTestCase {

  private static Logger LOG = LoggerFactory.getLogger(TestSolrEntityProcessorEndToEnd.class);

  private static final String SOLR_SOURCE_URL = "http://localhost:8983/solr";
  private static final String SOLR_CONFIG = "dataimport-solrconfig.xml";
  private static final String SOLR_SCHEMA = "dataimport-schema.xml";
  private static final String SOLR_HOME = "dih/solr";
  private static final String CONF_DIR = "dih" + File.separator + "solr" + File.separator + "conf" + File.separator;

  private static final List<Map<String,Object>> DB_DOCS = new ArrayList<Map<String,Object>>();
  private static final List<Map<String,Object>> SOLR_DOCS = new ArrayList<Map<String,Object>>();

  static {
    // dynamic fields in the destination schema
    Map<String,Object> dbDoc = new HashMap<String,Object>();
    dbDoc.put("dbid_s", "1");
    dbDoc.put("dbdesc_s", "DbDescription");
    DB_DOCS.add(dbDoc);

    Map<String,Object> solrDoc = new HashMap<String,Object>();
    solrDoc.put("id", "1");
    solrDoc.put("desc", "SolrDescription");
    SOLR_DOCS.add(solrDoc);
  }

  private static final String DIH_CONFIG_TAGS_INNER_ENTITY = "<dataConfig>\r\n"
      + "  <dataSource type='MockDataSource' />\r\n"
      + "  <document>\r\n"
      + "    <entity name='db' query='select * from x'>\r\n"
      + "      <field column='dbid_s' />\r\n"
      + "      <field column='dbdesc_s' />\r\n"
      + "      <entity name='se' processor='SolrEntityProcessor' query='id:${db.dbid_s}'\n"
      + "          url='" + SOLR_SOURCE_URL + "' fields='id,desc'>\r\n"
      + "        <field column='id' />\r\n"
      + "        <field column='desc' />\r\n" + "      </entity>\r\n"
      + "    </entity>\r\n" + "  </document>\r\n" + "</dataConfig>\r\n";

  private SolrInstance instance = null;
  private JettySolrRunner jetty;

  private static String generateDIHConfig(String options) {
    return "<dataConfig>\r\n" + "  <document>\r\n"
        + "    <entity name='se' processor='SolrEntityProcessor'" + " url='"
        + SOLR_SOURCE_URL + "' " + options + " />\r\n" + "  </document>\r\n"
        + "</dataConfig>\r\n";
  }

  @Override
  @Before
  public void setUp() throws Exception {
    super.setUp();
    // destination solr core
    initCore(SOLR_CONFIG, SOLR_SCHEMA, SOLR_HOME);
    // data source solr instance
    instance = new SolrInstance();
    instance.setUp();
    jetty = createJetty(instance);
  }

  @Override
  @After
  public void tearDown() throws Exception {
    try {
      deleteCore();
    } catch (Exception e) {
      LOG.error("Error deleting core", e);
    }
    jetty.stop();
    instance.tearDown();
    super.tearDown();
  }

  public void testFullImport() {
    assertQ(req("*:*"), "//result[@numFound='0']");

    try {
      addDocumentsToSolr(SOLR_DOCS);
      runFullImport(generateDIHConfig("query='*:*' rows='2' fields='id,desc' onError='skip'"));
    } catch (Exception e) {
      LOG.error(e.getMessage(), e);
      fail(e.getMessage());
    }

    assertQ(req("*:*"), "//result[@numFound='1']");
    assertQ(req("id:1"), "//result/doc/str[@name='id'][.='1']",
        "//result/doc/arr[@name='desc'][.='SolrDescription']");
  }

  public void testFullImportFqParam() {
    assertQ(req("*:*"), "//result[@numFound='0']");

    try {
      addDocumentsToSolr(generateSolrDocuments(30));
      Map<String,String> map = new HashMap<String,String>();
      map.put("rows", "50");
      runFullImport(generateDIHConfig("query='*:*' fq='desc:Description1*,desc:Description*2' rows='2'"), map);
    } catch (Exception e) {
      LOG.error(e.getMessage(), e);
      fail(e.getMessage());
    }

    assertQ(req("*:*"), "//result[@numFound='1']");
    assertQ(req("id:12"), "//result[@numFound='1']", "//result/doc/arr[@name='desc'][.='Description12']");
  }

  public void testFullImportFieldsParam() {
    assertQ(req("*:*"), "//result[@numFound='0']");

    try {
      addDocumentsToSolr(generateSolrDocuments(7));
      runFullImport(generateDIHConfig("query='*:*' fields='id' rows='2'"));
    } catch (Exception e) {
      LOG.error(e.getMessage(), e);
      fail(e.getMessage());
    }

    assertQ(req("*:*"), "//result[@numFound='7']");
    assertQ(req("id:1"), "//result[@numFound='1']");
    try {
      assertQ(req("id:1"), "//result/doc/arr[@name='desc']");
      fail("The document has a field with name desc");
    } catch(Exception e) {
      // expected: fields='id' excludes the desc field, so the assertQ above must fail
    }

  }

  /**
   * Receive a row from SQL (Mock) and fetch a row from Solr
   */
  public void testFullImportInnerEntity() {
    assertQ(req("*:*"), "//result[@numFound='0']");

    try {
      MockDataSource.setIterator("select * from x", DB_DOCS.iterator());
      addDocumentsToSolr(SOLR_DOCS);
      runFullImport(DIH_CONFIG_TAGS_INNER_ENTITY);
    } catch (Exception e) {
      LOG.error(e.getMessage(), e);
      fail(e.getMessage());
    } finally {
      MockDataSource.clearCache();
    }

    assertQ(req("*:*"), "//result[@numFound='1']");
    assertQ(req("id:1"), "//result/doc/str[@name='id'][.='1']",
        "//result/doc/str[@name='dbdesc_s'][.='DbDescription']",
        "//result/doc/str[@name='dbid_s'][.='1']",
        "//result/doc/arr[@name='desc'][.='SolrDescription']");

  }

  public void testFullImportWrongSolrUrl() {
    try {
      jetty.stop();
    } catch (Exception e) {
      LOG.error("Error stopping jetty", e);
      fail(e.getMessage());
    }

    assertQ(req("*:*"), "//result[@numFound='0']");

    try {
      runFullImport(generateDIHConfig("query='*:*' rows='2' fields='id,desc' onError='skip'"));
    } catch (Exception e) {
      LOG.error(e.getMessage(), e);
      fail(e.getMessage());
    }

    assertQ(req("*:*"), "//result[@numFound='0']");
  }

  public void testFullImportBadConfig() {
    assertQ(req("*:*"), "//result[@numFound='0']");

    try {
      runFullImport(generateDIHConfig("query='bogus:3' rows='2' fields='id,desc' onError='abort'"));
    } catch (Exception e) {
      LOG.error(e.getMessage(), e);
      fail(e.getMessage());
    }

    assertQ(req("*:*"), "//result[@numFound='0']");
  }

  public void testFullImportMultiThreaded() {
    assertQ(req("*:*"), "//result[@numFound='0']");
    int numDocs = 37;
    List<Map<String,Object>> docList = generateSolrDocuments(numDocs);

    try {
      addDocumentsToSolr(docList);
      Map<String,String> map = new HashMap<String,String>();
      map.put("rows", "50");
      runFullImport(generateDIHConfig("query='*:*' rows='6' numThreads='4'"),
          map);
    } catch (Exception e) {
      LOG.error(e.getMessage(), e);
      fail(e.getMessage());
    }

    assertQ(req("*:*"), "//result[@numFound='" + numDocs + "']");
  }

  private static List<Map<String,Object>> generateSolrDocuments(int num) {
    List<Map<String,Object>> docList = new ArrayList<Map<String,Object>>();
    for (int i = 1; i <= num; i++) {
      Map<String,Object> map = new HashMap<String,Object>();
      map.put("id", i);
      map.put("desc", "Description" + i);
      docList.add(map);
    }
    return docList;
  }

  private void addDocumentsToSolr(List<Map<String,Object>> docs) throws SolrServerException, IOException {
    List<SolrInputDocument> sidl = new ArrayList<SolrInputDocument>();
    for (Map<String,Object> doc : docs) {
      SolrInputDocument sd = new SolrInputDocument();
      for (Entry<String,Object> entry : doc.entrySet()) {
        sd.addField(entry.getKey(), entry.getValue());
      }
      sidl.add(sd);
    }

    HttpClient client = new HttpClient(new MultiThreadedHttpConnectionManager());
    URL url = new URL(SOLR_SOURCE_URL);
    CommonsHttpSolrServer solrServer = new CommonsHttpSolrServer(url, client);
    solrServer.add(sidl);
    solrServer.commit(true, true);
  }

  private static class SolrInstance {

    File homeDir;
    File dataDir;
    File confDir;

    public String getHomeDir() {
      return homeDir.toString();
    }

    public String getSchemaFile() {
      return CONF_DIR + "dataimport-schema.xml";
    }

    public String getDataDir() {
      return dataDir.toString();
    }

    public String getSolrConfigFile() {
      return CONF_DIR + "dataimport-solrconfig.xml";
    }

    public void setUp() throws Exception {

      File home = new File(TEMP_DIR, getClass().getName() + "-"
          + System.currentTimeMillis());

      homeDir = new File(home + "inst");
      dataDir = new File(homeDir, "data");
      confDir = new File(homeDir, "conf");

      homeDir.mkdirs();
      dataDir.mkdirs();
      confDir.mkdirs();

      File f = new File(confDir, "solrconfig.xml");
      FileUtils.copyFile(getFile(getSolrConfigFile()), f);
      f = new File(confDir, "schema.xml");

      FileUtils.copyFile(getFile(getSchemaFile()), f);
      f = new File(confDir, "data-config.xml");
      FileUtils.copyFile(getFile(CONF_DIR + "dataconfig-contentstream.xml"), f);
    }

    public void tearDown() throws Exception {
      recurseDelete(homeDir);
    }

  }

  private JettySolrRunner createJetty(SolrInstance instance) throws Exception {
    System.setProperty("solr.solr.home", instance.getHomeDir());
    System.setProperty("solr.data.dir", instance.getDataDir());
    JettySolrRunner jetty = new JettySolrRunner("/solr", 8983);
    jetty.start();
    return jetty;
  }

}
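Usage note on the inner-entity pattern above: the query attribute ('id:${db.dbid_s}') is re-resolved for every row emitted by the outer db entity, so each outer row triggers its own query against the source Solr instance, and the merged document carries both the db columns (dbid_s, dbdesc_s) and the fetched Solr fields (id, desc), as testFullImportInnerEntity asserts.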
@@ -0,0 +1,149 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Unit test of SolrEntityProcessor. A very basic test outside of the DIH.
 */
public class TestSolrEntityProcessorUnit extends AbstractDataImportHandlerTestCase {

  private static final Logger LOG = LoggerFactory.getLogger(TestSolrEntityProcessorUnit.class);
  private static final String ID = "id";

  public void testQuery() {
    String[][][] docs = generateDocs(2);

    MockSolrEntityProcessor processor = new MockSolrEntityProcessor(docs);

    assertExpectedDocs(docs, processor);
    assertEquals(1, processor.getQueryCount());
  }

  public void testNumDocsGreaterThanRows() {
    String[][][] docs = generateDocs(44);

    MockSolrEntityProcessor processor = new MockSolrEntityProcessor(docs, 10);
    assertExpectedDocs(docs, processor);
    assertEquals(5, processor.getQueryCount());
  }

  public void testMultiValuedFields() {
    String[][][] docs = new String[1][2][2];
    String[][] doc = new String[][] { {"id", "1"}, {"multi", "multi1"},
        {"multi", "multi2"}, {"multi", "multi3"}};
    docs[0] = doc;

    MockSolrEntityProcessor processor = new MockSolrEntityProcessor(docs);

    Map<String,Object> next = processor.nextRow();
    assertNotNull(next);
    assertEquals(doc[0][1], next.get(doc[0][0]));

    String[] multiValued = {"multi1", "multi2", "multi3"};
    assertEquals(Arrays.asList(multiValued), next.get(doc[1][0]));
    assertEquals(1, processor.getQueryCount());
    assertNull(processor.nextRow());

  }

  public void testMultiThread() {
    int numThreads = 5;
    int numDocs = 40;
    String[][][] docs = generateDocs(numDocs);
    final MockSolrEntityProcessor entityProcessor = new MockSolrEntityProcessor(docs, 25);

    final Map<String,Map<String,Object>> rowList = new HashMap<String,Map<String,Object>>();
    final CountDownLatch latch = new CountDownLatch(numThreads);
    for (int i = 0; i < numThreads; i++) {
      Runnable runnable = new Runnable() {
        public void run() {
          try {
            while (true) {
              Map<String,Object> row;
              synchronized (entityProcessor) {
                row = entityProcessor.nextRow();
              }
              if (row == null) {
                break;
              }
              rowList.put(row.get(ID).toString(), row);
            }
          } finally {
            latch.countDown();
          }
        }
      };

      new ThreadPoolExecutor(0, Integer.MAX_VALUE, 5, TimeUnit.SECONDS,
          new SynchronousQueue<Runnable>()).execute(runnable);
    }

    try {
      latch.await();
    } catch (InterruptedException e) {
      LOG.error(e.getMessage(), e);
    }

    assertEquals(numDocs, rowList.size());

    for (String[][] expectedDoc : docs) {
      Map<String,Object> row = rowList.get(expectedDoc[0][1]);
      assertNotNull(row);
      int i = 0;
      for (Entry<String,Object> entry : row.entrySet()) {
        assertEquals(expectedDoc[i][0], entry.getKey());
        assertEquals(expectedDoc[i][1], entry.getValue());
        i++;
      }
      rowList.remove(expectedDoc[0][1]);
    }

    assertEquals(0, rowList.size());

  }

  private static String[][][] generateDocs(int numDocs) {
    String[][][] docs = new String[numDocs][2][2];
    for (int i = 0; i < numDocs; i++) {
      docs[i] = new String[][] { {"id", Integer.toString(i+1)},
          {"description", "Description" + Integer.toString(i+1)}};
    }
    return docs;
  }

  private static void assertExpectedDocs(String[][][] expectedDocs, SolrEntityProcessor processor) {
    for (String[][] expectedDoc : expectedDocs) {
      Map<String, Object> next = processor.nextRow();
      assertNotNull(next);
      assertEquals(expectedDoc[0][1], next.get(expectedDoc[0][0]));
      assertEquals(expectedDoc[1][1], next.get(expectedDoc[1][0]));
    }
    assertNull(processor.nextRow());
  }
}
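Note how testMultiThread synchronizes on the processor around each nextRow() call: this mirrors the external-synchronization contract documented on buildIterator(), which in the real DIH pipeline is provided by the synchronized block in ThreadedEntityProcessorWrapper#nextRow().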
@@ -38,6 +38,10 @@ To import data from your imap server
1. Edit the example-DIH/solr/mail/conf/data-config.xml and add details about username, password, imap server
2. Connect to http://localhost:8983/solr/mail/dataimport?command=full-import

To copy data from the db Solr core, connect to

  http://localhost:8983/solr/solr/dataimport?command=full-import

See also README.txt in the solr subdirectory, and check
http://wiki.apache.org/solr/DataImportHandler for a detailed
usage guide and tutorial.
@@ -5,5 +5,6 @@
    <core default="false" instanceDir="rss" name="rss"/>
    <core default="false" instanceDir="mail" name="mail"/>
    <core default="false" instanceDir="tika" name="tika"/>
    <core default="false" instanceDir="solr" name="solr"/>
  </cores>
</solr>
@@ -0,0 +1,31 @@
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

<!-- The content of this page will be statically included into the top
of the admin page.  Uncomment this as an example to see where the content
will show up.

<hr>
<i>This line will appear before the first table</i>
<tr>
  <td colspan="2">
    This row will be appended to the end of the first table
  </td>
</tr>
<hr>

-->
@@ -0,0 +1,36 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

<!-- If this file is found in the config directory, it will only be
     loaded once at startup.  If it is found in Solr's data
     directory, it will be re-loaded every commit.
-->

<elevate>
 <query text="foo bar">
  <doc id="1" />
  <doc id="2" />
  <doc id="3" />
 </query>

 <query text="ipod">
   <doc id="MA147LL/A" />  <!-- put the actual ipod at the top -->
   <doc id="IW-02" exclude="true" /> <!-- exclude this cable -->
 </query>

</elevate>
@@ -0,0 +1,21 @@
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#-----------------------------------------------------------------------
# Use a protected word file to protect against the stemmer reducing two
# unrelated words to the same base word.

# Some non-words that normally won't be encountered,
# just to test that they won't be stemmed.
dontstems
zwhacky
@ -0,0 +1,359 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" ?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!--
|
||||||
|
This is the Solr schema file. This file should be named "schema.xml" and
|
||||||
|
should be in the conf directory under the solr home
|
||||||
|
(i.e. ./solr/conf/schema.xml by default)
|
||||||
|
or located where the classloader for the Solr webapp can find it.
|
||||||
|
|
||||||
|
This example schema is the recommended starting point for users.
|
||||||
|
It should be kept correct and concise, usable out-of-the-box.
|
||||||
|
|
||||||
|
For more information, on how to customize this file, please see
|
||||||
|
http://wiki.apache.org/solr/SchemaXml
|
||||||
|
-->
|
||||||
|
|
||||||
|
<schema name="solr" version="1.1">
|
||||||
|
<!-- attribute "name" is the name of this schema and is only used for display purposes.
|
||||||
|
Applications should change this to reflect the nature of the search collection.
|
||||||
|
version="1.1" is Solr's version number for the schema syntax and semantics. It should
|
||||||
|
not normally be changed by applications.
|
||||||
|
1.0: multiValued attribute did not exist, all fields are multiValued by nature
|
||||||
|
1.1: multiValued attribute introduced, false by default -->
|
||||||
|
|
||||||
|
<types>
|
||||||
|
<!-- field type definitions. The "name" attribute is
|
||||||
|
just a label to be used by field definitions. The "class"
|
||||||
|
attribute and any other attributes determine the real
|
||||||
|
behavior of the fieldType.
|
||||||
|
Class names starting with "solr" refer to java classes in the
|
||||||
|
org.apache.solr.analysis package.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- The StrField type is not analyzed, but indexed/stored verbatim.
|
||||||
|
- StrField and TextField support an optional compressThreshold which
|
||||||
|
limits compression (if enabled in the derived fields) to values which
|
||||||
|
exceed a certain size (in characters).
|
||||||
|
-->
|
||||||
|
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
|
||||||
|
|
||||||
|
<!-- boolean type: "true" or "false" -->
|
||||||
|
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
|
||||||
|
|
||||||
|
<!-- The optional sortMissingLast and sortMissingFirst attributes are
|
||||||
|
currently supported on types that are sorted internally as strings.
|
||||||
|
- If sortMissingLast="true", then a sort on this field will cause documents
|
||||||
|
without the field to come after documents with the field,
|
||||||
|
regardless of the requested sort order (asc or desc).
|
||||||
|
- If sortMissingFirst="true", then a sort on this field will cause documents
|
||||||
|
without the field to come before documents with the field,
|
||||||
|
regardless of the requested sort order.
|
||||||
|
- If sortMissingLast="false" and sortMissingFirst="false" (the default),
|
||||||
|
then default lucene sorting will be used which places docs without the
|
||||||
|
field first in an ascending sort and last in a descending sort.
|
||||||
|
-->
|
||||||
|
|
||||||
|
|
||||||
|
<!-- numeric field types that store and index the text
|
||||||
|
value verbatim (and hence don't support range queries, since the
|
||||||
|
lexicographic ordering isn't equal to the numeric ordering) -->
|
||||||
|
<fieldType name="integer" class="solr.IntField" omitNorms="true"/>
|
||||||
|
<fieldType name="long" class="solr.LongField" omitNorms="true"/>
|
||||||
|
<fieldType name="float" class="solr.FloatField" omitNorms="true"/>
|
||||||
|
<fieldType name="double" class="solr.DoubleField" omitNorms="true"/>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Numeric field types that manipulate the value into
|
||||||
|
a string value that isn't human-readable in its internal form,
|
||||||
|
but with a lexicographic ordering the same as the numeric ordering,
|
||||||
|
so that range queries work correctly. -->
|
||||||
|
<fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
|
||||||
|
<fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
|
||||||
|
<fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
|
||||||
|
<fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
|
||||||
|
is a more restricted form of the canonical representation of dateTime
|
||||||
|
http://www.w3.org/TR/xmlschema-2/#dateTime
|
||||||
|
The trailing "Z" designates UTC time and is mandatory.
|
||||||
|
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
|
||||||
|
All other components are mandatory.
|
||||||
|
|
||||||
|
Expressions can also be used to denote calculations that should be
|
||||||
|
performed relative to "NOW" to determine the value, ie...
|
||||||
|
|
||||||
|
NOW/HOUR
|
||||||
|
... Round to the start of the current hour
|
||||||
|
NOW-1DAY
|
||||||
|
... Exactly 1 day prior to now
|
||||||
|
NOW/DAY+6MONTHS+3DAYS
|
||||||
|
... 6 months and 3 days in the future from the start of
|
||||||
|
the current day
|
||||||
|
|
||||||
|
Consult the DateField javadocs for more information.
|
||||||
|
-->
|
||||||
|
<fieldType name="date" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- The "RandomSortField" is not used to store or search any
|
||||||
|
data. You can declare fields of this type it in your schema
|
||||||
|
to generate psuedo-random orderings of your docs for sorting
|
||||||
|
purposes. The ordering is generated based on the field name
|
||||||
|
and the version of the index, As long as the index version
|
||||||
|
remains unchanged, and the same field name is reused,
|
||||||
|
the ordering of the docs will be consistent.
|
||||||
|
If you want differend psuedo-random orderings of documents,
|
||||||
|
for the same version of the index, use a dynamicField and
|
||||||
|
change the name
|
||||||
|
-->
|
||||||
|
<fieldType name="random" class="solr.RandomSortField" indexed="true" />
|
||||||
|
|
||||||
|
<!-- solr.TextField allows the specification of custom text analyzers
|
||||||
|
specified as a tokenizer and a list of token filters. Different
|
||||||
|
analyzers may be specified for indexing and querying.
|
||||||
|
|
||||||
|
The optional positionIncrementGap puts space between multiple fields of
|
||||||
|
this type on the same document, with the purpose of preventing false phrase
|
||||||
|
matching across fields.
|
||||||
|
|
||||||
|
For more info on customizing your analyzer chain, please see
|
||||||
|
http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- One can also specify an existing Analyzer class that has a
|
||||||
|
default constructor via the class attribute on the analyzer element
|
||||||
|
<fieldType name="text_greek" class="solr.TextField">
|
||||||
|
<analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
|
||||||
|
</fieldType>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- A text field that only splits on whitespace for exact matching of words -->
|
||||||
|
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
|
||||||
|
words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
|
||||||
|
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
|
||||||
|
Synonyms and stopwords are customized by external files, and stemming is enabled.
|
||||||
|
Duplicate tokens at the same position (which may result from Stemmed Synonyms or
|
||||||
|
WordDelim parts) are removed.
|
||||||
|
-->
|
||||||
|
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
|
||||||
|
<analyzer type="index">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<!-- in this example, we will only use synonyms at query time
|
||||||
|
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||||
|
-->
|
||||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||||
|
<filter class="solr.PorterStemFilterFactory"/>
|
||||||
|
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="query">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||||
|
<filter class="solr.PorterStemFilterFactory"/>
|
||||||
|
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Less flexible matching, but less false matches. Probably not ideal for product names,
|
||||||
|
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
||||||
|
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||||
|
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||||
|
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<!-- This is an example of using the KeywordTokenizer along
|
||||||
|
With various TokenFilterFactories to produce a sortable field
|
||||||
|
that does not include some properties of the source text
|
||||||
|
-->
|
||||||
|
<fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
|
||||||
|
<analyzer>
|
||||||
|
<!-- KeywordTokenizer does no actual tokenizing, so the entire
|
||||||
|
input string is preserved as a single token
|
||||||
|
-->
|
||||||
|
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||||
|
<!-- The LowerCase TokenFilter does what you expect, which can be
|
||||||
|
when you want your sorting to be case insensitive
|
||||||
|
-->
|
||||||
|
<filter class="solr.LowerCaseFilterFactory" />
|
||||||
|
<!-- The TrimFilter removes any leading or trailing whitespace -->
|
||||||
|
<filter class="solr.TrimFilterFactory" />
|
||||||
|
<!-- The PatternReplaceFilter gives you the flexibility to use
|
||||||
|
Java Regular expression to replace any sequence of characters
|
||||||
|
matching a pattern with an arbitrary replacement string,
|
||||||
|
which may include back refrences to portions of the orriginal
|
||||||
|
string matched by the pattern.
|
||||||
|
|
||||||
|
See the Java Regular Expression documentation for more
|
||||||
|
infomation on pattern and replacement string syntax.
|
||||||
|
|
||||||
|
http://java.sun.com/j2se/1.6.0/docs/api/java/util/regex/package-summary.html
|
||||||
|
-->
|
||||||
|
<filter class="solr.PatternReplaceFilterFactory"
|
||||||
|
pattern="([^a-z])" replacement="" replace="all"
|
||||||
|
/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<!-- since fields of this type are by default not stored or indexed, any data added to
|
||||||
|
them will be ignored outright
|
||||||
|
-->
|
||||||
|
<fieldtype name="ignored" stored="false" indexed="false" class="solr.StrField" />
|
||||||
|
|
||||||
|
</types>
|
||||||
|
|
||||||
|
|
||||||
|
<fields>
|
||||||
|
<!-- Valid attributes for fields:
|
||||||
|
name: mandatory - the name for the field
|
||||||
|
type: mandatory - the name of a previously defined type from the <types> section
|
||||||
|
indexed: true if this field should be indexed (searchable or sortable)
|
||||||
|
stored: true if this field should be retrievable
|
||||||
|
compressed: [false] if this field should be stored using gzip compression
|
||||||
|
(this will only apply if the field type is compressable; among
|
||||||
|
the standard field types, only TextField and StrField are)
|
||||||
|
multiValued: true if this field may contain multiple values per document
|
||||||
|
omitNorms: (expert) set to true to omit the norms associated with
|
||||||
|
this field (this disables length normalization and index-time
|
||||||
|
boosting for the field, and saves some memory). Only full-text
|
||||||
|
fields or fields that need an index-time boost need norms.
|
||||||
|
termVectors: [false] set to true to store the term vector for a given field.
|
||||||
|
When using MoreLikeThis, fields used for similarity should be stored for
|
||||||
|
best performance.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<field name="id" type="string" indexed="true" stored="true" required="true" />
|
||||||
|
<field name="sku" type="textTight" indexed="true" stored="true" omitNorms="true"/>
|
||||||
|
<field name="name" type="text" indexed="true" stored="true"/>
|
||||||
|
<field name="nameSort" type="string" indexed="true" stored="false"/>
|
||||||
|
<field name="alphaNameSort" type="alphaOnlySort" indexed="true" stored="false"/>
|
||||||
|
<field name="manu" type="text" indexed="true" stored="true" omitNorms="true"/>
|
||||||
|
<field name="cat" type="text_ws" indexed="true" stored="true" multiValued="true" omitNorms="true" termVectors="true" />
|
||||||
|
<field name="features" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<field name="includes" type="text" indexed="true" stored="true"/>
|
||||||
|
|
||||||
|
<field name="weight" type="sfloat" indexed="true" stored="true"/>
|
||||||
|
<field name="price" type="sfloat" indexed="true" stored="true"/>
|
||||||
|
<!-- "default" values can be specified for fields, indicating which
|
||||||
|
value should be used if no value is specified when adding a document.
|
||||||
|
-->
|
||||||
|
<field name="popularity" type="sint" indexed="true" stored="true" default="0"/>
|
||||||
|
<field name="inStock" type="boolean" indexed="true" stored="true"/>
|
||||||
|
|
||||||
|
<!-- Some sample docs exists solely to demonstrate the spellchecker
|
||||||
|
functionality, this is the only field they container.
|
||||||
|
Typically you might build the spellchecker of "catchall" type field
|
||||||
|
containing all of the text in each document
|
||||||
|
-->
|
||||||
|
<field name="word" type="string" indexed="true" stored="true"/>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- catchall field, containing all other searchable text fields (implemented
|
||||||
|
via copyField further on in this schema -->
|
||||||
|
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
|
||||||
|
<!-- non-tokenized version of manufacturer to make it easier to sort or group
|
||||||
|
results by manufacturer. copied from "manu" via copyField -->
|
||||||
|
<field name="manu_exact" type="string" indexed="true" stored="false"/>
|
||||||
|
|
||||||
|
<!-- Here, default is used to create a "timestamp" field indicating
|
||||||
|
When each document was indexed.
|
||||||
|
-->
|
||||||
|
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
|
||||||
|
|
||||||
|
|
||||||
|
   <!-- Dynamic field definitions.  If a field name is not found, dynamicFields
        will be used if the name matches any of the patterns.
        RESTRICTION: the glob-like pattern in the name attribute must have
        a "*" only at the start or the end.
        EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
        Longer patterns will be matched first.  If equal-size patterns
        both match, the first appearing in the schema will be used. -->
<dynamicField name="*_i" type="sint" indexed="true" stored="true"/>
|
||||||
|
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
|
||||||
|
<dynamicField name="*_l" type="slong" indexed="true" stored="true"/>
|
||||||
|
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
|
||||||
|
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
|
||||||
|
<dynamicField name="*_f" type="sfloat" indexed="true" stored="true"/>
|
||||||
|
<dynamicField name="*_d" type="sdouble" indexed="true" stored="true"/>
|
||||||
|
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
|
||||||
|
|
||||||
|
<dynamicField name="random*" type="random" />
|
||||||
|
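
   <!-- Illustration (editor's sketch, not part of the stock schema): with the
        patterns above, an update document can carry ad-hoc typed fields with no
        schema change; the field names below are hypothetical examples.
        "weight_f" matches *_f and is typed sfloat; "released_dt" matches *_dt:

        <add><doc>
          <field name="id">EXAMPLE-1</field>
          <field name="weight_f">1.5</field>
          <field name="released_dt">2011-12-09T00:00:00Z</field>
        </doc></add>
   -->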

   <!-- uncomment the following to ignore any fields that don't already match an existing
        field name or dynamic field, rather than reporting them as an error.
        Alternately, change the type="ignored" to some other type, e.g. "text", if you want
        unknown fields indexed and/or stored by default -->
   <!--dynamicField name="*" type="ignored" multiValued="true" /-->

 </fields>

 <!-- Field to use to determine and enforce document uniqueness.
      Unless this field is marked with required="false", it will be a required field
   -->
 <uniqueKey>id</uniqueKey>

 <!-- field for the QueryParser to use when an explicit fieldname is absent -->
 <defaultSearchField>text</defaultSearchField>

 <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
 <solrQueryParser defaultOperator="OR"/>

 <!-- copyField commands copy one field to another at the time a document
      is added to the index.  It's used either to index the same field differently,
      or to add multiple fields to the same field for easier/faster searching. -->
 <copyField source="id" dest="sku"/>

 <copyField source="cat" dest="text"/>
 <copyField source="name" dest="text"/>
 <copyField source="name" dest="nameSort"/>
 <copyField source="name" dest="alphaNameSort"/>
 <copyField source="manu" dest="text"/>
 <copyField source="features" dest="text"/>
 <copyField source="includes" dest="text"/>

 <copyField source="manu" dest="manu_exact"/>

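 <!-- Worked example (editor's note, not part of the stock schema): given the
      copyField rules above, a document whose "name" value is "Canon PIXMA" is
      also searchable through the catchall "text" field, while the same raw
      value is indexed untokenized into "nameSort" and "alphaNameSort" so it
      can be sorted without analysis side effects. -->
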
 <!-- Similarity is the scoring routine for each document vs. a query.
      A custom similarity may be specified here, but the default is fine
      for most applications. -->
 <!-- <similarity class="org.apache.lucene.search.similarities.DefaultSimilarity"/> -->

</schema>
@ -0,0 +1,24 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

user=
solr_hostname=localhost
solr_port=8983
rsyncd_port=18983
data_dir=
webapp_name=solr
master_host=
master_data_dir=
master_status_dir=
@ -0,0 +1,22 @@
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements.  See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->

<dataConfig>
  <document>
    <entity name="sep" processor="SolrEntityProcessor" url="http://localhost:8983/solr/db" query="*:*"/>
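    <!-- Editor's sketch, not part of this commit's test config: the entity can
         be parameterized further.  The attribute names below (rows, fl, wt,
         timeout) are assumed optional SolrEntityProcessor parameters; verify
         them against the processor's documentation before relying on them.

         <entity name="sep" processor="SolrEntityProcessor"
                 url="http://localhost:8983/solr/db" query="*:*"
                 rows="50" fl="id,desc" wt="javabin" timeout="30"/>
    -->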
  </document>
</dataConfig>
@ -0,0 +1,569 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements.  See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->

<config>

  <!--
    Controls what version of Lucene various components of Solr adhere to.  Generally, you want
    to use the latest version to get all bug fixes and improvements.  It is highly recommended
    that you fully re-index after changing this setting as it can affect both how text is indexed
    and queried.
  -->
  <luceneMatchVersion>LUCENE_40</luceneMatchVersion>

  <jmx />

  <lib dir="../../../../dist/" regex="apache-solr-dataimporthandler-.*\.jar" />

  <indexDefaults>
    <!-- Values here affect all index writers and act as a default unless overridden. -->
    <useCompoundFile>false</useCompoundFile>

    <mergeFactor>10</mergeFactor>
    <!--
      If both ramBufferSizeMB and maxBufferedDocs are set, then Lucene will flush based on whichever limit is hit first.
    -->
    <!--<maxBufferedDocs>1000</maxBufferedDocs>-->
    <!-- Tell Lucene when to flush documents to disk.
         Giving Lucene more memory for indexing means faster indexing at the cost of more RAM.

         If both ramBufferSizeMB and maxBufferedDocs are set, then Lucene will flush based on whichever limit is hit first.
    -->
    <ramBufferSizeMB>32</ramBufferSizeMB>
    <maxMergeDocs>2147483647</maxMergeDocs>
    <maxFieldLength>10000</maxFieldLength>
    <writeLockTimeout>1000</writeLockTimeout>

    <!--
      Expert:
      The Merge Policy in Lucene controls how merging is handled by Lucene.  The default in 2.3 is the LogByteSizeMergePolicy; previous
      versions used LogDocMergePolicy.

      LogByteSizeMergePolicy chooses segments to merge based on their size.  The Lucene 2.2 default, LogDocMergePolicy, chose when
      to merge based on the number of documents.

      Other implementations of MergePolicy must have a no-argument constructor.
    -->
    <!--<mergePolicy>org.apache.lucene.index.LogByteSizeMergePolicy</mergePolicy>-->

    <!--
      Expert:
      The Merge Scheduler in Lucene controls how merges are performed.  The ConcurrentMergeScheduler (Lucene 2.3 default)
      can perform merges in the background using separate threads.  The SerialMergeScheduler (Lucene 2.2 default) does not.
    -->
    <!--<mergeScheduler>org.apache.lucene.index.ConcurrentMergeScheduler</mergeScheduler>-->

    <!--
      As long as Solr is the only process modifying your index, it is
      safe to use Lucene's in-process locking mechanism.  But you may
      specify one of the other Lucene LockFactory implementations in
      the event that you have a custom situation.

      none = NoLockFactory (typically only used with read-only indexes)
      single = SingleInstanceLockFactory (suggested)
      native = NativeFSLockFactory
      simple = SimpleFSLockFactory

      ('simple' is the default for backwards compatibility with Solr 1.2)
    -->
    <lockType>single</lockType>
  </indexDefaults>

  <mainIndex>
    <!-- options specific to the main on-disk Lucene index -->
    <useCompoundFile>false</useCompoundFile>
    <ramBufferSizeMB>32</ramBufferSizeMB>
    <mergeFactor>10</mergeFactor>
    <!-- Deprecated -->
    <!--<maxBufferedDocs>1000</maxBufferedDocs>-->
    <maxMergeDocs>2147483647</maxMergeDocs>
    <maxFieldLength>10000</maxFieldLength>

    <!-- If true, unlock any held write or commit locks on startup.
         This defeats the locking mechanism that allows multiple
         processes to safely access a Lucene index, and should be
         used with care.
         This is not needed if the lock type is 'none' or 'single'.
    -->
    <unlockOnStartup>false</unlockOnStartup>
  </mainIndex>

  <!-- the default high-performance update handler -->
  <updateHandler class="solr.DirectUpdateHandler2">

    <!-- A prefix of "solr." for class names is an alias that
         causes Solr to search appropriate packages, including
         org.apache.solr.(search|update|request|core|analysis)
    -->

    <!-- Limit the number of deletions Solr will buffer during doc updating.

         Setting this lower can help bound memory use during indexing.
    -->
    <maxPendingDeletes>100000</maxPendingDeletes>

    <!-- Perform a <commit/> automatically under certain conditions:

         maxDocs - number of updates since last commit is greater than this
         maxTime - oldest uncommitted update (in ms) is this long ago
         <autoCommit>
           <maxDocs>10000</maxDocs>
           <maxTime>1000</maxTime>
         </autoCommit>
    -->

    <!-- The RunExecutableListener executes an external command.
         exe - the name of the executable to run
         dir - dir to use as the current working directory. default="."
         wait - the calling thread waits until the executable returns. default="true"
         args - the arguments to pass to the program. default=nothing
         env - environment variables to set. default=nothing
    -->
    <!-- A postCommit event is fired after every commit or optimize command
      <listener event="postCommit" class="solr.RunExecutableListener">
        <str name="exe">solr/bin/snapshooter</str>
        <str name="dir">.</str>
        <bool name="wait">true</bool>
        <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
        <arr name="env"> <str>MYVAR=val1</str> </arr>
      </listener>
    -->
    <!-- A postOptimize event is fired only after every optimize command, useful
         in conjunction with index distribution to only distribute optimized indices
      <listener event="postOptimize" class="solr.RunExecutableListener">
        <str name="exe">snapshooter</str>
        <str name="dir">solr/bin</str>
        <bool name="wait">true</bool>
      </listener>
    -->

  </updateHandler>


  <query>
    <!-- Maximum number of clauses in a boolean query... can affect
         range or prefix queries that expand to big boolean
         queries.  An exception is thrown if exceeded. -->
    <maxBooleanClauses>1024</maxBooleanClauses>


    <!-- Cache used by SolrIndexSearcher for filters (DocSets),
         unordered sets of *all* documents that match a query.
         When a new searcher is opened, its caches may be prepopulated
         or "autowarmed" using data from caches in the old searcher.
         autowarmCount is the number of items to prepopulate.  For LRUCache,
         the autowarmed items will be the most recently accessed items.
         Parameters:
           class - the SolrCache implementation (currently only LRUCache)
           size - the maximum number of entries in the cache
           initialSize - the initial capacity (number of entries) of
             the cache. (see java.util.HashMap)
           autowarmCount - the number of entries to prepopulate from
             an old cache.
    -->
    <filterCache
      class="solr.LRUCache"
      size="512"
      initialSize="512"
      autowarmCount="256"/>

    <!-- queryResultCache caches results of searches - ordered lists of
         document ids (DocList) based on a query, a sort, and the range
         of documents requested. -->
    <queryResultCache
      class="solr.LRUCache"
      size="512"
      initialSize="512"
      autowarmCount="256"/>

    <!-- documentCache caches Lucene Document objects (the stored fields for each document).
         Since Lucene internal document ids are transient, this cache will not be autowarmed. -->
    <documentCache
      class="solr.LRUCache"
      size="512"
      initialSize="512"
      autowarmCount="0"/>

    <!-- If true, stored fields that are not requested will be loaded lazily.

         This can result in a significant speed improvement if the usual case is to
         not load all stored fields, especially if the skipped fields are large compressed
         text fields.
    -->
    <enableLazyFieldLoading>true</enableLazyFieldLoading>

    <!-- Example of a generic cache.  These caches may be accessed by name
         through SolrIndexSearcher.getCache(), cacheLookup(), and cacheInsert().
         The purpose is to enable easy caching of user/application level data.
         The regenerator argument should be specified as an implementation
         of solr.search.CacheRegenerator if autowarming is desired. -->
    <!--
      <cache name="myUserCache"
        class="solr.LRUCache"
        size="4096"
        initialSize="1024"
        autowarmCount="1024"
        regenerator="org.mycompany.mypackage.MyRegenerator"
      />
    -->

    <!-- An optimization that attempts to use a filter to satisfy a search.
         If the requested sort does not include score, then the filterCache
         will be checked for a filter matching the query.  If found, the filter
         will be used as the source of document ids, and then the sort will be
         applied to that.
      <useFilterForSortedQuery>true</useFilterForSortedQuery>
    -->

    <!-- An optimization for use with the queryResultCache.  When a search
         is requested, a superset of the requested number of document ids
         is collected.  For example, if a search for a particular query
         requests matching documents 10 through 19, and queryResultWindowSize is 50,
         then documents 0 through 49 will be collected and cached.  Any further
         requests in that range can be satisfied via the cache. -->
    <queryResultWindowSize>50</queryResultWindowSize>

    <!-- Maximum number of documents to cache for any entry in the
         queryResultCache. -->
    <queryResultMaxDocsCached>200</queryResultMaxDocsCached>

    <!-- This entry enables an int hash representation for filters (DocSets)
         when the number of items in the set is less than maxSize.  For smaller
         sets, this representation is more memory efficient, more efficient to
         iterate over, and faster to take intersections. -->
    <HashDocSet maxSize="3000" loadFactor="0.75"/>

    <!-- a newSearcher event is fired whenever a new searcher is being prepared
         and there is a current searcher handling requests (aka registered). -->
    <!-- QuerySenderListener takes an array of NamedList and executes a
         local query request for each NamedList in sequence. -->
    <listener event="newSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
        <lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
        <lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
        <lst><str name="q">static newSearcher warming query from solrconfig.xml</str></lst>
      </arr>
    </listener>

    <!-- a firstSearcher event is fired whenever a new searcher is being
         prepared but there is no current registered searcher to handle
         requests or to gain autowarming data from. -->
    <listener event="firstSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
      </arr>
    </listener>

    <!-- If a search request comes in and there is no current registered searcher,
         then immediately register the still warming searcher and use it.  If
         "false" then all requests will block until the first searcher is done
         warming. -->
    <useColdSearcher>false</useColdSearcher>

    <!-- Maximum number of searchers that may be warming in the background
         concurrently.  An error is returned if this limit is exceeded.  Recommend
         1-2 for read-only slaves, higher for masters w/o cache warming. -->
    <maxWarmingSearchers>4</maxWarmingSearchers>

  </query>

  <!--
    Let the dispatch filter handle /select?qt=XXX
    handleSelect=true will use consistent error handling for /select and /update
    handleSelect=false will use solr1.1 style error formatting
  -->
  <requestDispatcher handleSelect="true" >
    <!--Make sure your system has some authentication before enabling remote streaming! -->
    <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />

    <!-- Set HTTP caching related parameters (for proxy caches and clients).

         To get the behaviour of Solr 1.2 (ie: no caching related headers)
         use the never304="true" option and do not specify a value for
         <cacheControl>
    -->
    <httpCaching never304="true">
      <!--httpCaching lastModifiedFrom="openTime"
                      etagSeed="Solr"-->
      <!-- lastModFrom="openTime" is the default; the Last-Modified value
           (and validation against If-Modified-Since requests) will all be
           relative to when the current Searcher was opened.
           You can change it to lastModFrom="dirLastMod" if you want the
           value to exactly correspond to when the physical index was last
           modified.

           etagSeed="..." is an option you can change to force the ETag
           header (and validation against If-None-Match requests) to be
           different even if the index has not changed (ie: when making
           significant changes to your config file)

           lastModifiedFrom and etagSeed are both ignored if you use the
           never304="true" option.
      -->
      <!-- If you include a <cacheControl> directive, it will be used to
           generate a Cache-Control header, as well as an Expires header
           if the value contains "max-age="

           By default, no Cache-Control header is generated.

           You can use the <cacheControl> option even if you have set
           never304="true"
      -->
      <!-- <cacheControl>max-age=30, public</cacheControl> -->
    </httpCaching>
  </requestDispatcher>


  <!-- requestHandler plugins... incoming queries will be dispatched to the
       correct handler based on the path or the qt (query type) param.
       Names starting with a '/' are accessed with a path equal to the
       registered name.  Names without a leading '/' are accessed with:
         http://host/app/select?qt=name
       If no qt is defined, the requestHandler that declares default="true"
       will be used.
  -->
  <requestHandler name="standard" class="solr.StandardRequestHandler" default="true">
    <!-- default values for query parameters -->
    <lst name="defaults">
      <str name="echoParams">explicit</str>
      <!--
        <int name="rows">10</int>
        <str name="fl">*</str>
        <str name="version">2.1</str>
      -->
    </lst>
  </requestHandler>

  <!-- SpellCheckerRequestHandler takes in a word (or several words) as the
       value of the "q" parameter and returns a list of alternative spelling
       suggestions.  If invoked with a ...&cmd=rebuild, it will rebuild the
       spellchecker index.
  -->
  <requestHandler name="spellchecker" class="solr.SpellCheckerRequestHandler" startup="lazy">
    <!-- default values for query parameters -->
    <lst name="defaults">
      <int name="suggestionCount">1</int>
      <float name="accuracy">0.5</float>
    </lst>

    <!-- Main init params for handler -->

    <!-- The directory where your SpellChecker Index should live. -->
    <!-- May be absolute, or relative to the Solr "dataDir" directory. -->
    <!-- If this option is not specified, a RAM directory will be used -->
    <str name="spellcheckerIndexDir">spell</str>

    <!-- the field in your schema that you want to be able to build -->
    <!-- your spell index on.  This should be a field that uses a very -->
    <!-- simple FieldType without a lot of Analysis (ie: string) -->
    <str name="termSourceField">word</str>

  </requestHandler>

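  <!-- Usage sketch (editor's note, not in the stock config): since the handler
       above is registered without a leading '/', it is invoked via the qt
       parameter, e.g.
         http://localhost:8983/solr/select?qt=spellchecker&q=wrod
       and the index it suggests from can be rebuilt with &cmd=rebuild. -->
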
<requestHandler name="/mlt" class="solr.MoreLikeThisHandler">
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="mlt.fl">manu,cat</str>
|
||||||
|
<int name="mlt.mindf">1</int>
|
||||||
|
</lst>
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
|
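  <!-- Usage sketch (editor's note, not in the stock config): the /mlt handler
       takes a query identifying a source document and returns similar ones
       based on the mlt.fl fields above, e.g.
         http://localhost:8983/solr/mlt?q=id:SOLR1000&mlt.fl=manu,cat
       The document id in this URL is hypothetical. -->
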
<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="config">solr-data-config.xml</str>
|
||||||
|
</lst>
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
|
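  <!-- Usage sketch (editor's note, not in the stock config): with the handler
       above, an import using the SolrEntityProcessor entity defined in
       solr-data-config.xml is triggered with the standard DataImportHandler
       commands, e.g.
         http://localhost:8983/solr/dataimport?command=full-import
       and its progress checked with ?command=status. -->
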
  <!--

    Search components are registered to SolrCore and used by Search Handlers

    By default, the following components are available:

    <searchComponent name="query"     class="org.apache.solr.handler.component.QueryComponent" />
    <searchComponent name="facet"     class="org.apache.solr.handler.component.FacetComponent" />
    <searchComponent name="mlt"       class="org.apache.solr.handler.component.MoreLikeThisComponent" />
    <searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" />
    <searchComponent name="debug"     class="org.apache.solr.handler.component.DebugComponent" />

    If you register a searchComponent to one of the standard names, that will be used instead.

  -->

  <requestHandler name="/search" class="org.apache.solr.handler.component.SearchHandler">
    <lst name="defaults">
      <str name="echoParams">explicit</str>
    </lst>
    <!--
      By default, this will register the following components:

      <arr name="components">
        <str>query</str>
        <str>facet</str>
        <str>mlt</str>
        <str>highlight</str>
        <str>debug</str>
      </arr>

      To insert handlers before or after the 'standard' components, use:

      <arr name="first-components">
        <str>first</str>
      </arr>

      <arr name="last-components">
        <str>last</str>
      </arr>

    -->
  </requestHandler>

<searchComponent name="elevator" class="org.apache.solr.handler.component.QueryElevationComponent" >
|
||||||
|
<!-- pick a fieldType to analyze queries -->
|
||||||
|
<str name="queryFieldType">string</str>
|
||||||
|
<str name="config-file">elevate.xml</str>
|
||||||
|
</searchComponent>
|
||||||
|
|
||||||
|
<requestHandler name="/elevate" class="org.apache.solr.handler.component.SearchHandler" startup="lazy">
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="echoParams">explicit</str>
|
||||||
|
</lst>
|
||||||
|
<arr name="last-components">
|
||||||
|
<str>elevator</str>
|
||||||
|
</arr>
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
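  <!-- Sketch of elevate.xml (editor's note; the query text and doc id below are
       hypothetical, not taken from this commit): the config-file referenced by
       the elevator component pins chosen documents to the top of the results
       for a given query text, e.g.

       <elevate>
         <query text="ipod">
           <doc id="MA147LL/A" />
         </query>
       </elevate>
  -->
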
  <!-- Update request handler.

       Note: Since solr1.1, requestHandlers require a valid content type header if posted in
       the body.  For example, curl now requires: -H 'Content-type:text/xml; charset=utf-8'
       The response format differs from solr1.1 formatting and returns a standard error code.

       To enable solr1.1 behavior, remove the /update handler or change its path.

       "update.processor.class" is the class name for the UpdateRequestProcessor.  It is initialized
       only once.  This cannot be changed for each request.
  -->
  <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" >
    <!--
      <str name="update.processor.class">org.apache.solr.handler.UpdateRequestProcessor</str>
    -->
  </requestHandler>

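  <!-- Usage sketch (editor's note, not in the stock config): following the
       content-type requirement described above, a commit can be issued with
         curl http://localhost:8983/solr/update -H 'Content-type:text/xml; charset=utf-8' --data-binary '<commit/>'
       where the host and port are assumed from the other examples in this config. -->
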
  <!-- CSV update handler, loaded on demand -->
  <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" />


  <!--
    Admin Handlers - This will register all the standard admin RequestHandlers.  Adding
    this single handler is equivalent to registering:

    <requestHandler name="/admin/luke"       class="org.apache.solr.handler.admin.LukeRequestHandler" />
    <requestHandler name="/admin/system"     class="org.apache.solr.handler.admin.SystemInfoHandler" />
    <requestHandler name="/admin/plugins"    class="org.apache.solr.handler.admin.PluginInfoHandler" />
    <requestHandler name="/admin/threads"    class="org.apache.solr.handler.admin.ThreadDumpHandler" />
    <requestHandler name="/admin/properties" class="org.apache.solr.handler.admin.PropertiesRequestHandler" />
    <requestHandler name="/admin/file"       class="org.apache.solr.handler.admin.ShowFileRequestHandler" >

    If you wish to hide files under ${solr.home}/conf, explicitly register the ShowFileRequestHandler using:
    <requestHandler name="/admin/file" class="org.apache.solr.handler.admin.ShowFileRequestHandler" >
      <lst name="invariants">
        <str name="hidden">synonyms.txt</str>
        <str name="hidden">anotherfile.txt</str>
      </lst>
    </requestHandler>
  -->
  <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />

  <!-- Echo the request contents back to the client -->
  <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
    <lst name="defaults">
      <str name="echoParams">explicit</str> <!-- for all params (including the default etc) use: 'all' -->
      <str name="echoHandler">true</str>
    </lst>
  </requestHandler>

  <highlighting>
    <!-- Configure the standard fragmenter -->
    <!-- This could most likely be commented out in the "default" case -->
    <fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true">
      <lst name="defaults">
        <int name="hl.fragsize">100</int>
      </lst>
    </fragmenter>

    <!-- A regular-expression-based fragmenter (e.g., for sentence extraction) -->
    <fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter">
      <lst name="defaults">
        <!-- slightly smaller fragsizes work better because of slop -->
        <int name="hl.fragsize">70</int>
        <!-- allow 50% slop on fragment sizes -->
        <float name="hl.regex.slop">0.5</float>
        <!-- a basic sentence pattern -->
        <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
      </lst>
    </fragmenter>

    <!-- Configure the standard formatter -->
    <formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true">
      <lst name="defaults">
        <str name="hl.simple.pre"><![CDATA[<em>]]></str>
        <str name="hl.simple.post"><![CDATA[</em>]]></str>
      </lst>
    </formatter>
  </highlighting>


  <!-- queryResponseWriter plugins... query responses will be written using the
       writer specified by the 'wt' request parameter matching the name of a registered
       writer.
       The writer named "default" is used if 'wt' is not specified in the
       request.  XMLResponseWriter will be used if nothing is specified here.
       The json, python, and ruby writers are also available by default.

       <queryResponseWriter name="xml" class="solr.XMLResponseWriter" default="true"/>
       <queryResponseWriter name="json" class="solr.JSONResponseWriter"/>
       <queryResponseWriter name="python" class="solr.PythonResponseWriter"/>
       <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/>
       <queryResponseWriter name="php" class="solr.PHPResponseWriter"/>
       <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/>

       <queryResponseWriter name="custom" class="com.example.MyResponseWriter"/>
  -->

  <!-- XSLT response writer transforms the XML output by any xslt file found
       in Solr's conf/xslt directory.  Changes to xslt files are checked for
       every xsltCacheLifetimeSeconds.
  -->
  <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
    <int name="xsltCacheLifetimeSeconds">5</int>
  </queryResponseWriter>

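  <!-- Usage sketch (editor's note, not in the stock config): the XSLT writer is
       selected per request with wt=xslt plus a tr parameter naming a stylesheet
       in conf/xslt, e.g.
         http://localhost:8983/solr/select?q=*:*&wt=xslt&tr=example.xsl
       "example.xsl" here is a hypothetical file name. -->
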
  <!-- config for the admin interface -->
  <admin>
    <defaultQuery>*:*</defaultQuery>

    <!-- configure a healthcheck file for servers behind a loadbalancer
      <healthcheck type="file">server-enabled</healthcheck>
    -->
  </admin>

</config>
@ -0,0 +1,58 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#-----------------------------------------------------------------------
# a couple of test stopwords to test that the words are really being
# configured from this file:
stopworda
stopwordb

# Standard English stop words taken from Lucene's StopAnalyzer
a
an
and
are
as
at
be
but
by
for
if
in
into
is
it
no
not
of
on
or
s
such
t
that
the
their
then
there
these
they
this
to
was
will
with

@ -0,0 +1,31 @@
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#-----------------------------------------------------------------------
# Some test synonym mappings unlikely to appear in real input text
aaa => aaaa
bbb => bbbb1 bbbb2
ccc => cccc1,cccc2
a\=>a => b\=>b
a\,a => b\,b
fooaaa,baraaa,bazaaa

# Some synonym groups specific to this example
GB,gib,gigabyte,gigabytes
MB,mib,megabyte,megabytes
Television, Televisions, TV, TVs
# Notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
# after us won't split it into two words.

# Synonym mappings can be used for spelling correction too
pixima => pixma
