mirror of https://github.com/apache/lucene.git
added experimental version of a LuceneStorage
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150784 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8ed5fa47fd
commit
bdd627d35c
|
@ -201,9 +201,9 @@ public class LogStorage implements DocumentStorage
|
|||
public WebDocument store(WebDocument doc)
|
||||
{
|
||||
String docInfo = doc.getInfo();
|
||||
if (logContents && isValid && doc.getDocumentBytes() != null)
|
||||
if (logContents && isValid && doc.getField("content") != null)
|
||||
{
|
||||
int offset = writeToPageFile(doc.getDocumentBytes());
|
||||
int offset = writeToPageFile((byte[])doc.getField("content"));
|
||||
docInfo = docInfo + "\t" + pageFileCount + "\t" + offset;
|
||||
}
|
||||
log.logThreadSafe(docInfo);
|
||||
|
|
|
@ -0,0 +1,194 @@
|
|||
package de.lanlab.larm.storage;
|
||||
|
||||
import de.lanlab.larm.util.WebDocument;
|
||||
|
||||
/**
|
||||
* Title: LARM Lanlab Retrieval Machine Description: Copyright: Copyright (c)
|
||||
* Company:
|
||||
*
|
||||
* @author
|
||||
* @version 1.0
|
||||
*/
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.document.*;
|
||||
import org.apache.lucene.analysis.*;
|
||||
import java.util.*;
|
||||
import java.io.*;
|
||||
|
||||
/**
|
||||
* Description of the Class
|
||||
*
|
||||
* @author Administrator
|
||||
* @created 14. Juni 2002
|
||||
*/
|
||||
public class LuceneStorage implements DocumentStorage
|
||||
{
|
||||
|
||||
HashMap fieldInfos = new HashMap();
|
||||
IndexWriter writer;
|
||||
Analyzer analyzer;
|
||||
String indexName;
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for the LuceneStorage object
|
||||
*/
|
||||
public LuceneStorage() { }
|
||||
|
||||
|
||||
/**
|
||||
* Sets the analyzer attribute of the LuceneStorage object
|
||||
*
|
||||
* @param a The new analyzer value
|
||||
*/
|
||||
public void setAnalyzer(Analyzer a)
|
||||
{
|
||||
this.analyzer = a;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the indexName attribute of the LuceneStorage object
|
||||
*
|
||||
* @param name The new indexName value
|
||||
*/
|
||||
public void setIndexName(String name)
|
||||
{
|
||||
this.indexName = name;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the fieldInfo attribute of the LuceneStorage object
|
||||
*
|
||||
* @param fieldName The new fieldInfo value
|
||||
* @param value The new fieldInfo value
|
||||
*/
|
||||
public void setFieldInfo(String fieldName, int value)
|
||||
{
|
||||
fieldInfos.put(fieldName, new Integer(value));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the create attribute of the LuceneStorage object
|
||||
*
|
||||
* @param create The new create value
|
||||
*/
|
||||
public void setCreate(boolean create)
|
||||
{
|
||||
this.create = create;
|
||||
}
|
||||
|
||||
boolean create;
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*/
|
||||
public void open()
|
||||
{
|
||||
System.out.println("opening Lucene storage with index name " + indexName + ")");
|
||||
try
|
||||
{
|
||||
writer = new IndexWriter(indexName, analyzer, create);
|
||||
}
|
||||
catch(IOException e)
|
||||
{
|
||||
System.err.println("IOException occured when opening Lucene Index with index name '" + indexName + "'");
|
||||
e.printStackTrace();
|
||||
}
|
||||
if(writer != null)
|
||||
{
|
||||
System.out.println("lucene storage opened successfully");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public final static int INDEX = 1;
|
||||
public final static int STORE = 2;
|
||||
public final static int TOKEN = 4;
|
||||
|
||||
|
||||
/**
|
||||
* Gets the fieldInfo attribute of the LuceneStorage object
|
||||
*
|
||||
* @param fieldName Description of the Parameter
|
||||
* @param def Description of the Parameter
|
||||
* @return The fieldInfo value
|
||||
*/
|
||||
protected int getFieldInfo(String fieldName, int def)
|
||||
{
|
||||
Integer info = (Integer) fieldInfos.get(fieldName);
|
||||
if (info != null)
|
||||
{
|
||||
return info.intValue();
|
||||
}
|
||||
else
|
||||
{
|
||||
return def;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
protected void addField(Document doc, String name, String value, int defaultIndexFlags)
|
||||
{
|
||||
int flags = getFieldInfo(name, defaultIndexFlags);
|
||||
if (flags != 0)
|
||||
{
|
||||
doc.add(new Field(name, value, (flags & STORE) != 0, (flags & INDEX) != 0, (flags & TOKEN) != 0));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
*
|
||||
* @param webDoc Description of the Parameter
|
||||
* @return Description of the Return Value
|
||||
*/
|
||||
public WebDocument store(WebDocument webDoc)
|
||||
{
|
||||
//System.out.println("storing " + webDoc.getUrl());
|
||||
boolean store;
|
||||
boolean index;
|
||||
boolean token;
|
||||
store = index = token = false;
|
||||
|
||||
Document doc = new Document();
|
||||
int flags;
|
||||
|
||||
addField(doc, "url", webDoc.getUrl().toExternalForm(), STORE | INDEX);
|
||||
addField(doc, "mimetype", webDoc.getMimeType(), STORE | INDEX);
|
||||
// addField(doc, "...", webDoc.getNormalizedURLString(), STORE | INDEX); and so fortg
|
||||
// todo: other fields
|
||||
Set fields = webDoc.getFieldNames();
|
||||
|
||||
for (Iterator it = fields.iterator(); it.hasNext(); )
|
||||
{
|
||||
String fieldName = (String) it.next();
|
||||
Object field = webDoc.getField(fieldName);
|
||||
|
||||
if (field instanceof char[])
|
||||
{
|
||||
addField(doc, fieldName, new String((char[]) field), STORE | INDEX);
|
||||
}
|
||||
else if (field instanceof String)
|
||||
{
|
||||
addField(doc, fieldName, (String)field, STORE | INDEX);
|
||||
}
|
||||
/* else ? */
|
||||
}
|
||||
try
|
||||
{
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
catch(IOException e)
|
||||
{
|
||||
System.err.println("IOException occured when adding document to Lucene index");
|
||||
e.printStackTrace();
|
||||
}
|
||||
return webDoc;
|
||||
}
|
||||
|
||||
//public void set
|
||||
}
|
|
@ -172,7 +172,7 @@ public class SQLServerStorage implements DocumentStorage
|
|||
conn = getConnection();
|
||||
Statement delDoc = conn.createStatement();
|
||||
|
||||
// bisherige Daten löschen, indem die Tabelle neu angelegt wird (geht schneller)
|
||||
// recreate table (faster than delete from table)
|
||||
|
||||
delDoc.executeUpdate("if exists (select * from sysobjects where id = object_id(N'[dbo].[Document]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)drop table [dbo].[Document]");
|
||||
delDoc.executeUpdate("CREATE TABLE [dbo].[Document] ([DO_ID] [int] IDENTITY (1, 1) NOT NULL , [DA_CrawlPass] [int] NULL , [DO_URL] [varchar] (255) NULL , [DO_ContentType] [varchar] (50) NULL , [DO_Data] [text] NULL , [DO_Hashcode] [int] NULL , [DO_ContentLength] [int] NULL , [DO_ContentEncoding] [varchar] (20) NULL , [DO_Data2] [image] NULL, [DO_MimeType] [varchar] (255) NULL) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]"); // löschen
|
||||
|
@ -206,7 +206,7 @@ public class SQLServerStorage implements DocumentStorage
|
|||
addDoc = getStatement();
|
||||
addDoc.setString(1, document.getURLString());
|
||||
addDoc.setString(2, document.getMimeType());
|
||||
addDoc.setBytes(3, document.getDocumentBytes());
|
||||
addDoc.setBytes(3, (byte[])document.getField("content"));
|
||||
addDoc.execute();
|
||||
}
|
||||
catch(SQLException e)
|
||||
|
|
|
@ -95,6 +95,7 @@ public class StoragePipeline implements DocumentStorage, LinkStorage
|
|||
{
|
||||
for (Iterator it = docStorages.iterator(); it.hasNext(); )
|
||||
{
|
||||
System.out.println("opening...");
|
||||
((DocumentStorage) it.next()).open();
|
||||
}
|
||||
isOpen = true;
|
||||
|
|
Loading…
Reference in New Issue