diff --git a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/WebDocument.java b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/WebDocument.java index 76201263f82..1dda43ec961 100644 --- a/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/WebDocument.java +++ b/sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/WebDocument.java @@ -61,6 +61,7 @@ import java.util.Date; import java.util.Set; import de.lanlab.larm.fetcher.URLMessage; import de.lanlab.larm.net.HostManager; +import de.lanlab.larm.net.*; /** * a web document of whatever type. generated by a fetcher task @@ -74,10 +75,23 @@ public class WebDocument extends URLMessage protected String title; protected Date lastModified; HashMap fields; + boolean isModified; - public WebDocument(URL url, String mimeType, int resultCode, URL referer, int size, String title, Date lastModified, HostManager hm) + public WebDocument(URLMessage msg) { - super(url, referer, false, null, hm); + super(msg); + this.mimeType = ""; + this.resultCode = -1; + this.size = -1; + this.title = ""; + this.lastModified = new Date(); + clearFields(); + this.isModified = true; + } + + public WebDocument(URL url, String mimeType, int resultCode, URL referer, int size, String title, Date lastModified, HostResolver hm) + { + super(url, referer, URLMessage.LINKTYPE_ANCHOR, null, hm); this.url = url; this.mimeType = mimeType; //this.document = document; @@ -85,7 +99,23 @@ public class WebDocument extends URLMessage this.size = size; this.title = title; this.lastModified = lastModified; - this.fields = new HashMap(7); // expect ~4 fields + clearFields(); + this.isModified = true; + } + + public void setModified(boolean modified) + { + this.isModified = modified; + } + + public boolean isModified() + { + return isModified; + } + + public void clearFields() + { + this.fields = new HashMap(7); } public Set getFieldNames() @@ -119,6 +149,11 @@ public class WebDocument extends URLMessage return lastModified; } + public void setLastModified(Date lastModified) + { + this.lastModified = lastModified; + } + public String getTitle() { return title; @@ -173,6 +208,12 @@ public class WebDocument extends URLMessage this.mimeType = mimeType; } + public void setTitle(String title) + { + this.title = title; + } + + public String getMimeType() { return mimeType;