mirror of https://github.com/apache/lucene.git
181 lines
7.0 KiB
Python
Executable File
181 lines
7.0 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
import sys
|
|
import json
|
|
import httplib
|
|
import urllib
|
|
import libsvm_formatter
|
|
|
|
from optparse import OptionParser
|
|
|
|
# Module-level cache for the query URL prefix built by generateHttpRequest();
# it is filled in on that function's first call and reused afterwards.
solrQueryUrl = ""
|
|
|
|
|
|
def setupSolr(collection, host, port, featuresFile, featureStoreName):
    '''Sets up solr with the proper features for the test.

    Deletes the feature store ``featureStoreName`` through the collection's
    ``/schema/feature-store`` endpoint, then POSTs the contents of
    ``featuresFile`` to that endpoint as JSON.

    Args:
        collection: name of the Solr collection.
        host: Solr host name.
        port: Solr port.
        featuresFile: path of the file whose contents are uploaded.
        featureStoreName: name of the feature store to replace.

    Raises:
        Exception: if the DELETE answers with a status other than OK,
            CREATED, ACCEPTED or NOT_FOUND, or the POST with a status other
            than OK or ACCEPTED.
    '''
    conn = httplib.HTTPConnection(host, port)
    try:
        baseUrl = "/solr/" + collection
        featureUrl = baseUrl + "/schema/feature-store"

        # NOT_FOUND is tolerated here, presumably because the store may not
        # exist yet on a fresh collection.
        conn.request("DELETE", featureUrl + "/" + featureStoreName)
        r = conn.getresponse()
        msg = r.read()
        if r.status not in (httplib.OK, httplib.CREATED,
                            httplib.ACCEPTED, httplib.NOT_FOUND):
            raise Exception("Status: {0} {1}\nResponse: {2}".format(r.status, r.reason, msg))

        # Add features
        headers = {'Content-type': 'application/json'}
        # The file only needs to stay open while the request body is sent and
        # the response is read; the with-block guarantees it is closed.
        with open(featuresFile) as featuresBody:
            conn.request("POST", featureUrl, featuresBody, headers)
            r = conn.getresponse()
            msg = r.read()
        if r.status not in (httplib.OK, httplib.ACCEPTED):
            print(r.status)
            print("")
            print(r.reason)
            raise Exception("Status: {0} {1}\nResponse: {2}".format(r.status, r.reason, msg))
    finally:
        # Close the connection on the error paths too, not just on success.
        conn.close()
|
|
|
|
|
|
def generateQueries(userQueriesFile, collection, requestHandler, solrFeatureStoreName, efiParams):
    '''Builds one Solr feature-extraction request per line of the user queries file.

    Every non-blank line of ``userQueriesFile`` must consist of four
    "|"-separated fields: searchText|docId|score|source. Blank lines are
    skipped.

    Returns:
        A list of tuples (solrQuery, searchText, docId, score, source), where
        solrQuery is the value returned by generateHttpRequest() for that line.

    Raises:
        ValueError: if a non-blank line does not split into exactly four fields.
    '''
    solrQueryUrls = []  # A list of tuples with solrQueryUrl,solrQuery,docId,scoreForPQ,source

    with open(userQueriesFile) as queriesFile:
        for line in queriesFile:
            line = line.strip()
            if not line:
                # A blank line (typically a trailing newline at the end of
                # the file) carries no query; skip it instead of failing the
                # four-way unpack below.
                continue
            searchText, docId, score, source = line.split("|")
            solrQuery = generateHttpRequest(collection, requestHandler, solrFeatureStoreName, efiParams, searchText, docId)
            solrQueryUrls.append((solrQuery, searchText, docId, score, source))

    return solrQueryUrls
|
|
|
|
|
|
def generateHttpRequest(collection, requestHandler, solrFeatureStoreName, efiParams, searchText, docId):
    '''Returns the Solr request path that extracts features for one document.

    The part of the URL that does not depend on the document (handler path,
    ``fl`` field list and the ``q=id:`` prefix) is built on the first call
    only and cached in the module-level ``solrQueryUrl``; later calls reuse
    that cached prefix whatever collection/handler/store arguments they pass.

    The quoted ``docId`` is appended to the prefix, and any USERQUERY
    placeholder in the result (presumably supplied through ``efiParams``) is
    replaced by the escaped, URL-quoted ``searchText``.
    '''
    global solrQueryUrl

    if len(solrQueryUrl) == 0:
        featuresField = "[features store=" + solrFeatureStoreName + " " + efiParams + "]"
        prefix = "/solr/" + collection + "/" + requestHandler
        prefix += "?fl=id,score," + featuresField + "&q="
        # Spaces are turned into "+" before the already-quoted "id:" is added.
        solrQueryUrl = prefix.replace(" ", "+") + urllib.quote_plus("id:")

    # Escape single quotes and slashes before URL-quoting the search text.
    escapedText = searchText.strip().replace("'", "\\'").replace("/", "\\\\/")
    userQuery = urllib.quote_plus(escapedText)
    quotedUserQuery = urllib.quote_plus("\\'" + userQuery + "\\'")

    solrQuery = solrQueryUrl + '"' + urllib.quote_plus(docId) + '"'
    # The URL-quoted placeholder is substituted first, then the raw one.
    solrQuery = solrQuery.replace("%24USERQUERY", userQuery)
    return solrQuery.replace('$USERQUERY', quotedUserQuery)
|
|
|
|
|
|
def generateTrainingData(solrQueries, host, port):
    '''Given a list of solr queries, yields a tuple of query , docId , score , source , feature vector for each query.
    Feature Vector is a list of strings of form "key=value"

    Each element of ``solrQueries`` is a tuple
    (queryUrl, query, docId, score, source). ``queryUrl`` is requested with
    GET and the "[features]" value of the first returned document is split
    on "," to form the feature vector.

    Queries whose response has no documents, no "[features]" entry, or a null
    "[features]" value are reported on stdout and skipped. Any exception
    (including a non-OK HTTP status) is printed and ends the generator rather
    than propagating to the caller. The connection is always closed, even if
    the consumer stops iterating early.
    '''
    conn = httplib.HTTPConnection(host, port)
    headers = {"Connection":" keep-alive"}
    # Body of the response currently being processed; stays None until a
    # response has actually been read so the error handler can tell.
    msg = None

    try:
        for queryUrl, query, docId, score, source in solrQueries:
            msg = None
            conn.request("GET", queryUrl, headers=headers)
            r = conn.getresponse()
            msg = r.read()
            msgDict = json.loads(msg)
            docs = msgDict['response']['docs']

            if len(docs) == 0 or "[features]" not in docs[0]:
                print("ERROR FOR: " + docId)
                print(msg)
                continue

            fv = docs[0]["[features]"]
            if fv is None:
                print("ERROR NULL FV FOR: " + docId)
                print(msg)
                continue

            if r.status != httplib.OK:
                raise Exception("Status: {0} {1}\nResponse: {2}".format(r.status, r.reason, msg))

            yield (query, docId, score, source, fv.split(","))
    except Exception as e:
        # msg is only printed when a response body was read for the failing
        # query; otherwise there is nothing to show besides the error itself.
        if msg is not None:
            print(msg)
        print(e)
    finally:
        conn.close()
|
|
|
|
|
|
def uploadModel(collection, host, port, modelFile, modelName):
    '''Replaces the model ``modelName`` in the collection's model store.

    Deletes ``modelName`` through the collection's ``/schema/model-store``
    endpoint, then POSTs the contents of ``modelFile`` to that endpoint as
    JSON.

    Args:
        collection: name of the Solr collection.
        host: Solr host name.
        port: Solr port.
        modelFile: path of the file whose contents are uploaded.
        modelName: name of the model to delete before uploading.

    Raises:
        Exception: if the DELETE answers with a status other than OK,
            CREATED, ACCEPTED or NOT_FOUND, or the POST with a status other
            than OK, CREATED or ACCEPTED.
    '''
    modelUrl = "/solr/" + collection + "/schema/model-store"
    headers = {'Content-type': 'application/json'}
    with open(modelFile) as modelBody:
        conn = httplib.HTTPConnection(host, port)
        try:
            conn.request("DELETE", modelUrl + "/" + modelName)
            r = conn.getresponse()
            msg = r.read()
            if r.status not in (httplib.OK, httplib.CREATED,
                                httplib.ACCEPTED, httplib.NOT_FOUND):
                raise Exception("Status: {0} {1}\nResponse: {2}".format(r.status, r.reason, msg))

            conn.request("POST", modelUrl, modelBody, headers)
            r = conn.getresponse()
            msg = r.read()
            if r.status not in (httplib.OK, httplib.CREATED, httplib.ACCEPTED):
                raise Exception("Status: {0} {1}\nResponse: {2}".format(r.status, r.reason, msg))
        finally:
            # Release the socket whether the upload succeeded or raised.
            conn.close()
|
|
|
|
|
|
def main(argv=None):
    '''Runs the whole train-and-upload pipeline described by a JSON config file.

    Steps: upload the features to Solr, turn the user queries into Solr
    feature-extraction queries, run them, write the training file, train the
    model, convert it to a Solr model and upload that model.

    Args:
        argv: full argument vector including the program name at index 0;
            defaults to ``sys.argv``. The config file is given with
            ``-c``/``--config``.

    Returns:
        1 if no config file was given (usage is printed); otherwise None once
        the pipeline has completed.

    The config must provide the keys: collection, host, port,
    requestHandler, efiParams, userQueriesFile, solrFeaturesFile,
    solrFeatureStoreName, trainingFile, trainingLibraryLocation,
    trainingLibraryOptions, trainedModelFile, solrModelFile, solrModelName.
    '''
    if argv is None:
        argv = sys.argv

    parser = OptionParser(usage="usage: %prog [options] ", version="%prog 1.0")
    parser.add_option('-c', '--config',
                      dest='configFile',
                      help='File of configuration for the test')
    # Parse the arguments that were actually passed in; index 0 is the
    # program name and is not an option.
    options, _ = parser.parse_args(argv[1:])

    if options.configFile is None:
        parser.print_help()
        return 1

    # The config file is only needed long enough to load it.
    with open(options.configFile) as configFile:
        config = json.load(configFile)

    print("Uploading features ("+config["solrFeaturesFile"]+") to Solr")
    setupSolr(config["collection"], config["host"], config["port"], config["solrFeaturesFile"], config["solrFeatureStoreName"])

    print("Converting user queries ("+config["userQueriesFile"]+") into Solr queries for feature extraction")
    reRankQueries = generateQueries(config["userQueriesFile"], config["collection"], config["requestHandler"], config["solrFeatureStoreName"], config["efiParams"])

    print("Running Solr queries to extract features")
    fvGenerator = generateTrainingData(reRankQueries, config["host"], config["port"])
    formatter = libsvm_formatter.LibSvmFormatter()
    formatter.processQueryDocFeatureVector(fvGenerator, config["trainingFile"])

    print("Training model using '"+config["trainingLibraryLocation"]+" "+config["trainingLibraryOptions"]+"'")
    libsvm_formatter.trainLibSvm(config["trainingLibraryLocation"], config["trainingLibraryOptions"], config["trainingFile"], config["trainedModelFile"])

    print("Converting trained model ("+config["trainedModelFile"]+") to solr model ("+config["solrModelFile"]+")")
    formatter.convertLibSvmModelToLtrModel(config["trainedModelFile"], config["solrModelFile"], config["solrModelName"], config["solrFeatureStoreName"])

    print("Uploading model ("+config["solrModelFile"]+") to Solr")
    uploadModel(config["collection"], config["host"], config["port"], config["solrModelFile"], config["solrModelName"])
|
|
|
|
|
|
# Script entry point: run main() and use its return value as the exit status.
if __name__ == '__main__':
    exitStatus = main()
    sys.exit(exitStatus)
|