# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# $Id$
# A simple Solr client for python.
# This is prototype level code and subject to change.
#
# quick examples on use:
#
# from solr import *
# c = SolrConnection(host='localhost:8983', persistent=True)
# c.add(id='500',name='python test doc')
# c.delete('123')
# c.commit()
# print c.search(q='id:[* TO *]', wt='python', rows='10',indent='on')
# data = c.search(q='id:500', wt='python')
# print 'first match=', eval(data)['response']['docs'][0]
#
# NOTE: eval() on a server response is only acceptable against a trusted
# Solr instance; never use it on data from an untrusted source.

try:
    import httplib
except ImportError:  # Python 3 renamed the module to http.client
    import http.client as httplib
import socket
from xml.dom.minidom import parseString
import codecs
import urllib

try:
    _text_type = unicode  # Python 2
except NameError:
    _text_type = str  # Python 3


def _to_text(value):
    """Return *value* as a text (unicode) string.

    Text is returned unchanged, byte strings are decoded as UTF-8 and any
    other object is converted with the text constructor.
    """
    if isinstance(value, _text_type):
        return value
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return _text_type(value)


class SolrException(Exception):
    """An exception thrown by solr connections.

    Attributes:
        httpcode: HTTP status code of the response that caused the error.
        reason: HTTP reason phrase or error text, if available.
        body: raw response body, if it could be read.
    """

    def __init__(self, httpcode, reason=None, body=None):
        # Fill Exception.args as well so generic handlers, logging and
        # pickling see the details instead of an empty tuple.
        Exception.__init__(self, httpcode, reason, body)
        self.httpcode = httpcode
        self.reason = reason
        self.body = body

    def __repr__(self):
        return 'HTTP code=%s, Reason=%s, body=%s' % (
            self.httpcode, self.reason, self.body)

    def __str__(self):
        return 'HTTP code=%s, reason=%s' % (self.httpcode, self.reason)


class SolrConnection:
    """A minimal HTTP client for the XML update interface of a Solr server.

    Update messages are assembled as UTF-8 encoded byte strings and POSTed
    to ``<solrBase>/update``.
    """

    def __init__(self, host='localhost:8983', solrBase='/solr',
                 persistent=True, postHeaders=None):
        """Prepare a connection; no socket is opened until the first request.

        Args:
            host: ``host:port`` of the Solr server.
            solrBase: URL path prefix of the Solr web application.
            persistent: keep the HTTP connection open between requests.
                When false a ``Connection: close`` header is sent and the
                connection is closed after every update.
            postHeaders: optional mapping of extra headers that is merged
                into the headers sent with XML update requests.
        """
        self.host = host
        self.solrBase = solrBase
        self.persistent = persistent
        self.reconnects = 0

        self.encoder = codecs.getencoder('utf-8')
        # responses from Solr will always be in UTF-8
        self.decoder = codecs.getdecoder('utf-8')

        # a real connection to the server is not opened at this point.
        self.conn = httplib.HTTPConnection(self.host)
        # debugging hint: self.conn.set_debuglevel(1000000)

        self.xmlheaders = {'Content-Type': 'text/xml; charset=utf-8'}
        if postHeaders:
            self.xmlheaders.update(postHeaders)
        if not self.persistent:
            self.xmlheaders['Connection'] = 'close'

        self.formheaders = {
            'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8'}
        if not self.persistent:
            self.formheaders['Connection'] = 'close'

    def __str__(self):
        return 'SolrConnection{host=%s, solrBase=%s, persistent=%s, postHeaders=%s, reconnects=%s}' % \
            (self.host, self.solrBase, self.persistent, self.xmlheaders, self.reconnects)

    def __reconnect(self):
        """Drop the current socket, open a new one and count the reconnect."""
        self.reconnects += 1
        self.conn.close()
        self.conn.connect()

    def __errcheck(self, rsp):
        """Return *rsp* if its status is 200, otherwise raise SolrException.

        The response body is attached to the exception when it can be read.
        """
        if rsp.status == 200:
            return rsp
        try:
            body = rsp.read()
        except Exception:
            # Best effort only: failing to read the body must not hide the
            # HTTP error itself.  (KeyboardInterrupt/SystemExit still
            # propagate, unlike with a bare except.)
            body = None
        raise SolrException(rsp.status, rsp.reason, body)

    def doPost(self, url, body, headers):
        """POST *body* to *url* and return the response object.

        The request is retried once on a fresh connection if the first
        attempt fails at the connection level.

        Raises:
            SolrException: the server answered with a status other than 200.
        """
        try:
            self.conn.request('POST', url, body, headers)
            return self.__errcheck(self.conn.getresponse())
        except (socket.error, httplib.CannotSendRequest, httplib.BadStatusLine):
            # Reconnect in case the connection was broken from the server
            # going down, the server timing out our persistent connection,
            # or another network failure.  CannotSendRequest is caught
            # because the HTTPConnection object can get in a bad state;
            # BadStatusLine is what getresponse() raises when the server
            # has silently closed a kept-alive connection.
            self.__reconnect()
            self.conn.request('POST', url, body, headers)
            return self.__errcheck(self.conn.getresponse())

    def __oldStyleError(self, data, httpcode):
        """Return a SolrException for an old-style error body, else None.

        Old-style error responses arrive with HTTP code 200 but carry a
        non-zero status in the body.

        NOTE(review): the string literals of this check were stripped from
        the reviewed copy of the file.  ``<result status="N">message</result>``
        is assumed to be the legacy response format -- confirm against the
        targeted Solr version.
        """
        prefix = b'<result status="'
        if not data.startswith(prefix) or data.startswith(prefix + b'0"'):
            return None
        # Hand the raw bytes to the parser; it does the UTF-8 decoding.
        root = parseString(data).documentElement
        if root.getAttribute('status') == '0':
            return None
        reason = None
        if root.firstChild is not None:
            reason = root.firstChild.nodeValue
        return SolrException(httpcode, reason)

    def doUpdateXML(self, request):
        """POST an XML update message and return the raw response body.

        Args:
            request: the XML message; text is encoded as UTF-8 first so it
                matches the charset announced in the request headers.

        Raises:
            SolrException: on a non-200 response or an old-style error body.
        """
        if isinstance(request, _text_type):
            request = request.encode('utf-8')
        try:
            rsp = self.doPost(self.solrBase + '/update', request, self.xmlheaders)
            data = rsp.read()
        finally:
            if not self.persistent:
                self.conn.close()

        # detect old-style error response (HTTP response code of
        # 200 with a non-zero status).
        error = self.__oldStyleError(data, rsp.status)
        if error is not None:
            raise error
        return data

    def escapeVal(self, val):
        """Escape *val* for use as XML character data; return UTF-8 bytes.

        NOTE(review): only the final ``]]>`` replacement survived in the
        reviewed copy; the ``&`` and ``<`` replacements are restored
        because XML requires both to be escaped in character data.
        """
        val = _to_text(val)
        val = val.replace('&', '&amp;')  # must come first
        val = val.replace('<', '&lt;')
        val = val.replace(']]>', ']]&gt;')
        return self.encoder(val)[0]  # to utf8

    def escapeKey(self, key):
        """Escape *key* for a double-quoted XML attribute; return UTF-8 bytes."""
        key = _to_text(key)
        key = key.replace('&', '&amp;')  # must come first
        key = key.replace('<', '&lt;')  # '<' is not allowed in attribute values
        key = key.replace('"', '&quot;')
        return self.encoder(key)[0]  # to utf8

    def delete(self, id):
        """Delete the document whose unique key is *id*."""
        xstr = b'<delete><id>' + self.escapeVal(id) + b'</id></delete>'
        return self.doUpdateXML(xstr)

    def deleteByQyery(self, query):
        """Delete every document matching *query*.

        The misspelled name is kept for backward compatibility; new code
        should call ``deleteByQuery``.
        """
        xstr = b'<delete><query>' + self.escapeVal(query) + b'</query></delete>'
        return self.doUpdateXML(xstr)

    # Correctly spelled alias of deleteByQyery.
    deleteByQuery = deleteByQyery

    def __add(self, lst, fields):
        """Append the ``<doc>`` element for the mapping *fields* to *lst*."""
        lst.append(b'<doc>')
        for name, value in fields.items():
            lst.append(b'<field name="')
            lst.append(self.escapeKey(name))
            lst.append(b'">')
            lst.append(self.escapeVal(value))
            lst.append(b'</field>')
        lst.append(b'</doc>')

    def add(self, **fields):
        """Add one document whose fields are given as keyword arguments."""
        lst = [b'<add>']
        self.__add(lst, fields)
        lst.append(b'</add>')
        xstr = b''.join(lst)
        return self.doUpdateXML(xstr)

    def addMany(self, arrOfMap):
        """Add several documents, each a mapping of field name to value."""
        lst = [b'<add>']
        for doc in arrOfMap:
            self.__add(lst, doc)
        lst.append(b'</add>')
        xstr = b''.join(lst)
        return self.doUpdateXML(xstr)

    def commit(self, waitFlush=True, waitSearcher=True, optimize=False):
        """Send a ``<commit/>`` (or ``<optimize/>`` if *optimize*) message.

        NOTE(review): the body of this method was cut off in the reviewed
        copy of the file and has been rebuilt from the signature and the
        Solr XML update format.  Each flag that deviates from its default
        is sent as an explicit ``"false"`` attribute -- confirm against the
        original implementation.
        """
        lst = [b'<optimize' if optimize else b'<commit']
        if not waitFlush:
            lst.append(b' waitFlush="false"')
        if not waitSearcher:
            lst.append(b' waitSearcher="false"')
        lst.append(b'/>')
        return self.doUpdateXML(b''.join(lst))