Skip to content

Commit

Permalink
SOLR-16835: Generate Python client from OpenAPI spec (#1681)
Browse files Browse the repository at this point in the history
This commit adds build code to generate a Python client (using
the OpenAPI Generator's 'python' template) and adds the
necessary plumbing to bundle the client into an example directory
used in Solr's LTR module.

Note that nothing in this commit adds this client as a release
artifact, publishes it to pip, etc.
  • Loading branch information
gerlowskija committed Dec 7, 2023
1 parent 3dd1139 commit 5eff6d0
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 45 deletions.
20 changes: 18 additions & 2 deletions solr/api/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ description = 'API - Interfaces and classes used to represent Solrs APIs'

ext {
jsClientDir = "${buildDir}/generated/js"
pythonClientDir = "${buildDir}/generated/python"
openApiSpecDir = "${buildDir}/generated/openapi"
openApiSpecFile = "${project.openApiSpecDir}/openapi.json"
}
Expand All @@ -39,6 +40,10 @@ configurations {
canBeConsumed = true
canBeResolved = false
}
pythonClient {
canBeConsumed = true
canBeResolved = false
}
}

resolve {
Expand All @@ -62,7 +67,6 @@ dependencies {
}

// Non-Java client generation tasks below:

task buildJSClient(type: org.openapitools.generator.gradle.plugin.tasks.GenerateTask) {
generatorName.set("javascript")
inputSpec.set("$openApiSpecFile")
Expand All @@ -72,6 +76,15 @@ task buildJSClient(type: org.openapitools.generator.gradle.plugin.tasks.Generate
generateModelTests.set(false)
}

// Generates a Python client from the OpenAPI spec using the OpenAPI Generator's "python" generator.
task buildPythonClient(type: org.openapitools.generator.gradle.plugin.tasks.GenerateTask) {
generatorName.set("python")
// Spec location comes from the `openApiSpecFile` ext property declared at the top of this build file
inputSpec.set("$openApiSpecFile")
// Generated sources are written to the `pythonClientDir` ext property (under the build directory)
outputDir.set("$pythonClientDir")
// Name of the generated Python package; the LTR example script imports it as `solr`
packageName.set("solr")
// Do not generate test stubs for the API or model classes
generateApiTests.set(false)
generateModelTests.set(false)
}

// Every OpenAPI Generator task in this project (JS and Python clients alike) runs after `resolve`.
// NOTE(review): presumably `resolve` is what writes the spec file the generators read — confirm.
tasks.withType(org.openapitools.generator.gradle.plugin.tasks.GenerateTask) {
dependsOn(resolve)
}
Expand All @@ -82,8 +95,11 @@ artifacts {
builtBy resolve
}

// Makes our Javascript client available to the Admin UI build
// Makes generated clients available to other build modules
jsClient file(project.jsClientDir), {
builtBy buildJSClient
}
pythonClient file(project.pythonClientDir), {
builtBy buildPythonClient
}
}
21 changes: 21 additions & 0 deletions solr/modules/ltr/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@ apply plugin: 'java-library'

description = 'Learning to Rank Package'

ext {
// Directory (relative to this project) that receives a copy of the generated Python client
pythonClientCopyDir = layout.projectDirectory.dir("example/solrclient")
}

configurations {
// Consumes the "pythonClient" configuration exposed by the :solr:api project
generatedPythonClient
// Represents the copy under `pythonClientCopyDir`, built by the copyPythonClientToExample task
localPythonClientCopy
}

dependencies {
implementation project(':solr:core')
implementation project(':solr:solrj')
Expand All @@ -27,6 +36,12 @@ dependencies {

implementation 'org.slf4j:slf4j-api'

// Used by example scripts
generatedPythonClient project(path: ":solr:api", configuration: "pythonClient")
localPythonClientCopy files(pythonClientCopyDir) {
builtBy "copyPythonClientToExample"
}

testImplementation('org.mockito:mockito-core', {
exclude group: "net.bytebuddy", module: "byte-buddy-agent"
})
Expand All @@ -42,3 +57,9 @@ dependencies {

testImplementation 'commons-io:commons-io'
}

// Places the Python client generated by :solr:api into the LTR example directory
// (`pythonClientCopyDir`, i.e. example/solrclient) so the example scripts can install and use it.
// This is a Sync task rather than a Copy, so the destination is kept as a mirror of the source.
task copyPythonClientToExample(type: Sync) {
group = "Solr Python Client"
from configurations.generatedPythonClient
into project.pythonClientCopyDir
}
1 change: 1 addition & 0 deletions solr/modules/ltr/example/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
solrclient/
12 changes: 10 additions & 2 deletions solr/modules/ltr/example/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,15 @@ Please refer to the Solr Reference Guide's section on [Learning To Rank](https:/

`ln -s /Users/YourNameHere/Downloads/liblinear-2.1 ./modules/ltr/example/liblinear`

3. Extract features, train a reranking model, and deploy it to Solr.
3. Prepare your Python3 environment to run the training script.

`./gradlew solr:modules:ltr:copyPythonClientToExample`

`cd solr/modules/ltr/example/solrclient && python3 setup.py install --user && cd -`

This installs a Python client used to talk to Solr, making it and its dependencies available to the training script used below.

4. Extract features, train a reranking model, and deploy it to Solr.

`cd modules/ltr/example`

Expand All @@ -43,7 +51,7 @@ Please refer to the Solr Reference Guide's section on [Learning To Rank](https:/
document pairs of "userQueriesFile" and merges it with the features extracted from Solr into a training
file. That file is used to train a linear model, which is then deployed to Solr for you to rerank results.

4. Search and rerank the results using the trained model
5. Search and rerank the results using the trained model

```
http://localhost:8983/solr/techproducts/query?q=test&rq={!ltr%20model=exampleModel%20reRankDocs=25%20efi.user_query=%27test%27}&fl=price,score,name
Expand Down
76 changes: 35 additions & 41 deletions solr/modules/ltr/example/train_and_upload_demo_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,15 @@

from optparse import OptionParser

solrQueryUrl = ""
import solr
from solr.api import querying_api

def setupSolrClient(host, port):
    '''Makes the generated Solr client default to the "/api" endpoint of the given host/port'''
    # Assemble the base URL piecewise; str() tolerates a non-string port value
    apiBaseUrl = "".join(["http://", host, ":", str(port), "/api"])
    defaultConfig = solr.Configuration(host=apiBaseUrl)
    # Register as the process-wide default so API objects created later pick it up
    solr.Configuration.set_default(defaultConfig)

def setupSolr(collection, host, port, featuresFile, featureStoreName):
'''Sets up solr with the proper features for the test'''
Expand Down Expand Up @@ -46,48 +53,36 @@ def setupSolr(collection, host, port, featuresFile, featureStoreName):
conn.close()


def generateQueries(userQueriesFile, collection, requestHandler, solrFeatureStoreName, efiParams):
def generateQueries(userQueriesFile, solrFeatureStoreName, efiParams):
with open(userQueriesFile) as input:
solrQueryUrls = [] #A list of tuples with solrQueryUrl,solrQuery,docId,scoreForPQ,source
solrQueryInfo = [] #A list of tuples with solrQueryBody,queryText,docId,scoreForPQ,source

for line in input:
line = line.strip();
searchText,docId,score,source = line.split("|");
solrQuery = generateHttpRequest(collection,requestHandler,solrFeatureStoreName,efiParams,searchText,docId)
solrQueryUrls.append((solrQuery,searchText,docId,score,source))
solrQueryBody = generateQueryBody(solrFeatureStoreName,efiParams,searchText,docId)
solrQueryInfo.append((solrQueryBody,searchText,docId,score,source))
return solrQueryInfo;

return solrQueryUrls;

def generateQueryBody(solrFeatureStoreName, efiParams, searchText, docId):
concreteEfiParams = efiParams.replace("$USERQUERY", searchText.strip())
featuresTransformer = "[features store=" + solrFeatureStoreName + " " + concreteEfiParams + "]"
solrJsonParams = {
"query": "id:" + docId,
"fields": ["id", "score", featuresTransformer]
}

def generateHttpRequest(collection, requestHandler, solrFeatureStoreName, efiParams, searchText, docId):
global solrQueryUrl
if len(solrQueryUrl) < 1:
solrQueryUrl = "/".join([ "", "solr", collection, requestHandler ])
solrQueryUrl += ("?fl=" + ",".join([ "id", "score", "[features store="+solrFeatureStoreName+" "+efiParams+"]" ]))
solrQueryUrl += "&q="
solrQueryUrl = solrQueryUrl.replace(" ","+")
solrQueryUrl += urllib.parse.quote_plus("id:")
return solrJsonParams


userQuery = urllib.parse.quote_plus(searchText.strip().replace("'","\\'").replace("/","\\\\/"))
solrQuery = solrQueryUrl + '"' + urllib.parse.quote_plus(docId) + '"' #+ solrQueryUrlEnd
solrQuery = solrQuery.replace("%24USERQUERY", userQuery).replace('$USERQUERY', urllib.parse.quote_plus("\\'" + userQuery + "\\'"))

return solrQuery


def generateTrainingData(solrQueries, host, port):
def generateTrainingData(solrQueries, coreName):
'''Given a list of solr queries, yields a tuple of query , docId , score , source , feature vector for each query.
Feature Vector is a list of strings of form "key=value"'''
conn = http.client.HTTPConnection(host, port)
headers = {"Connection":" keep-alive"}

try:
for queryUrl,query,docId,score,source in solrQueries:
conn.request("GET", queryUrl, headers=headers)
r = conn.getresponse()
msg = r.read()
msgDict = json.loads(msg)
queryClient = querying_api.QueryingApi()
for solrQueryBody,query,docId,score,source in solrQueries:
msgDict = queryClient.json_query("cores", coreName, solrQueryBody)
fv = ""
docs = msgDict['response']['docs']
if len(docs) > 0 and "[features]" in docs[0]:
Expand All @@ -101,19 +96,17 @@ def generateTrainingData(solrQueries, host, port):
print("ERROR FOR: " + docId);
print(msg)
continue;

if r.status == http.client.OK:
#print "http connection was ok for: " + queryUrl
yield(query,docId,score,source,fv.split(","));
else:
raise Exception("Status: {0} {1}\nResponse: {2}".format(r.status, r.reason, msg))
if msgDict.get("response_header") != None:
status = msgDict.get("response_header").get("status")
if status == 0:
#print "http connection was ok for: " + queryUrl
yield(query,docId,score,source,fv.split(","));
else:
raise Exception("Status: {0} \nResponse: {2}".format(status, msgDict))
except Exception as e:
print(msg)
print(e)

conn.close()


def uploadModel(collection, host, port, modelFile, modelName):
modelUrl = "/solr/" + collection + "/schema/model-store"
headers = {'Content-type': 'application/json'}
Expand Down Expand Up @@ -156,13 +149,14 @@ def main(argv=None):
config = json.load(configFile)

print("Uploading features ("+config["solrFeaturesFile"]+") to Solr")
setupSolrClient(config["host"], config["port"])
setupSolr(config["collection"], config["host"], config["port"], config["solrFeaturesFile"], config["solrFeatureStoreName"])

print("Converting user queries ("+config["userQueriesFile"]+") into Solr queries for feature extraction")
reRankQueries = generateQueries(config["userQueriesFile"], config["collection"], config["requestHandler"], config["solrFeatureStoreName"], config["efiParams"])
reRankQueries = generateQueries(config["userQueriesFile"], config["solrFeatureStoreName"], config["efiParams"])

print("Running Solr queries to extract features")
fvGenerator = generateTrainingData(reRankQueries, config["host"], config["port"])
fvGenerator = generateTrainingData(reRankQueries, config["collection"])
formatter = libsvm_formatter.LibSvmFormatter();
formatter.processQueryDocFeatureVector(fvGenerator,config["trainingFile"]);

Expand Down

0 comments on commit 5eff6d0

Please sign in to comment.