Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SOLR-16835: Generate Python client from OpenAPI spec #1681

Merged
Merged
20 changes: 18 additions & 2 deletions solr/api/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ description = 'API - Interfaces and classes used to represent Solrs APIs'

// Extra project properties: locations (all under buildDir) of the OpenAPI spec
// and of the non-Java clients generated from it.
ext {
// JavaScript client directory; published below through the 'jsClient' artifact
jsClientDir = "${buildDir}/generated/js"
// Python client directory; used as outputDir of buildPythonClient and
// published below through the 'pythonClient' artifact
pythonClientDir = "${buildDir}/generated/python"
// Directory containing the OpenAPI spec
openApiSpecDir = "${buildDir}/generated/openapi"
// The spec file itself; passed as inputSpec to the client-generation tasks
openApiSpecFile = "${project.openApiSpecDir}/openapi.json"
}
Expand All @@ -39,6 +40,10 @@ configurations {
canBeConsumed = true
canBeResolved = false
}
pythonClient {
canBeConsumed = true
canBeResolved = false
}
}

resolve {
Expand All @@ -62,7 +67,6 @@ dependencies {
}

// Non-Java client generation tasks below:

task buildJSClient(type: org.openapitools.generator.gradle.plugin.tasks.GenerateTask) {
generatorName.set("javascript")
inputSpec.set("$openApiSpecFile")
Expand All @@ -72,6 +76,15 @@ task buildJSClient(type: org.openapitools.generator.gradle.plugin.tasks.Generate
generateModelTests.set(false)
}

// Generates a Python client from the OpenAPI spec (openApiSpecFile) using the
// OpenAPI Generator "python" generator. Output is written to pythonClientDir and
// the generated Python package is named "solr".
task buildPythonClient(type: org.openapitools.generator.gradle.plugin.tasks.GenerateTask) {
generatorName.set("python")
inputSpec.set("$openApiSpecFile")
outputDir.set("$pythonClientDir")
packageName.set("solr")
// Test generation is switched off for both API and model classes
generateApiTests.set(false)
generateModelTests.set(false)
}

// Applies to every GenerateTask in this project (i.e. all client-generation tasks):
// each one must run after 'resolve'.
// NOTE(review): presumably 'resolve' is what produces openApiSpecFile — confirm.
tasks.withType(org.openapitools.generator.gradle.plugin.tasks.GenerateTask) {
dependsOn(resolve)
}
Expand All @@ -82,8 +95,11 @@ artifacts {
builtBy resolve
}

// Makes our Javascript client available to the Admin UI build
// Makes generated clients available to other build modules
jsClient file(project.jsClientDir), {
builtBy buildJSClient
}
pythonClient file(project.pythonClientDir), {
builtBy buildPythonClient
}
}
21 changes: 21 additions & 0 deletions solr/modules/ltr/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@ apply plugin: 'java-library'

description = 'Learning to Rank Package'

// Extra project properties for the LTR example scripts.
ext {
// Directory (relative to this project) that copyPythonClientToExample writes the
// Python client into: example/solrclient
pythonClientCopyDir = layout.projectDirectory.dir("example/solrclient")
}

// Custom configurations used to pull the generated Python client into this module.
configurations {
// Filled in 'dependencies' from the 'pythonClient' configuration of :solr:api
generatedPythonClient
// Filled in 'dependencies' with the files under pythonClientCopyDir, which are
// built by copyPythonClientToExample
localPythonClientCopy
}

dependencies {
implementation project(':solr:core')
implementation project(':solr:solrj')
Expand All @@ -27,6 +36,12 @@ dependencies {

implementation 'org.slf4j:slf4j-api'

// Used by example scripts
generatedPythonClient project(path: ":solr:api", configuration: "pythonClient")
localPythonClientCopy files(pythonClientCopyDir) {
builtBy "copyPythonClientToExample"
}

testImplementation('org.mockito:mockito-core', {
exclude group: "net.bytebuddy", module: "byte-buddy-agent"
})
Expand All @@ -42,3 +57,9 @@ dependencies {

testImplementation 'commons-io:commons-io'
}

// Copies the contents of the 'generatedPythonClient' configuration into
// pythonClientCopyDir so the example scripts can install and use the client.
// This is a Sync task: per the Gradle documentation, files in the destination
// that are not present in the source are removed.
task copyPythonClientToExample(type: Sync) {
// Task group name shown for this task
group = "Solr Python Client"
from configurations.generatedPythonClient
into project.pythonClientCopyDir
}
1 change: 1 addition & 0 deletions solr/modules/ltr/example/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
solrclient/
12 changes: 10 additions & 2 deletions solr/modules/ltr/example/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,15 @@ Please refer to the Solr Reference Guide's section on [Learning To Rank](https:/

`ln -s /Users/YourNameHere/Downloads/liblinear-2.1 ./modules/ltr/example/liblinear`

3. Extract features, train a reranking model, and deploy it to Solr.
3. Prepare your Python3 environment to run the training script.

`./gradlew solr:modules:ltr:copyPythonClientToExample`

`pushd solrclient && python3 setup.py install --user && popd`
gerlowskija marked this conversation as resolved.
Show resolved Hide resolved

This installs a Python client used to talk to Solr, making it and its dependencies available to the training script used below.

4. Extract features, train a reranking model, and deploy it to Solr.

`cd modules/ltr/example`

Expand All @@ -43,7 +51,7 @@ Please refer to the Solr Reference Guide's section on [Learning To Rank](https:/
document pairs of "userQueriesFile" and merges it with the features extracted from Solr into a training
file. That file is used to train a linear model, which is then deployed to Solr for you to rerank results.

4. Search and rerank the results using the trained model
5. Search and rerank the results using the trained model

```
http://localhost:8983/solr/techproducts/query?q=test&rq={!ltr%20model=exampleModel%20reRankDocs=25%20efi.user_query=%27test%27}&fl=price,score,name
Expand Down
76 changes: 35 additions & 41 deletions solr/modules/ltr/example/train_and_upload_demo_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,15 @@

from optparse import OptionParser

solrQueryUrl = ""
import solr
from solr.api import querying_api

def setupSolrClient(host, port):
'''Configures the Solr client with the specified Solr host/port'''
solr_client_config = solr.Configuration(
host = "http://" + host + ":" + str(port) + "/api"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i guess at some point the /api will get dropped right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that's the plan eventually, yep.

)
solr.Configuration.set_default(solr_client_config)

def setupSolr(collection, host, port, featuresFile, featureStoreName):
'''Sets up solr with the proper features for the test'''
Expand Down Expand Up @@ -46,48 +53,36 @@ def setupSolr(collection, host, port, featuresFile, featureStoreName):
conn.close()


def generateQueries(userQueriesFile, collection, requestHandler, solrFeatureStoreName, efiParams):
def generateQueries(userQueriesFile, solrFeatureStoreName, efiParams):
with open(userQueriesFile) as input:
solrQueryUrls = [] #A list of tuples with solrQueryUrl,solrQuery,docId,scoreForPQ,source
solrQueryInfo = [] #A list of tuples with solrQueryBody,queryText,docId,scoreForPQ,source

for line in input:
line = line.strip();
searchText,docId,score,source = line.split("|");
solrQuery = generateHttpRequest(collection,requestHandler,solrFeatureStoreName,efiParams,searchText,docId)
solrQueryUrls.append((solrQuery,searchText,docId,score,source))
solrQueryBody = generateQueryBody(solrFeatureStoreName,efiParams,searchText,docId)
solrQueryInfo.append((solrQueryBody,searchText,docId,score,source))
return solrQueryInfo;

return solrQueryUrls;

def generateQueryBody(solrFeatureStoreName, efiParams, searchText, docId):
concreteEfiParams = efiParams.replace("$USERQUERY", searchText.strip())
featuresTransformer = "[features store=" + solrFeatureStoreName + " " + concreteEfiParams + "]"
solrJsonParams = {
"query": "id:" + docId,
"fields": ["id", "score", featuresTransformer]
}

def generateHttpRequest(collection, requestHandler, solrFeatureStoreName, efiParams, searchText, docId):
global solrQueryUrl
if len(solrQueryUrl) < 1:
solrQueryUrl = "/".join([ "", "solr", collection, requestHandler ])
solrQueryUrl += ("?fl=" + ",".join([ "id", "score", "[features store="+solrFeatureStoreName+" "+efiParams+"]" ]))
solrQueryUrl += "&q="
solrQueryUrl = solrQueryUrl.replace(" ","+")
solrQueryUrl += urllib.parse.quote_plus("id:")
return solrJsonParams


userQuery = urllib.parse.quote_plus(searchText.strip().replace("'","\\'").replace("/","\\\\/"))
solrQuery = solrQueryUrl + '"' + urllib.parse.quote_plus(docId) + '"' #+ solrQueryUrlEnd
solrQuery = solrQuery.replace("%24USERQUERY", userQuery).replace('$USERQUERY', urllib.parse.quote_plus("\\'" + userQuery + "\\'"))

return solrQuery


def generateTrainingData(solrQueries, host, port):
def generateTrainingData(solrQueries, coreName):
'''Given a list of solr queries, yields a tuple of query , docId , score , source , feature vector for each query.
Feature Vector is a list of strings of form "key=value"'''
conn = http.client.HTTPConnection(host, port)
headers = {"Connection":" keep-alive"}

try:
for queryUrl,query,docId,score,source in solrQueries:
conn.request("GET", queryUrl, headers=headers)
r = conn.getresponse()
msg = r.read()
msgDict = json.loads(msg)
queryClient = querying_api.QueryingApi()
for solrQueryBody,query,docId,score,source in solrQueries:
msgDict = queryClient.json_query("cores", coreName, solrQueryBody)
fv = ""
docs = msgDict['response']['docs']
if len(docs) > 0 and "[features]" in docs[0]:
Expand All @@ -101,19 +96,17 @@ def generateTrainingData(solrQueries, host, port):
print("ERROR FOR: " + docId);
print(msg)
continue;

if r.status == http.client.OK:
#print "http connection was ok for: " + queryUrl
yield(query,docId,score,source,fv.split(","));
else:
raise Exception("Status: {0} {1}\nResponse: {2}".format(r.status, r.reason, msg))
if msgDict.get("response_header") != None:
status = msgDict.get("response_header").get("status")
if status == 0:
#print "http connection was ok for: " + queryUrl
yield(query,docId,score,source,fv.split(","));
else:
raise Exception("Status: {0} \nResponse: {2}".format(status, msgDict))
except Exception as e:
print(msg)
print(e)

conn.close()


def uploadModel(collection, host, port, modelFile, modelName):
modelUrl = "/solr/" + collection + "/schema/model-store"
headers = {'Content-type': 'application/json'}
Expand Down Expand Up @@ -156,13 +149,14 @@ def main(argv=None):
config = json.load(configFile)

print("Uploading features ("+config["solrFeaturesFile"]+") to Solr")
setupSolrClient(config["host"], config["port"])
setupSolr(config["collection"], config["host"], config["port"], config["solrFeaturesFile"], config["solrFeatureStoreName"])

print("Converting user queries ("+config["userQueriesFile"]+") into Solr queries for feature extraction")
reRankQueries = generateQueries(config["userQueriesFile"], config["collection"], config["requestHandler"], config["solrFeatureStoreName"], config["efiParams"])
reRankQueries = generateQueries(config["userQueriesFile"], config["solrFeatureStoreName"], config["efiParams"])

print("Running Solr queries to extract features")
fvGenerator = generateTrainingData(reRankQueries, config["host"], config["port"])
fvGenerator = generateTrainingData(reRankQueries, config["collection"])
formatter = libsvm_formatter.LibSvmFormatter();
formatter.processQueryDocFeatureVector(fvGenerator,config["trainingFile"]);

Expand Down
Loading