From f19f6606fc4c688dd9095b68ae0fd941b82b1f40 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Tue, 10 Oct 2023 12:31:50 -0300 Subject: [PATCH 01/27] first version w bats test --- solr/modules/analysis-extras/build.gradle | 8 + .../OpenNLPDoccatUpdateProcessorFactory.java | 543 ++++++++++++++++++ solr/packaging/test/test_opennlp.bats | 102 ++++ 3 files changed, 653 insertions(+) create mode 100644 solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPDoccatUpdateProcessorFactory.java create mode 100644 solr/packaging/test/test_opennlp.bats diff --git a/solr/modules/analysis-extras/build.gradle b/solr/modules/analysis-extras/build.gradle index 09398b4ac5b..d1523154fad 100644 --- a/solr/modules/analysis-extras/build.gradle +++ b/solr/modules/analysis-extras/build.gradle @@ -19,6 +19,13 @@ apply plugin: 'java-library' description = 'Additional analysis components' +configurations.all { + resolutionStrategy { + force 'org.apache.opennlp:opennlp-tools:2.2.0' + force 'org.apache.opennlp:opennlp-dl:2.2.0' + } +} + dependencies { api project(':solr:core') @@ -33,6 +40,7 @@ dependencies { implementation 'org.apache.lucene:lucene-core' // NOTE: Need to stay on same version of opennlp-tools as lucene-analysis-opennlp implementation 'org.apache.opennlp:opennlp-tools' + implementation 'org.apache.opennlp:opennlp-dl' implementation 'org.slf4j:slf4j-api' testImplementation project(':solr:test-framework') diff --git a/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPDoccatUpdateProcessorFactory.java b/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPDoccatUpdateProcessorFactory.java new file mode 100644 index 00000000000..76f626bda7b --- /dev/null +++ b/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPDoccatUpdateProcessorFactory.java @@ -0,0 +1,543 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR; + +import java.io.File; +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import ai.onnxruntime.OrtException; +import opennlp.dl.InferenceOptions; +import opennlp.dl.doccat.DocumentCategorizerDL; +import opennlp.dl.doccat.scoring.AverageClassificationScoringStrategy; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.SolrInputField; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.Pair; +import org.apache.solr.core.SolrCore; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.update.AddUpdateCommand; +import org.apache.solr.update.processor.FieldMutatingUpdateProcessor.FieldNameSelector; +import org.apache.solr.update.processor.FieldMutatingUpdateProcessorFactory.SelectorParams; +import org.apache.solr.util.plugin.SolrCoreAware; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class OpenNLPDoccatUpdateProcessorFactory extends UpdateRequestProcessorFactory + implements SolrCoreAware { + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + public static final String SOURCE_PARAM = "source"; + public static final String DEST_PARAM = "dest"; + public static final String PATTERN_PARAM = "pattern"; + public static final String REPLACEMENT_PARAM = "replacement"; + public static final String MODEL_PARAM = "modelFile"; + public static final String VOCAB_PARAM = "vocabFile"; + + private SelectorParams srcInclusions = new SelectorParams(); + private Collection srcExclusions = new ArrayList<>(); + + private FieldNameSelector srcSelector = null; + + private String model = null; + private String vocab = null; + private String analyzerFieldType = null; + + /** + * If pattern is null, this this is a literal field name. If pattern is non-null then this is a + * replacement string that may contain meta-characters (ie: capture group identifiers) + * + * @see #pattern + */ + private String dest = null; + /** @see #dest */ + private Pattern pattern = null; + + protected final FieldNameSelector getSourceSelector() { + if (null != srcSelector) return srcSelector; + + throw new SolrException( + SERVER_ERROR, "selector was never initialized, inform(SolrCore) never called???"); + } + + @Override + public void init(NamedList args) { + + System.out.println("In OpenNLP doccat init()"); + + // high level (loose) check for which type of config we have. + // + // individual init methods do more strict syntax checking + if (0 <= args.indexOf(SOURCE_PARAM, 0) && 0 <= args.indexOf(DEST_PARAM, 0)) { + initSourceSelectorSyntax(args); + } else if (0 <= args.indexOf(PATTERN_PARAM, 0) && 0 <= args.indexOf(REPLACEMENT_PARAM, 0)) { + initSimpleRegexReplacement(args); + } else { + throw new SolrException( + SERVER_ERROR, + "A combination of either '" + + SOURCE_PARAM + + "' + '" + + DEST_PARAM + + "', or '" + + REPLACEMENT_PARAM + + "' + '" + + PATTERN_PARAM + + "' init params are mandatory"); + } + + Object modelParam = args.remove(MODEL_PARAM); + if (null == modelParam) { + throw new SolrException(SERVER_ERROR, "Missing required init param '" + MODEL_PARAM + "'"); + } + if (!(modelParam instanceof CharSequence)) { + throw new SolrException(SERVER_ERROR, "Init param '" + MODEL_PARAM + "' must be a "); + } + model = modelParam.toString(); + System.out.println("In OpenNLP doccat - model: " + model); + + Object vocabParam = args.remove(VOCAB_PARAM); + if (null == vocabParam) { + throw new SolrException(SERVER_ERROR, "Missing required init param '" + VOCAB_PARAM + "'"); + } + if (!(vocabParam instanceof CharSequence)) { + throw new SolrException(SERVER_ERROR, "Init param '" + VOCAB_PARAM + "' must be a "); + } + vocab = vocabParam.toString(); + + if (0 < args.size()) { + throw new SolrException(SERVER_ERROR, "Unexpected init param(s): '" + args.getName(0) + "'"); + } + + super.init(args); + } + + /** + * init helper method that should only be called when we know for certain that both the "source" + * and "dest" init params do not exist. + */ + private void initSimpleRegexReplacement(NamedList args) { + // The syntactic sugar for the case where there is only one regex pattern for source and the + // same pattern + // is used for the destination pattern... + // + // pattern != null && replacement != null + // + // ...as top level elements, with no other config options specified + + // if we got here we know we had pattern and replacement, now check for the other two so that + // we can give a better + // message than "unexpected" + if (0 <= args.indexOf(SOURCE_PARAM, 0) || 0 <= args.indexOf(DEST_PARAM, 0)) { + throw new SolrException( + SERVER_ERROR, + "Short hand syntax must not be mixed with full syntax. Found " + + PATTERN_PARAM + + " and " + + REPLACEMENT_PARAM + + " but also found " + + SOURCE_PARAM + + " or " + + DEST_PARAM); + } + + assert args.indexOf(SOURCE_PARAM, 0) < 0; + + Object patt = args.remove(PATTERN_PARAM); + Object replacement = args.remove(REPLACEMENT_PARAM); + + if (null == patt || null == replacement) { + throw new SolrException( + SERVER_ERROR, + "Init params '" + + PATTERN_PARAM + + "' and '" + + REPLACEMENT_PARAM + + "' are both mandatory if '" + + SOURCE_PARAM + + "' and '" + + DEST_PARAM + + "' are not both specified"); + } + + if (0 != args.size()) { + throw new SolrException( + SERVER_ERROR, + "Init params '" + + REPLACEMENT_PARAM + + "' and '" + + PATTERN_PARAM + + "' must be children of '" + + DEST_PARAM + + "' to be combined with other options."); + } + + if (!(replacement instanceof String)) { + throw new SolrException( + SERVER_ERROR, "Init param '" + REPLACEMENT_PARAM + "' must be a string (i.e. )"); + } + if (!(patt instanceof String)) { + throw new SolrException( + SERVER_ERROR, "Init param '" + PATTERN_PARAM + "' must be a string (i.e. )"); + } + + dest = replacement.toString(); + try { + this.pattern = Pattern.compile(patt.toString()); + } catch (PatternSyntaxException pe) { + throw new SolrException( + SERVER_ERROR, + "Init param " + PATTERN_PARAM + " is not a valid regex pattern: " + patt, + pe); + } + srcInclusions = new SelectorParams(); + srcInclusions.fieldRegex = Collections.singletonList(this.pattern); + } + + /** + * init helper method that should only be called when we know for certain that both the "source" + * and "dest" init params do exist. + */ + private void initSourceSelectorSyntax(NamedList args) { + // Full and complete syntax where source and dest are mandatory. + // + // source may be a single string or a selector. + // dest may be a single string or list containing pattern and replacement + // + // source != null && dest != null + + // if we got here we know we had source and dest, now check for the other two so that we can + // give a better + // message than "unexpected" + if (0 <= args.indexOf(PATTERN_PARAM, 0) || 0 <= args.indexOf(REPLACEMENT_PARAM, 0)) { + throw new SolrException( + SERVER_ERROR, + "Short hand syntax must not be mixed with full syntax. Found " + + SOURCE_PARAM + + " and " + + DEST_PARAM + + " but also found " + + PATTERN_PARAM + + " or " + + REPLACEMENT_PARAM); + } + + Object d = args.remove(DEST_PARAM); + assert null != d; + + List sources = args.getAll(SOURCE_PARAM); + assert null != sources; + + if (1 == sources.size()) { + if (sources.get(0) instanceof NamedList) { + // nested set of selector options + NamedList selectorConfig = (NamedList) args.remove(SOURCE_PARAM); + + srcInclusions = parseSelectorParams(selectorConfig); + + List excList = selectorConfig.getAll("exclude"); + + for (Object excObj : excList) { + if (null == excObj) { + throw new SolrException( + SERVER_ERROR, "Init param '" + SOURCE_PARAM + "' child 'exclude' can not be null"); + } + if (!(excObj instanceof NamedList)) { + throw new SolrException( + SERVER_ERROR, "Init param '" + SOURCE_PARAM + "' child 'exclude' must be "); + } + NamedList exc = (NamedList) excObj; + srcExclusions.add(parseSelectorParams(exc)); + if (0 < exc.size()) { + throw new SolrException( + SERVER_ERROR, + "Init param '" + + SOURCE_PARAM + + "' has unexpected 'exclude' sub-param(s): '" + + selectorConfig.getName(0) + + "'"); + } + // call once per instance + selectorConfig.remove("exclude"); + } + + if (0 < selectorConfig.size()) { + throw new SolrException( + SERVER_ERROR, + "Init param '" + + SOURCE_PARAM + + "' contains unexpected child param(s): '" + + selectorConfig.getName(0) + + "'"); + } + // consume from the named list so it doesn't interfere with subsequent processing + sources.remove(0); + } + } + if (1 <= sources.size()) { + // source better be one or more strings + srcInclusions.fieldName = new HashSet<>(args.removeConfigArgs("source")); + } + if (srcInclusions == null) { + throw new SolrException( + SERVER_ERROR, + "Init params do not specify any field from which to extract entities, please supply either " + + SOURCE_PARAM + + " and " + + DEST_PARAM + + " or " + + PATTERN_PARAM + + " and " + + REPLACEMENT_PARAM + + ". See javadocs" + + "for OpenNLPExtractNamedEntitiesUpdateProcessor for further details."); + } + + if (d instanceof NamedList) { + NamedList destList = (NamedList) d; + + Object patt = destList.remove(PATTERN_PARAM); + Object replacement = destList.remove(REPLACEMENT_PARAM); + + if (null == patt || null == replacement) { + throw new SolrException( + SERVER_ERROR, + "Init param '" + + DEST_PARAM + + "' children '" + + PATTERN_PARAM + + "' and '" + + REPLACEMENT_PARAM + + "' are both mandatory and can not be null"); + } + if (!(patt instanceof String && replacement instanceof String)) { + throw new SolrException( + SERVER_ERROR, + "Init param '" + + DEST_PARAM + + "' children '" + + PATTERN_PARAM + + "' and '" + + REPLACEMENT_PARAM + + "' must both be strings (i.e. )"); + } + if (0 != destList.size()) { + throw new SolrException( + SERVER_ERROR, + "Init param '" + + DEST_PARAM + + "' has unexpected children: '" + + destList.getName(0) + + "'"); + } + + try { + this.pattern = Pattern.compile(patt.toString()); + } catch (PatternSyntaxException pe) { + throw new SolrException( + SERVER_ERROR, + "Init param '" + + DEST_PARAM + + "' child '" + + PATTERN_PARAM + + " is not a valid regex pattern: " + + patt, + pe); + } + dest = replacement.toString(); + + } else if (d instanceof String) { + dest = d.toString(); + } else { + throw new SolrException( + SERVER_ERROR, + "Init param '" + + DEST_PARAM + + "' must either be a string " + + "(i.e. ) or a list (i.e. ) containing '" + + PATTERN_PARAM + + "' and '" + + REPLACEMENT_PARAM); + } + } + + @Override + public void inform(final SolrCore core) { + + srcSelector = + FieldMutatingUpdateProcessor.createFieldNameSelector( + core.getResourceLoader(), + core, + srcInclusions, + FieldMutatingUpdateProcessor.SELECT_NO_FIELDS); + + for (SelectorParams exc : srcExclusions) { + srcSelector = + FieldMutatingUpdateProcessor.wrap( + srcSelector, + FieldMutatingUpdateProcessor.createFieldNameSelector( + core.getResourceLoader(), + core, + exc, + FieldMutatingUpdateProcessor.SELECT_NO_FIELDS)); + } + } + + @Override + public final UpdateRequestProcessor getInstance( + SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) { + final FieldNameSelector srcSelector = getSourceSelector(); + return new UpdateRequestProcessor(next) { + + DocumentCategorizerDL documentCategorizerDL = null; + + { + // Initialize the categorizer. + final File modelFile = new File(model); + final File vocabFile = new File(vocab); + System.out.println("In OpenNLP doccat initializing the documentCategorizerDL"); + try { + documentCategorizerDL = new DocumentCategorizerDL(modelFile, vocabFile, getCategories(),new AverageClassificationScoringStrategy(), new InferenceOptions()); + } catch (IOException e) { + e.printStackTrace(); + } catch (OrtException e) { + e.printStackTrace(); + } + } + + @Override + public void processAdd(AddUpdateCommand cmd) throws IOException { + + final SolrInputDocument doc = cmd.getSolrInputDocument(); + + // Destination may be regex replace string, or "{EntityType}" replaced by + // each entity's type, both of which can cause multiple output fields. + Map destMap = new HashMap<>(); + + // preserve initial values + for (final String fname : doc.getFieldNames()) { + if (!srcSelector.shouldMutate(fname)) continue; + + Collection srcFieldValues = doc.getFieldValues(fname); + if (srcFieldValues == null || srcFieldValues.isEmpty()) continue; + + String resolvedDest = dest; + + if (pattern != null) { + Matcher matcher = pattern.matcher(fname); + if (matcher.find()) { + resolvedDest = matcher.replaceAll(dest); + } else { + log.debug( + "srcSelector.shouldMutate('{}') returned true, " + + "but replacement pattern did not match, field skipped.", + fname); + continue; + } + } + + for (Object val : srcFieldValues) { + for (Pair entity : classify(val)) { + SolrInputField destField = null; + //String classification = entity.first(); + String classificationValue = entity.second(); + final String resolved = resolvedDest; + if (doc.containsKey(resolved)) { + destField = doc.getField(resolved); + } else { + SolrInputField targetField = destMap.get(resolved); + if (targetField == null) { + destField = new SolrInputField(resolved); + } else { + destField = targetField; + } + } + destField.addValue(classificationValue); + + // put it in map to avoid concurrent modification... + destMap.put(resolved, destField); + } + } + } + + for (Map.Entry entry : destMap.entrySet()) { + doc.put(entry.getKey(), entry.getValue()); + } + super.processAdd(cmd); + } + + private List> classify(Object srcFieldValue) { + + String fullText = srcFieldValue.toString(); + + // Send the fullText to the model for classification. + System.out.println("In OpenNLP doccat callling categorizer()"); + final double[] result = documentCategorizerDL.categorize(new String[] {fullText}); + + // Add the categories to the list and return it. + // Just take the top category for now. + // TODO: Allow for a threshold value for returning categories. + + List> classifications = new ArrayList<>(); + + String bestCategory = documentCategorizerDL.getBestCategory(result); + System.out.println("In OpenNLP doccat - best category = " + bestCategory); + + Pair pair = new Pair<>("classification", bestCategory); + classifications.add(pair); + + return classifications; + } + }; + } + + /** macro */ + private static SelectorParams parseSelectorParams(NamedList args) { + return FieldMutatingUpdateProcessorFactory.parseSelectorParams(args); + } + + private Map getCategories() { + + // TODO: Read these from the Solr config for the processor. + + final Map categories = new HashMap<>(); + + categories.put(0, "very bad"); + categories.put(1, "bad"); + categories.put(2, "neutral"); + categories.put(3, "good"); + categories.put(4, "very good"); + + return categories; + } +} diff --git a/solr/packaging/test/test_opennlp.bats b/solr/packaging/test/test_opennlp.bats new file mode 100644 index 00000000000..6b6dae42958 --- /dev/null +++ b/solr/packaging/test/test_opennlp.bats @@ -0,0 +1,102 @@ +#!/usr/bin/env bats + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load bats_helper + +setup_file() { + common_clean_setup + +} + +teardown_file() { + common_setup + solr stop -all +} + +setup() { + common_setup +} + +teardown() { + # save a snapshot of SOLR_HOME for failed tests + save_home_on_failure +} + +@test "Check lifecycle of sentiment classification" { + + # GPU versions is linux and windows only, not OSX. So swap jars. + rm -f ${SOLR_TIP}/modules/analysis-extras/lib/onnxruntime_gpu-1.14.0.jar + # restore + #curl --insecure -o ${SOLR_TIP}/modules/analysis-extras/lib/onnxruntime-1.14.0.jar https://repo1.maven.org/maven2/com/microsoft/onnxruntime/onnxruntime/1.14.0/onnxruntime-1.14.0.jar + cp /Users/epugh/Documents/projects/solr-epugh/onnxruntime-1.14.0.jar ${SOLR_TIP}/modules/analysis-extras/lib/ + + run ls -alh ${SOLR_TIP}/modules/analysis-extras/lib + refute_output --partial "onnxruntime_gpu" + assert_output --partial "onnxruntime-1.14.0.jar" + + # Can't figure out magic policy stuff to allow loading ONNX, so disable security manager. + export SOLR_SECURITY_MANAGER_ENABLED=false + + solr start -c -Dsolr.modules=analysis-extras + solr assert --started http://localhost:${SOLR_PORT}/solr --timeout 5000 + + run solr create -c COLL_NAME + assert_output --partial "Created collection 'COLL_NAME'" + + curl -X POST -H 'Content-type:application/json' --data-binary '{ + "add-field":{ + "name":"name", + "type":"string", + "stored":true } + }' http://localhost:${SOLR_PORT}/solr/COLL_NAME/schema + + curl -X POST -H 'Content-type:application/json' --data-binary '{ + "add-field":{ + "name":"name_sentiment", + "type":"string", + "stored":true } + }' http://localhost:${SOLR_PORT}/solr/COLL_NAME/schema + + run curl -X POST -H 'Content-type:application/json' -d '{ + "add-updateprocessor": { + "name": "sentimentClassifier", + "class": "solr.processor.OpenNLPDoccatUpdateProcessorFactory", + "modelFile": "/Users/epugh/Documents/projects/solr-epugh/exported/model.onnx", + "vocabFile": "/Users/epugh/Documents/projects/solr-epugh/exported/vocab.txt", + "source": "name", + "dest": "name_sentiment" + } + }' "http://localhost:${SOLR_PORT}/solr/COLL_NAME/config" + assert_output --partial '"status":0' + + run curl -X POST -H 'Content-type:application/json'd -d '[ + { + "id":"good", + "name" : "Jeff, i am so glad you came to this conference." + }, + { + "id":"bad", + "name" : "The name of this conference is really really terrible to say." + } + ]' "http://localhost:${SOLR_PORT}/solr/COLL_NAME/update/json?update.chain=onnx-opennlp&commit=true" + + assert_output --partial '"status":0' + + run curl -X GET "http://localhost:${SOLR_PORT}/solr/COLL_NAME/select?q=*:*" + + assert_output --partial "dude" +} From 48fd37bda5f987849191d962d35caee5110c07ac Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Tue, 10 Oct 2023 16:26:08 -0300 Subject: [PATCH 02/27] not positive I need this --- solr/packaging/test/test_opennlp.bats | 8 +++++--- versions.props | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/solr/packaging/test/test_opennlp.bats b/solr/packaging/test/test_opennlp.bats index 6b6dae42958..0b76d114eff 100644 --- a/solr/packaging/test/test_opennlp.bats +++ b/solr/packaging/test/test_opennlp.bats @@ -92,11 +92,13 @@ teardown() { "id":"bad", "name" : "The name of this conference is really really terrible to say." } - ]' "http://localhost:${SOLR_PORT}/solr/COLL_NAME/update/json?update.chain=onnx-opennlp&commit=true" + ]' "http://localhost:${SOLR_PORT}/solr/COLL_NAME/update/json?processor=sentimentClassifier&commit=true" assert_output --partial '"status":0' - run curl -X GET "http://localhost:${SOLR_PORT}/solr/COLL_NAME/select?q=*:*" + run curl -X GET "http://localhost:${SOLR_PORT}/solr/COLL_NAME/select?q=id:good" + assert_output --partial '"name_sentiment":"very good"' - assert_output --partial "dude" + run curl -X GET "http://localhost:${SOLR_PORT}/solr/COLL_NAME/select?q=id:bad" + assert_output --partial '"name_sentiment":"very bad"' } diff --git a/versions.props b/versions.props index a19f4b3f67c..04364b37378 100644 --- a/versions.props +++ b/versions.props @@ -70,3 +70,4 @@ org.semver4j:semver4j=5.2.1 org.slf4j:*=2.0.9 org.xerial.snappy:snappy-java=1.1.10.5 software.amazon.awssdk:*=2.20.155 +org.apache.opennlp:opennlp-dl=2.2.0 From cecd4f6b983ff0d0bed7635db2fbe3abe8438e48 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Tue, 10 Oct 2023 17:06:58 -0300 Subject: [PATCH 03/27] new name, and OpenNLP is kind of a implmentation detail ;-). --- ...y.java => DocumentCategorizationUpdateProcessorFactory.java} | 2 +- solr/packaging/test/test_opennlp.bats | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/{OpenNLPDoccatUpdateProcessorFactory.java => DocumentCategorizationUpdateProcessorFactory.java} (99%) diff --git a/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPDoccatUpdateProcessorFactory.java b/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizationUpdateProcessorFactory.java similarity index 99% rename from solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPDoccatUpdateProcessorFactory.java rename to solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizationUpdateProcessorFactory.java index 76f626bda7b..732073acca8 100644 --- a/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPDoccatUpdateProcessorFactory.java +++ b/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizationUpdateProcessorFactory.java @@ -52,7 +52,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class OpenNLPDoccatUpdateProcessorFactory extends UpdateRequestProcessorFactory +public class DocumentCategorizationUpdateProcessorFactory extends UpdateRequestProcessorFactory implements SolrCoreAware { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); diff --git a/solr/packaging/test/test_opennlp.bats b/solr/packaging/test/test_opennlp.bats index 0b76d114eff..7494b202322 100644 --- a/solr/packaging/test/test_opennlp.bats +++ b/solr/packaging/test/test_opennlp.bats @@ -74,7 +74,7 @@ teardown() { run curl -X POST -H 'Content-type:application/json' -d '{ "add-updateprocessor": { "name": "sentimentClassifier", - "class": "solr.processor.OpenNLPDoccatUpdateProcessorFactory", + "class": "solr.processor.DocumentCategorizationUpdateProcessorFactory", "modelFile": "/Users/epugh/Documents/projects/solr-epugh/exported/model.onnx", "vocabFile": "/Users/epugh/Documents/projects/solr-epugh/exported/vocab.txt", "source": "name", From 10abbd0bef85e54b0216dae589a7abe3c3363de7 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Tue, 10 Oct 2023 18:04:19 -0300 Subject: [PATCH 04/27] baby steps, found that packagesstore blows up killing solr when i post 600 mb file. --- solr/packaging/test/test_opennlp.bats | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/solr/packaging/test/test_opennlp.bats b/solr/packaging/test/test_opennlp.bats index 7494b202322..c9d9ef89f85 100644 --- a/solr/packaging/test/test_opennlp.bats +++ b/solr/packaging/test/test_opennlp.bats @@ -51,7 +51,7 @@ teardown() { # Can't figure out magic policy stuff to allow loading ONNX, so disable security manager. export SOLR_SECURITY_MANAGER_ENABLED=false - solr start -c -Dsolr.modules=analysis-extras + solr start -c -Dsolr.modules=analysis-extras -Denable.packages=true solr assert --started http://localhost:${SOLR_PORT}/solr --timeout 5000 run solr create -c COLL_NAME @@ -70,6 +70,12 @@ teardown() { "type":"string", "stored":true } }' http://localhost:${SOLR_PORT}/solr/COLL_NAME/schema + + run curl --data-binary @/Users/epugh/Documents/projects/solr-epugh/exported/vocab.txt -X PUT http://localhost:${SOLR_PORT}/api/cluster/files/models/sentiment/vocab.txt + assert_output --partial '"status":0' + + #run curl --data-binary @/Users/epugh/Documents/projects/solr-epugh/exported/model.onnx -X PUT http://localhost:${SOLR_PORT}/api/cluster/files/models/sentiment/modex.onnx + run curl -X POST -H 'Content-type:application/json' -d '{ "add-updateprocessor": { From dd4cc898dc97000751c7207a1c4f70d6e58c3675 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Tue, 10 Oct 2023 18:11:18 -0300 Subject: [PATCH 05/27] remove resolutionStraregy force from gradle build.. --- solr/modules/analysis-extras/build.gradle | 7 ------- versions.lock | 6 ++++-- versions.props | 2 +- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/solr/modules/analysis-extras/build.gradle b/solr/modules/analysis-extras/build.gradle index d1523154fad..ec37466026a 100644 --- a/solr/modules/analysis-extras/build.gradle +++ b/solr/modules/analysis-extras/build.gradle @@ -19,13 +19,6 @@ apply plugin: 'java-library' description = 'Additional analysis components' -configurations.all { - resolutionStrategy { - force 'org.apache.opennlp:opennlp-tools:2.2.0' - force 'org.apache.opennlp:opennlp-dl:2.2.0' - } -} - dependencies { api project(':solr:core') diff --git a/versions.lock b/versions.lock index 442f19b8a5f..759ffa8efc3 100644 --- a/versions.lock +++ b/versions.lock @@ -67,6 +67,7 @@ com.jayway.jsonpath:json-path:2.8.0 (2 constraints: 6c12952c) com.lmax:disruptor:3.4.4 (1 constraints: 0d050a36) com.mchange:c3p0:0.9.5.5 (1 constraints: c80c571b) com.mchange:mchange-commons-java:0.2.19 (1 constraints: 84075b75) +com.microsoft.onnxruntime:onnxruntime_gpu:1.14.0 (1 constraints: 030d5a21) com.pff:java-libpst:0.9.3 (1 constraints: 630cfa01) com.rometools:rome:1.18.0 (1 constraints: 910c870e) com.rometools:rome-utils:1.18.0 (1 constraints: 10095d96) @@ -234,7 +235,8 @@ org.apache.lucene:lucene-spatial-extras:9.8.0 (1 constraints: 13053036) org.apache.lucene:lucene-spatial3d:9.8.0 (1 constraints: c010bfb9) org.apache.lucene:lucene-suggest:9.8.0 (1 constraints: 13053036) org.apache.lucene:lucene-test-framework:9.8.0 (1 constraints: 13053036) -org.apache.opennlp:opennlp-tools:1.9.4 (2 constraints: f91d5c6d) +org.apache.opennlp:opennlp-dl:2.2.0 (1 constraints: 0605fb35) +org.apache.opennlp:opennlp-tools:2.2.0 (4 constraints: ce2f8ea1) org.apache.pdfbox:fontbox:2.0.26 (1 constraints: 180b72d8) org.apache.pdfbox:jbig2-imageio:3.0.4 (1 constraints: 5e0cef01) org.apache.pdfbox:jempbox:1.8.16 (1 constraints: 970c910e) @@ -347,7 +349,7 @@ org.reactivestreams:reactive-streams:1.0.4 (3 constraints: 3f2b77fd) org.semver4j:semver4j:5.2.1 (1 constraints: 0a050b36) org.slf4j:jcl-over-slf4j:2.0.9 (3 constraints: cf17cfa6) org.slf4j:jul-to-slf4j:2.0.9 (3 constraints: 29286349) -org.slf4j:slf4j-api:2.0.9 (59 constraints: dd104075) +org.slf4j:slf4j-api:2.0.9 (61 constraints: 582c8b87) org.tallison:isoparser:1.9.41.7 (1 constraints: fb0c5528) org.tallison:jmatio:1.5 (1 constraints: ff0b57e9) org.tallison:metadata-extractor:2.17.1.0 (1 constraints: f00c3b28) diff --git a/versions.props b/versions.props index 04364b37378..43d4903335e 100644 --- a/versions.props +++ b/versions.props @@ -70,4 +70,4 @@ org.semver4j:semver4j=5.2.1 org.slf4j:*=2.0.9 org.xerial.snappy:snappy-java=1.1.10.5 software.amazon.awssdk:*=2.20.155 -org.apache.opennlp:opennlp-dl=2.2.0 +org.apache.opennlp:opennlp*=2.2.0 From af18d0cb125a9ccd53778104ddd54108b6817441 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Tue, 10 Oct 2023 18:21:36 -0300 Subject: [PATCH 06/27] match name in the underlying OpenNLP project. bikeshedding! --- ...tory.java => DocumentCategorizerUpdateProcessorFactory.java} | 2 +- solr/packaging/test/test_opennlp.bats | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/{DocumentCategorizationUpdateProcessorFactory.java => DocumentCategorizerUpdateProcessorFactory.java} (99%) diff --git a/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizationUpdateProcessorFactory.java b/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java similarity index 99% rename from solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizationUpdateProcessorFactory.java rename to solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java index 732073acca8..836d6543c09 100644 --- a/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizationUpdateProcessorFactory.java +++ b/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java @@ -52,7 +52,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class DocumentCategorizationUpdateProcessorFactory extends UpdateRequestProcessorFactory +public class DocumentCategorizerUpdateProcessorFactory extends UpdateRequestProcessorFactory implements SolrCoreAware { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); diff --git a/solr/packaging/test/test_opennlp.bats b/solr/packaging/test/test_opennlp.bats index c9d9ef89f85..1c9462d521a 100644 --- a/solr/packaging/test/test_opennlp.bats +++ b/solr/packaging/test/test_opennlp.bats @@ -80,7 +80,7 @@ teardown() { run curl -X POST -H 'Content-type:application/json' -d '{ "add-updateprocessor": { "name": "sentimentClassifier", - "class": "solr.processor.DocumentCategorizationUpdateProcessorFactory", + "class": "solr.processor.DocumentCategorizerUpdateProcessorFactory", "modelFile": "/Users/epugh/Documents/projects/solr-epugh/exported/model.onnx", "vocabFile": "/Users/epugh/Documents/projects/solr-epugh/exported/vocab.txt", "source": "name", From 695062978cea21010cbdd11c19123898cb068f61 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Tue, 10 Oct 2023 19:58:52 -0300 Subject: [PATCH 07/27] tidy --- ...mentCategorizerUpdateProcessorFactory.java | 48 +++++++++++++++---- solr/packaging/test/test_opennlp.bats | 16 +++---- 2 files changed, 47 insertions(+), 17 deletions(-) diff --git a/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java b/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java index 836d6543c09..da96861d315 100644 --- a/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java +++ b/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java @@ -19,9 +19,13 @@ import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR; +import ai.onnxruntime.OrtException; import java.io.File; import java.io.IOException; import java.lang.invoke.MethodHandles; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -32,8 +36,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; - -import ai.onnxruntime.OrtException; import opennlp.dl.InferenceOptions; import opennlp.dl.doccat.DocumentCategorizerDL; import opennlp.dl.doccat.scoring.AverageClassificationScoringStrategy; @@ -43,6 +45,7 @@ import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.Pair; import org.apache.solr.core.SolrCore; +import org.apache.solr.filestore.PackageStoreAPI; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.update.AddUpdateCommand; @@ -64,6 +67,8 @@ public class DocumentCategorizerUpdateProcessorFactory extends UpdateRequestProc public static final String MODEL_PARAM = "modelFile"; public static final String VOCAB_PARAM = "vocabFile"; + private Path solrHome; + private SelectorParams srcInclusions = new SelectorParams(); private Collection srcExclusions = new ArrayList<>(); @@ -80,7 +85,9 @@ public class DocumentCategorizerUpdateProcessorFactory extends UpdateRequestProc * @see #pattern */ private String dest = null; - /** @see #dest */ + /** + * @see #dest + */ private Pattern pattern = null; protected final FieldNameSelector getSourceSelector() { @@ -393,7 +400,7 @@ private void initSourceSelectorSyntax(NamedList args) { @Override public void inform(final SolrCore core) { - + this.solrHome = Paths.get(core.getCoreContainer().getSolrHome()); srcSelector = FieldMutatingUpdateProcessor.createFieldNameSelector( core.getResourceLoader(), @@ -423,11 +430,34 @@ public final UpdateRequestProcessor getInstance( { // Initialize the categorizer. - final File modelFile = new File(model); - final File vocabFile = new File(vocab); + + var path = solrHome.resolve(PackageStoreAPI.PACKAGESTORE_DIRECTORY); + File modelFile = new File(model); + File vocabFile = new File(vocab); + + if (!Files.exists(modelFile.toPath())) { + System.out.println("modelFile doesnt exist:" + modelFile.toPath()); + modelFile = new File(path + "/" + model); + System.out.println("New file:" + modelFile); + } + if (!Files.exists(vocabFile.toPath())) { + System.out.println("vocabFile doesnt exist:" + vocabFile.toPath()); + vocabFile = new File(path + "/" + vocab); + } + + System.out.println("model is " + model); + System.out.println("does modelFile exist?" + modelFile.exists()); + System.out.println("model full path is " + modelFile.getAbsolutePath()); + System.out.println("In OpenNLP doccat initializing the documentCategorizerDL"); try { - documentCategorizerDL = new DocumentCategorizerDL(modelFile, vocabFile, getCategories(),new AverageClassificationScoringStrategy(), new InferenceOptions()); + documentCategorizerDL = + new DocumentCategorizerDL( + modelFile, + vocabFile, + getCategories(), + new AverageClassificationScoringStrategy(), + new InferenceOptions()); } catch (IOException e) { e.printStackTrace(); } catch (OrtException e) { @@ -469,7 +499,7 @@ public void processAdd(AddUpdateCommand cmd) throws IOException { for (Object val : srcFieldValues) { for (Pair entity : classify(val)) { SolrInputField destField = null; - //String classification = entity.first(); + // String classification = entity.first(); String classificationValue = entity.second(); final String resolved = resolvedDest; if (doc.containsKey(resolved)) { @@ -501,7 +531,7 @@ private List> classify(Object srcFieldValue) { String fullText = srcFieldValue.toString(); // Send the fullText to the model for classification. - System.out.println("In OpenNLP doccat callling categorizer()"); + System.out.println("In OpenNLP doccat calling categorizer()"); final double[] result = documentCategorizerDL.categorize(new String[] {fullText}); // Add the categories to the list and return it. diff --git a/solr/packaging/test/test_opennlp.bats b/solr/packaging/test/test_opennlp.bats index 1c9462d521a..ffe0f96efb4 100644 --- a/solr/packaging/test/test_opennlp.bats +++ b/solr/packaging/test/test_opennlp.bats @@ -51,7 +51,7 @@ teardown() { # Can't figure out magic policy stuff to allow loading ONNX, so disable security manager. export SOLR_SECURITY_MANAGER_ENABLED=false - solr start -c -Dsolr.modules=analysis-extras -Denable.packages=true + solr start -m 4g -c -Dsolr.modules=analysis-extras -Denable.packages=true solr assert --started http://localhost:${SOLR_PORT}/solr --timeout 5000 run solr create -c COLL_NAME @@ -62,27 +62,27 @@ teardown() { "name":"name", "type":"string", "stored":true } - }' http://localhost:${SOLR_PORT}/solr/COLL_NAME/schema + }' "http://localhost:${SOLR_PORT}/solr/COLL_NAME/schema" curl -X POST -H 'Content-type:application/json' --data-binary '{ "add-field":{ "name":"name_sentiment", "type":"string", "stored":true } - }' http://localhost:${SOLR_PORT}/solr/COLL_NAME/schema + }' "http://localhost:${SOLR_PORT}/solr/COLL_NAME/schema" - run curl --data-binary @/Users/epugh/Documents/projects/solr-epugh/exported/vocab.txt -X PUT http://localhost:${SOLR_PORT}/api/cluster/files/models/sentiment/vocab.txt + run curl --data-binary @/Users/epugh/Documents/projects/solr-epugh/exported/vocab.txt -X PUT "http://localhost:${SOLR_PORT}/api/cluster/files/models/sentiment/vocab.txt" assert_output --partial '"status":0' - #run curl --data-binary @/Users/epugh/Documents/projects/solr-epugh/exported/model.onnx -X PUT http://localhost:${SOLR_PORT}/api/cluster/files/models/sentiment/modex.onnx - + run curl --data-binary @/Users/epugh/Documents/projects/solr-epugh/exported/model.onnx -X PUT "http://localhost:${SOLR_PORT}/api/cluster/files/models/sentiment/model.onnx" + assert_output --partial '"status":0' run curl -X POST -H 'Content-type:application/json' -d '{ "add-updateprocessor": { "name": "sentimentClassifier", "class": "solr.processor.DocumentCategorizerUpdateProcessorFactory", - "modelFile": "/Users/epugh/Documents/projects/solr-epugh/exported/model.onnx", - "vocabFile": "/Users/epugh/Documents/projects/solr-epugh/exported/vocab.txt", + "modelFile": "models/sentiment/model.onnx", + "vocabFile": "models/sentiment/vocab.txt", "source": "name", "dest": "name_sentiment" } From 1adba222288c0700944ab5281e75d0074725c39b Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Wed, 11 Oct 2023 12:22:20 -0300 Subject: [PATCH 08/27] reorder params --- versions.props | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/versions.props b/versions.props index 43d4903335e..3075bb25f77 100644 --- a/versions.props +++ b/versions.props @@ -48,6 +48,7 @@ org.apache.httpcomponents:httpmime=4.5.14 org.apache.kerby:*=1.0.1 org.apache.logging.log4j:*=2.20.0 org.apache.lucene:*=9.8.0 +org.apache.opennlp:opennlp*=2.2.0 org.apache.tika:*=1.28.5 org.apache.tomcat:annotations-api=6.0.53 org.apache.zookeeper:*=3.9.0 @@ -70,4 +71,3 @@ org.semver4j:semver4j=5.2.1 org.slf4j:*=2.0.9 org.xerial.snappy:snappy-java=1.1.10.5 software.amazon.awssdk:*=2.20.155 -org.apache.opennlp:opennlp*=2.2.0 From bcbf16d084339009f6aa471bb989ff02a5191072 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Wed, 11 Oct 2023 12:22:44 -0300 Subject: [PATCH 09/27] log formatting --- ...cumentCategorizerUpdateProcessorFactory.java | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java b/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java index da96861d315..092cec297cb 100644 --- a/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java +++ b/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java @@ -100,8 +100,6 @@ protected final FieldNameSelector getSourceSelector() { @Override public void init(NamedList args) { - System.out.println("In OpenNLP doccat init()"); - // high level (loose) check for which type of config we have. // // individual init methods do more strict syntax checking @@ -436,20 +434,13 @@ public final UpdateRequestProcessor getInstance( File vocabFile = new File(vocab); if (!Files.exists(modelFile.toPath())) { - System.out.println("modelFile doesnt exist:" + modelFile.toPath()); modelFile = new File(path + "/" + model); - System.out.println("New file:" + modelFile); } if (!Files.exists(vocabFile.toPath())) { - System.out.println("vocabFile doesnt exist:" + vocabFile.toPath()); vocabFile = new File(path + "/" + vocab); } - System.out.println("model is " + model); - System.out.println("does modelFile exist?" + modelFile.exists()); - System.out.println("model full path is " + modelFile.getAbsolutePath()); - - System.out.println("In OpenNLP doccat initializing the documentCategorizerDL"); + log.info("initializing the documentCategorizerDL"); try { documentCategorizerDL = new DocumentCategorizerDL( @@ -531,7 +522,7 @@ private List> classify(Object srcFieldValue) { String fullText = srcFieldValue.toString(); // Send the fullText to the model for classification. - System.out.println("In OpenNLP doccat calling categorizer()"); + log.info("calling categorizer()"); final double[] result = documentCategorizerDL.categorize(new String[] {fullText}); // Add the categories to the list and return it. @@ -541,7 +532,9 @@ private List> classify(Object srcFieldValue) { List> classifications = new ArrayList<>(); String bestCategory = documentCategorizerDL.getBestCategory(result); - System.out.println("In OpenNLP doccat - best category = " + bestCategory); + if (log.isInfoEnabled()) { + log.info("best category = {}", bestCategory); + } Pair pair = new Pair<>("classification", bestCategory); classifications.add(pair); From e634b1c3e03684df3473611805e345f5deefb51d Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Wed, 11 Oct 2023 12:26:23 -0300 Subject: [PATCH 10/27] regenerate... --- solr/licenses/opennlp-dl-2.2.0.jar.sha1 | 1 + solr/licenses/opennlp-tools-1.9.4.jar.sha1 | 1 - solr/licenses/opennlp-tools-2.2.0.jar.sha1 | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 solr/licenses/opennlp-dl-2.2.0.jar.sha1 delete mode 100644 solr/licenses/opennlp-tools-1.9.4.jar.sha1 create mode 100644 solr/licenses/opennlp-tools-2.2.0.jar.sha1 diff --git a/solr/licenses/opennlp-dl-2.2.0.jar.sha1 b/solr/licenses/opennlp-dl-2.2.0.jar.sha1 new file mode 100644 index 00000000000..e43acdce4bd --- /dev/null +++ b/solr/licenses/opennlp-dl-2.2.0.jar.sha1 @@ -0,0 +1 @@ +1d95a6b5e67c036ee4762f67ffcd0c99de3a06f3 diff --git a/solr/licenses/opennlp-tools-1.9.4.jar.sha1 b/solr/licenses/opennlp-tools-1.9.4.jar.sha1 deleted file mode 100644 index fa7e85dd48f..00000000000 --- a/solr/licenses/opennlp-tools-1.9.4.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -dd5c4a6d82453bcccb78ba4ac90f166366dde12b diff --git a/solr/licenses/opennlp-tools-2.2.0.jar.sha1 b/solr/licenses/opennlp-tools-2.2.0.jar.sha1 new file mode 100644 index 00000000000..41bcf646a9d --- /dev/null +++ b/solr/licenses/opennlp-tools-2.2.0.jar.sha1 @@ -0,0 +1 @@ +ce819219217257486d7174b09f8800082f1e999c From 778e3f45fd07249678a5dbf772abc309b0dd4169 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Wed, 11 Oct 2023 12:14:04 -0400 Subject: [PATCH 11/27] dynamically grab the models from hugging face. --- solr/packaging/test/test_opennlp.bats | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/solr/packaging/test/test_opennlp.bats b/solr/packaging/test/test_opennlp.bats index ffe0f96efb4..30477ded05a 100644 --- a/solr/packaging/test/test_opennlp.bats +++ b/solr/packaging/test/test_opennlp.bats @@ -36,7 +36,18 @@ teardown() { save_home_on_failure } +# This BATS style test is really just to help explore the space of Modern NLP in +# Apache Solr, versus a "true" integration test that I want to have run regularly. +# On the other hand, since integrationg NLP requires a lot of steps, maybe having this +# long test as an "integration" test is something we decide is okay? +# I also have dreams of incorporating this as code snippets in a Tutorial via the ascii doc tags +# like we use for the SolrJ code snippets. That way we know the snippets continue to work! @test "Check lifecycle of sentiment classification" { + + pip install transformers onnx onnxruntime + python -m transformers.onnx -m nlptown/bert-base-multilingual-uncased-sentiment --feature sequence-classification ${SOLR_TIP}/models/sentiment + + curl --insecure -o ${SOLR_TIP}/models/sentiment/vocab.txt https://huggingface.co/nlptown/bert-base-multilingual-uncased-sentiment/resolve/main/vocab.txt # GPU versions is linux and windows only, not OSX. So swap jars. rm -f ${SOLR_TIP}/modules/analysis-extras/lib/onnxruntime_gpu-1.14.0.jar @@ -71,10 +82,10 @@ teardown() { "stored":true } }' "http://localhost:${SOLR_PORT}/solr/COLL_NAME/schema" - run curl --data-binary @/Users/epugh/Documents/projects/solr-epugh/exported/vocab.txt -X PUT "http://localhost:${SOLR_PORT}/api/cluster/files/models/sentiment/vocab.txt" + run curl --data-binary @${SOLR_TIP}/models/sentiment/vocab.txt -X PUT "http://localhost:${SOLR_PORT}/api/cluster/files/models/sentiment/vocab.txt" assert_output --partial '"status":0' - run curl --data-binary @/Users/epugh/Documents/projects/solr-epugh/exported/model.onnx -X PUT "http://localhost:${SOLR_PORT}/api/cluster/files/models/sentiment/model.onnx" + run curl --data-binary @${SOLR_TIP}/models/sentiment/model.onnx -X PUT "http://localhost:${SOLR_PORT}/api/cluster/files/models/sentiment/model.onnx" assert_output --partial '"status":0' run curl -X POST -H 'Content-type:application/json' -d '{ From 405153b64337ee1c7f8e66d3cbd11b202ecffc27 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Wed, 11 Oct 2023 12:25:58 -0400 Subject: [PATCH 12/27] use logging structure for stack traces --- .../processor/DocumentCategorizerUpdateProcessorFactory.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java b/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java index 092cec297cb..11f924b2dda 100644 --- a/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java +++ b/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java @@ -450,9 +450,9 @@ public final UpdateRequestProcessor getInstance( new AverageClassificationScoringStrategy(), new InferenceOptions()); } catch (IOException e) { - e.printStackTrace(); + log.warn("Attempted to initialize documentCategorizerDL", e); } catch (OrtException e) { - e.printStackTrace(); + log.warn("Attempted to initialize documentCategorizerDL", e); } } From 4e54090d2a10638dfd24311428084da56a043c89 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Fri, 13 Oct 2023 16:55:46 -0400 Subject: [PATCH 13/27] download the correct jar, and document the work to remove this need in the OpenNLP project --- solr/packaging/test/test_opennlp.bats | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/solr/packaging/test/test_opennlp.bats b/solr/packaging/test/test_opennlp.bats index 30477ded05a..a56ca1b3566 100644 --- a/solr/packaging/test/test_opennlp.bats +++ b/solr/packaging/test/test_opennlp.bats @@ -50,10 +50,9 @@ teardown() { curl --insecure -o ${SOLR_TIP}/models/sentiment/vocab.txt https://huggingface.co/nlptown/bert-base-multilingual-uncased-sentiment/resolve/main/vocab.txt # GPU versions is linux and windows only, not OSX. So swap jars. + # Pending https://issues.apache.org/jira/browse/OPENNLP-1515 rm -f ${SOLR_TIP}/modules/analysis-extras/lib/onnxruntime_gpu-1.14.0.jar - # restore - #curl --insecure -o ${SOLR_TIP}/modules/analysis-extras/lib/onnxruntime-1.14.0.jar https://repo1.maven.org/maven2/com/microsoft/onnxruntime/onnxruntime/1.14.0/onnxruntime-1.14.0.jar - cp /Users/epugh/Documents/projects/solr-epugh/onnxruntime-1.14.0.jar ${SOLR_TIP}/modules/analysis-extras/lib/ + curl --insecure -o ${SOLR_TIP}/modules/analysis-extras/lib/onnxruntime-1.14.0.jar https://repo1.maven.org/maven2/com/microsoft/onnxruntime/onnxruntime/1.14.0/onnxruntime-1.14.0.jar run ls -alh ${SOLR_TIP}/modules/analysis-extras/lib refute_output --partial "onnxruntime_gpu" From 9e30c293bd7c57b635a6e02e51c0715c57aa636e Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Mon, 27 Nov 2023 12:51:03 -0500 Subject: [PATCH 14/27] Upgrade to OpenNLP 2.3.1 and related dependencies. --- .../jackson-annotations-2.15.2.jar.sha1 | 1 - .../jackson-annotations-2.15.3.jar.sha1 | 1 + solr/licenses/jackson-core-2.15.2.jar.sha1 | 1 - solr/licenses/jackson-core-2.15.3.jar.sha1 | 1 + .../licenses/jackson-databind-2.15.2.jar.sha1 | 1 - .../licenses/jackson-databind-2.15.3.jar.sha1 | 1 + .../jackson-dataformat-cbor-2.15.2.jar.sha1 | 1 - .../jackson-dataformat-cbor-2.15.3.jar.sha1 | 1 + .../jackson-dataformat-smile-2.15.2.jar.sha1 | 1 - .../jackson-dataformat-smile-2.15.3.jar.sha1 | 1 + .../jackson-dataformat-xml-2.15.2.jar.sha1 | 1 - .../jackson-dataformat-xml-2.15.3.jar.sha1 | 1 + .../jackson-datatype-jdk8-2.15.2.jar.sha1 | 1 - .../jackson-datatype-jdk8-2.15.3.jar.sha1 | 1 + .../jackson-datatype-jsr310-2.15.2.jar.sha1 | 1 - .../jackson-datatype-jsr310-2.15.3.jar.sha1 | 1 + ...on-module-jaxb-annotations-2.15.2.jar.sha1 | 1 - ...on-module-jaxb-annotations-2.15.3.jar.sha1 | 1 + .../jackson-module-kotlin-2.15.2.jar.sha1 | 1 - .../jackson-module-kotlin-2.15.3.jar.sha1 | 1 + ...son-module-parameter-names-2.15.2.jar.sha1 | 1 - ...son-module-parameter-names-2.15.3.jar.sha1 | 1 + solr/licenses/onnxruntime-1.15.0.jar.sha1 | 1 + solr/licenses/opennlp-dl-2.2.0.jar.sha1 | 1 - solr/licenses/opennlp-dl-2.3.1.jar.sha1 | 1 + solr/licenses/opennlp-tools-2.2.0.jar.sha1 | 1 - solr/licenses/opennlp-tools-2.3.1.jar.sha1 | 1 + versions.lock | 30 +++++++++---------- versions.props | 2 +- 29 files changed, 30 insertions(+), 29 deletions(-) delete mode 100644 solr/licenses/jackson-annotations-2.15.2.jar.sha1 create mode 100644 solr/licenses/jackson-annotations-2.15.3.jar.sha1 delete mode 100644 solr/licenses/jackson-core-2.15.2.jar.sha1 create mode 100644 solr/licenses/jackson-core-2.15.3.jar.sha1 delete mode 100644 solr/licenses/jackson-databind-2.15.2.jar.sha1 create mode 100644 solr/licenses/jackson-databind-2.15.3.jar.sha1 delete mode 100644 solr/licenses/jackson-dataformat-cbor-2.15.2.jar.sha1 create mode 100644 solr/licenses/jackson-dataformat-cbor-2.15.3.jar.sha1 delete mode 100644 solr/licenses/jackson-dataformat-smile-2.15.2.jar.sha1 create mode 100644 solr/licenses/jackson-dataformat-smile-2.15.3.jar.sha1 delete mode 100644 solr/licenses/jackson-dataformat-xml-2.15.2.jar.sha1 create mode 100644 solr/licenses/jackson-dataformat-xml-2.15.3.jar.sha1 delete mode 100644 solr/licenses/jackson-datatype-jdk8-2.15.2.jar.sha1 create mode 100644 solr/licenses/jackson-datatype-jdk8-2.15.3.jar.sha1 delete mode 100644 solr/licenses/jackson-datatype-jsr310-2.15.2.jar.sha1 create mode 100644 solr/licenses/jackson-datatype-jsr310-2.15.3.jar.sha1 delete mode 100644 solr/licenses/jackson-module-jaxb-annotations-2.15.2.jar.sha1 create mode 100644 solr/licenses/jackson-module-jaxb-annotations-2.15.3.jar.sha1 delete mode 100644 solr/licenses/jackson-module-kotlin-2.15.2.jar.sha1 create mode 100644 solr/licenses/jackson-module-kotlin-2.15.3.jar.sha1 delete mode 100644 solr/licenses/jackson-module-parameter-names-2.15.2.jar.sha1 create mode 100644 solr/licenses/jackson-module-parameter-names-2.15.3.jar.sha1 create mode 100644 solr/licenses/onnxruntime-1.15.0.jar.sha1 delete mode 100644 solr/licenses/opennlp-dl-2.2.0.jar.sha1 create mode 100644 solr/licenses/opennlp-dl-2.3.1.jar.sha1 delete mode 100644 solr/licenses/opennlp-tools-2.2.0.jar.sha1 create mode 100644 solr/licenses/opennlp-tools-2.3.1.jar.sha1 diff --git a/solr/licenses/jackson-annotations-2.15.2.jar.sha1 b/solr/licenses/jackson-annotations-2.15.2.jar.sha1 deleted file mode 100644 index 9f2de051b36..00000000000 --- a/solr/licenses/jackson-annotations-2.15.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -4724a65ac8e8d156a24898d50fd5dbd3642870b8 diff --git a/solr/licenses/jackson-annotations-2.15.3.jar.sha1 b/solr/licenses/jackson-annotations-2.15.3.jar.sha1 new file mode 100644 index 00000000000..e9e7e4e23f7 --- /dev/null +++ b/solr/licenses/jackson-annotations-2.15.3.jar.sha1 @@ -0,0 +1 @@ +79baf4e605eb3bbb60b1c475d44a7aecceea1d60 diff --git a/solr/licenses/jackson-core-2.15.2.jar.sha1 b/solr/licenses/jackson-core-2.15.2.jar.sha1 deleted file mode 100644 index e94a74ef317..00000000000 --- a/solr/licenses/jackson-core-2.15.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -a6fe1836469a69b3ff66037c324d75fc66ef137c diff --git a/solr/licenses/jackson-core-2.15.3.jar.sha1 b/solr/licenses/jackson-core-2.15.3.jar.sha1 new file mode 100644 index 00000000000..a0c13d95f67 --- /dev/null +++ b/solr/licenses/jackson-core-2.15.3.jar.sha1 @@ -0,0 +1 @@ +60d600567c1862840397bf9ff5a92398edc5797b diff --git a/solr/licenses/jackson-databind-2.15.2.jar.sha1 b/solr/licenses/jackson-databind-2.15.2.jar.sha1 deleted file mode 100644 index 292a15ec3aa..00000000000 --- a/solr/licenses/jackson-databind-2.15.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -9353b021f10c307c00328f52090de2bdb4b6ff9c diff --git a/solr/licenses/jackson-databind-2.15.3.jar.sha1 b/solr/licenses/jackson-databind-2.15.3.jar.sha1 new file mode 100644 index 00000000000..5d31fd2ab96 --- /dev/null +++ b/solr/licenses/jackson-databind-2.15.3.jar.sha1 @@ -0,0 +1 @@ +a734bc2c47a9453c4efa772461a3aeb273c010d9 diff --git a/solr/licenses/jackson-dataformat-cbor-2.15.2.jar.sha1 b/solr/licenses/jackson-dataformat-cbor-2.15.2.jar.sha1 deleted file mode 100644 index 99dad0d6f2d..00000000000 --- a/solr/licenses/jackson-dataformat-cbor-2.15.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -baafc85c70765594add14bd93f3efd68e1945b76 diff --git a/solr/licenses/jackson-dataformat-cbor-2.15.3.jar.sha1 b/solr/licenses/jackson-dataformat-cbor-2.15.3.jar.sha1 new file mode 100644 index 00000000000..6b5eefe89da --- /dev/null +++ b/solr/licenses/jackson-dataformat-cbor-2.15.3.jar.sha1 @@ -0,0 +1 @@ +c30a4e69e760401a98b9fa458a4f2db6fe392d7f diff --git a/solr/licenses/jackson-dataformat-smile-2.15.2.jar.sha1 b/solr/licenses/jackson-dataformat-smile-2.15.2.jar.sha1 deleted file mode 100644 index cf03f49842e..00000000000 --- a/solr/licenses/jackson-dataformat-smile-2.15.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -16d1dd22f7d641459ed056399d4f7df0220f1176 diff --git a/solr/licenses/jackson-dataformat-smile-2.15.3.jar.sha1 b/solr/licenses/jackson-dataformat-smile-2.15.3.jar.sha1 new file mode 100644 index 00000000000..05f7218fa03 --- /dev/null +++ b/solr/licenses/jackson-dataformat-smile-2.15.3.jar.sha1 @@ -0,0 +1 @@ +0b7688240ac7943b981cdf6592ee38101332bf5c diff --git a/solr/licenses/jackson-dataformat-xml-2.15.2.jar.sha1 b/solr/licenses/jackson-dataformat-xml-2.15.2.jar.sha1 deleted file mode 100644 index 1a3fd34d037..00000000000 --- a/solr/licenses/jackson-dataformat-xml-2.15.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -e7e9038dee5c1adb1ebd07d3669e0e1182ac5b60 diff --git a/solr/licenses/jackson-dataformat-xml-2.15.3.jar.sha1 b/solr/licenses/jackson-dataformat-xml-2.15.3.jar.sha1 new file mode 100644 index 00000000000..a2b54986fbc --- /dev/null +++ b/solr/licenses/jackson-dataformat-xml-2.15.3.jar.sha1 @@ -0,0 +1 @@ +50f061b3ff15979ee0c784c657f5ea8ba7b920c5 diff --git a/solr/licenses/jackson-datatype-jdk8-2.15.2.jar.sha1 b/solr/licenses/jackson-datatype-jdk8-2.15.2.jar.sha1 deleted file mode 100644 index 5203e6d75df..00000000000 --- a/solr/licenses/jackson-datatype-jdk8-2.15.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -66a50e089cfd2f93896b9b6f7a734cea7bcf2f31 diff --git a/solr/licenses/jackson-datatype-jdk8-2.15.3.jar.sha1 b/solr/licenses/jackson-datatype-jdk8-2.15.3.jar.sha1 new file mode 100644 index 00000000000..42ec1c24029 --- /dev/null +++ b/solr/licenses/jackson-datatype-jdk8-2.15.3.jar.sha1 @@ -0,0 +1 @@ +80158cb020c7bd4e4ba94d8d752a65729dc943b2 diff --git a/solr/licenses/jackson-datatype-jsr310-2.15.2.jar.sha1 b/solr/licenses/jackson-datatype-jsr310-2.15.2.jar.sha1 deleted file mode 100644 index 236746c6766..00000000000 --- a/solr/licenses/jackson-datatype-jsr310-2.15.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -30d16ec2aef6d8094c5e2dce1d95034ca8b6cb42 diff --git a/solr/licenses/jackson-datatype-jsr310-2.15.3.jar.sha1 b/solr/licenses/jackson-datatype-jsr310-2.15.3.jar.sha1 new file mode 100644 index 00000000000..4e2f50bfe68 --- /dev/null +++ b/solr/licenses/jackson-datatype-jsr310-2.15.3.jar.sha1 @@ -0,0 +1 @@ +4a20a0e104931bfa72f24ef358c2eb63f1ef2aaf diff --git a/solr/licenses/jackson-module-jaxb-annotations-2.15.2.jar.sha1 b/solr/licenses/jackson-module-jaxb-annotations-2.15.2.jar.sha1 deleted file mode 100644 index 62271773656..00000000000 --- a/solr/licenses/jackson-module-jaxb-annotations-2.15.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6a22fd1c7b0f9788e81eea32c11dc8c1ba421f18 diff --git a/solr/licenses/jackson-module-jaxb-annotations-2.15.3.jar.sha1 b/solr/licenses/jackson-module-jaxb-annotations-2.15.3.jar.sha1 new file mode 100644 index 00000000000..d5c42a13c1c --- /dev/null +++ b/solr/licenses/jackson-module-jaxb-annotations-2.15.3.jar.sha1 @@ -0,0 +1 @@ +74e8ef60b65b42051258465f06c06195e61e92f2 diff --git a/solr/licenses/jackson-module-kotlin-2.15.2.jar.sha1 b/solr/licenses/jackson-module-kotlin-2.15.2.jar.sha1 deleted file mode 100644 index 0a109b33692..00000000000 --- a/solr/licenses/jackson-module-kotlin-2.15.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -0475c9721f5a2a5b7bea57d504bd8b0586d1ba5e diff --git a/solr/licenses/jackson-module-kotlin-2.15.3.jar.sha1 b/solr/licenses/jackson-module-kotlin-2.15.3.jar.sha1 new file mode 100644 index 00000000000..6398f822e34 --- /dev/null +++ b/solr/licenses/jackson-module-kotlin-2.15.3.jar.sha1 @@ -0,0 +1 @@ +036ea7813ada694e67f562ff1dc6f3b47883e373 diff --git a/solr/licenses/jackson-module-parameter-names-2.15.2.jar.sha1 b/solr/licenses/jackson-module-parameter-names-2.15.2.jar.sha1 deleted file mode 100644 index d571d29d5e9..00000000000 --- a/solr/licenses/jackson-module-parameter-names-2.15.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -75f8d2788db20f6c587c7a19e94fb6248c314241 diff --git a/solr/licenses/jackson-module-parameter-names-2.15.3.jar.sha1 b/solr/licenses/jackson-module-parameter-names-2.15.3.jar.sha1 new file mode 100644 index 00000000000..04d6421d340 --- /dev/null +++ b/solr/licenses/jackson-module-parameter-names-2.15.3.jar.sha1 @@ -0,0 +1 @@ +8d251b90c5358677e7d8161e0c2488e6f84f49da diff --git a/solr/licenses/onnxruntime-1.15.0.jar.sha1 b/solr/licenses/onnxruntime-1.15.0.jar.sha1 new file mode 100644 index 00000000000..28b4f0672ce --- /dev/null +++ b/solr/licenses/onnxruntime-1.15.0.jar.sha1 @@ -0,0 +1 @@ +6db39caba947384ce09c3071e84cb73437a77e74 diff --git a/solr/licenses/opennlp-dl-2.2.0.jar.sha1 b/solr/licenses/opennlp-dl-2.2.0.jar.sha1 deleted file mode 100644 index e43acdce4bd..00000000000 --- a/solr/licenses/opennlp-dl-2.2.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -1d95a6b5e67c036ee4762f67ffcd0c99de3a06f3 diff --git a/solr/licenses/opennlp-dl-2.3.1.jar.sha1 b/solr/licenses/opennlp-dl-2.3.1.jar.sha1 new file mode 100644 index 00000000000..12621381b08 --- /dev/null +++ b/solr/licenses/opennlp-dl-2.3.1.jar.sha1 @@ -0,0 +1 @@ +8ff28619e6a377fe467b47274f39fd1fc9b2c303 diff --git a/solr/licenses/opennlp-tools-2.2.0.jar.sha1 b/solr/licenses/opennlp-tools-2.2.0.jar.sha1 deleted file mode 100644 index 41bcf646a9d..00000000000 --- a/solr/licenses/opennlp-tools-2.2.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -ce819219217257486d7174b09f8800082f1e999c diff --git a/solr/licenses/opennlp-tools-2.3.1.jar.sha1 b/solr/licenses/opennlp-tools-2.3.1.jar.sha1 new file mode 100644 index 00000000000..b900c01bb8c --- /dev/null +++ b/solr/licenses/opennlp-tools-2.3.1.jar.sha1 @@ -0,0 +1 @@ +40a6c39a0911ed1fb17171ea043b8bf673bd71b5 diff --git a/versions.lock b/versions.lock index 759ffa8efc3..d080ff5893b 100644 --- a/versions.lock +++ b/versions.lock @@ -6,13 +6,13 @@ com.carrotsearch:hppc:0.9.1 (2 constraints: ad0fc9a6) com.carrotsearch.randomizedtesting:randomizedtesting-runner:2.8.1 (2 constraints: cf1501e2) com.cybozu.labs:langdetect:1.1-20120112 (1 constraints: 5c066d5e) com.epam:parso:2.0.14 (1 constraints: 8e0c750e) -com.fasterxml.jackson:jackson-bom:2.15.2 (12 constraints: 4ef8ad55) -com.fasterxml.jackson.core:jackson-annotations:2.15.2 (10 constraints: 9cbe6d17) -com.fasterxml.jackson.core:jackson-core:2.15.2 (13 constraints: 4302e328) -com.fasterxml.jackson.core:jackson-databind:2.15.2 (18 constraints: 8d63878e) -com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:2.15.2 (2 constraints: 631c9af1) -com.fasterxml.jackson.dataformat:jackson-dataformat-smile:2.15.2 (1 constraints: ba0eab66) -com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.15.2 (2 constraints: a724cae0) +com.fasterxml.jackson:jackson-bom:2.15.3 (12 constraints: 59f89c63) +com.fasterxml.jackson.core:jackson-annotations:2.15.3 (10 constraints: a2bea21f) +com.fasterxml.jackson.core:jackson-core:2.15.3 (13 constraints: 4c026138) +com.fasterxml.jackson.core:jackson-databind:2.15.3 (19 constraints: 9d70f32b) +com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:2.15.3 (2 constraints: 641c9bf1) +com.fasterxml.jackson.dataformat:jackson-dataformat-smile:2.15.3 (1 constraints: bb0eac66) +com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.15.3 (2 constraints: a82409e1) com.fasterxml.woodstox:woodstox-core:6.5.1 (2 constraints: a0239c84) com.github.ben-manes.caffeine:caffeine:3.1.8 (1 constraints: 0e050536) com.github.jai-imageio:jai-imageio-core:1.4.0 (1 constraints: 5c0ced01) @@ -67,7 +67,7 @@ com.jayway.jsonpath:json-path:2.8.0 (2 constraints: 6c12952c) com.lmax:disruptor:3.4.4 (1 constraints: 0d050a36) com.mchange:c3p0:0.9.5.5 (1 constraints: c80c571b) com.mchange:mchange-commons-java:0.2.19 (1 constraints: 84075b75) -com.microsoft.onnxruntime:onnxruntime_gpu:1.14.0 (1 constraints: 030d5a21) +com.microsoft.onnxruntime:onnxruntime:1.15.0 (1 constraints: 040d5d21) com.pff:java-libpst:0.9.3 (1 constraints: 630cfa01) com.rometools:rome:1.18.0 (1 constraints: 910c870e) com.rometools:rome-utils:1.18.0 (1 constraints: 10095d96) @@ -235,8 +235,8 @@ org.apache.lucene:lucene-spatial-extras:9.8.0 (1 constraints: 13053036) org.apache.lucene:lucene-spatial3d:9.8.0 (1 constraints: c010bfb9) org.apache.lucene:lucene-suggest:9.8.0 (1 constraints: 13053036) org.apache.lucene:lucene-test-framework:9.8.0 (1 constraints: 13053036) -org.apache.opennlp:opennlp-dl:2.2.0 (1 constraints: 0605fb35) -org.apache.opennlp:opennlp-tools:2.2.0 (4 constraints: ce2f8ea1) +org.apache.opennlp:opennlp-dl:2.3.1 (1 constraints: 0805ff35) +org.apache.opennlp:opennlp-tools:2.3.1 (4 constraints: d22fdaa2) org.apache.pdfbox:fontbox:2.0.26 (1 constraints: 180b72d8) org.apache.pdfbox:jbig2-imageio:3.0.4 (1 constraints: 5e0cef01) org.apache.pdfbox:jempbox:1.8.16 (1 constraints: 970c910e) @@ -390,11 +390,11 @@ com.amazonaws:aws-java-sdk-core:1.12.501 (2 constraints: b01a32b3) com.amazonaws:aws-java-sdk-kms:1.12.501 (1 constraints: 060dbd37) com.amazonaws:aws-java-sdk-s3:1.12.501 (1 constraints: 10136f43) com.amazonaws:jmespath-java:1.12.501 (2 constraints: b01a32b3) -com.fasterxml.jackson.dataformat:jackson-dataformat-xml:2.15.2 (2 constraints: aa195413) -com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.15.2 (3 constraints: fc2e56b4) -com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.15.2 (4 constraints: 6d485635) -com.fasterxml.jackson.module:jackson-module-kotlin:2.15.2 (2 constraints: a91d3a60) -com.fasterxml.jackson.module:jackson-module-parameter-names:2.15.2 (2 constraints: 0c243f82) +com.fasterxml.jackson.dataformat:jackson-dataformat-xml:2.15.3 (2 constraints: ab195513) +com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.15.3 (3 constraints: fd2e92b4) +com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.15.3 (4 constraints: 6e48d735) +com.fasterxml.jackson.module:jackson-module-kotlin:2.15.3 (2 constraints: aa1d6760) +com.fasterxml.jackson.module:jackson-module-parameter-names:2.15.3 (2 constraints: 0d247b82) com.google.cloud:google-cloud-nio:0.127.3 (1 constraints: 9a0e5e6c) com.nimbusds:content-type:2.2 (1 constraints: d80b68eb) com.nimbusds:lang-tag:1.7 (1 constraints: dc0b6aeb) diff --git a/versions.props b/versions.props index 3075bb25f77..a8696d0d5aa 100644 --- a/versions.props +++ b/versions.props @@ -48,7 +48,7 @@ org.apache.httpcomponents:httpmime=4.5.14 org.apache.kerby:*=1.0.1 org.apache.logging.log4j:*=2.20.0 org.apache.lucene:*=9.8.0 -org.apache.opennlp:opennlp*=2.2.0 +org.apache.opennlp:opennlp*=2.3.1 org.apache.tika:*=1.28.5 org.apache.tomcat:annotations-api=6.0.53 org.apache.zookeeper:*=3.9.0 From ef9364aa5d6afdb11df86c132047a097386cba36 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Mon, 27 Nov 2023 12:52:24 -0500 Subject: [PATCH 15/27] no longer need workarounds for gpu/cpu issues with updated OpenNLP. --- solr/packaging/test/test_opennlp.bats | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/solr/packaging/test/test_opennlp.bats b/solr/packaging/test/test_opennlp.bats index a56ca1b3566..2b2638bf54e 100644 --- a/solr/packaging/test/test_opennlp.bats +++ b/solr/packaging/test/test_opennlp.bats @@ -43,20 +43,13 @@ teardown() { # I also have dreams of incorporating this as code snippets in a Tutorial via the ascii doc tags # like we use for the SolrJ code snippets. That way we know the snippets continue to work! @test "Check lifecycle of sentiment classification" { - - pip install transformers onnx onnxruntime - python -m transformers.onnx -m nlptown/bert-base-multilingual-uncased-sentiment --feature sequence-classification ${SOLR_TIP}/models/sentiment + pip3 install transformers onnx onnxruntime torch + python3 -m transformers.onnx -m nlptown/bert-base-multilingual-uncased-sentiment --feature sequence-classification ${SOLR_TIP}/models/sentiment curl --insecure -o ${SOLR_TIP}/models/sentiment/vocab.txt https://huggingface.co/nlptown/bert-base-multilingual-uncased-sentiment/resolve/main/vocab.txt - # GPU versions is linux and windows only, not OSX. So swap jars. - # Pending https://issues.apache.org/jira/browse/OPENNLP-1515 - rm -f ${SOLR_TIP}/modules/analysis-extras/lib/onnxruntime_gpu-1.14.0.jar - curl --insecure -o ${SOLR_TIP}/modules/analysis-extras/lib/onnxruntime-1.14.0.jar https://repo1.maven.org/maven2/com/microsoft/onnxruntime/onnxruntime/1.14.0/onnxruntime-1.14.0.jar - run ls -alh ${SOLR_TIP}/modules/analysis-extras/lib refute_output --partial "onnxruntime_gpu" - assert_output --partial "onnxruntime-1.14.0.jar" # Can't figure out magic policy stuff to allow loading ONNX, so disable security manager. export SOLR_SECURITY_MANAGER_ENABLED=false From 2c106e9fb59fff13ad930ee215b05cdb72b9ef56 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Tue, 5 Dec 2023 13:15:37 -0500 Subject: [PATCH 16/27] We cleaned up the name ;-) --- .../processor/DocumentCategorizerUpdateProcessorFactory.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java b/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java index 11f924b2dda..949207ccdad 100644 --- a/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java +++ b/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java @@ -45,7 +45,7 @@ import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.Pair; import org.apache.solr.core.SolrCore; -import org.apache.solr.filestore.PackageStoreAPI; +import org.apache.solr.filestore.FileStoreAPI; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.update.AddUpdateCommand; @@ -429,7 +429,7 @@ public final UpdateRequestProcessor getInstance( { // Initialize the categorizer. - var path = solrHome.resolve(PackageStoreAPI.PACKAGESTORE_DIRECTORY); + var path = solrHome.resolve(FileStoreAPI.FILESTORE_DIRECTORY); File modelFile = new File(model); File vocabFile = new File(vocab); From 8d6187fcddb7ce89fae70a1d4715d9221b198eec Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Tue, 9 Jan 2024 11:28:12 -0500 Subject: [PATCH 17/27] prompted to update the locks --- versions.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/versions.lock b/versions.lock index 7efaf9c7f3c..7d0814243ad 100644 --- a/versions.lock +++ b/versions.lock @@ -9,7 +9,7 @@ com.epam:parso:2.0.14 (1 constraints: 8e0c750e) com.fasterxml.jackson:jackson-bom:2.16.1 (12 constraints: 4ef8c555) com.fasterxml.jackson.core:jackson-annotations:2.16.1 (10 constraints: 9cbe7917) com.fasterxml.jackson.core:jackson-core:2.16.1 (13 constraints: 4302f528) -com.fasterxml.jackson.core:jackson-databind:2.16.1 (18 constraints: 8d63998e) +com.fasterxml.jackson.core:jackson-databind:2.16.1 (19 constraints: 9470b014) com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:2.16.1 (2 constraints: 631c9cf1) com.fasterxml.jackson.dataformat:jackson-dataformat-smile:2.16.1 (1 constraints: ba0ead66) com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.16.1 (2 constraints: a724cce0) @@ -348,7 +348,7 @@ org.reactivestreams:reactive-streams:1.0.4 (3 constraints: 3f2b77fd) org.semver4j:semver4j:5.2.2 (1 constraints: 0b050c36) org.slf4j:jcl-over-slf4j:2.0.10 (3 constraints: f71734b5) org.slf4j:jul-to-slf4j:2.0.10 (3 constraints: 5128305e) -org.slf4j:slf4j-api:2.0.10 (59 constraints: 5b112dd8) +org.slf4j:slf4j-api:2.0.10 (61 constraints: d62c252e) org.tallison:isoparser:1.9.41.7 (1 constraints: fb0c5528) org.tallison:jmatio:1.5 (1 constraints: ff0b57e9) org.tallison:metadata-extractor:2.17.1.0 (1 constraints: f00c3b28) From 50887bb8f6828fb884f8e05fb3131b00eb83e7e0 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Tue, 9 Jan 2024 11:48:50 -0500 Subject: [PATCH 18/27] Add in required license files --- solr/licenses/onnxruntime-LICENSE-MIT.txt | 21 ++ solr/licenses/opennlp-LICENSE-ASL.txt | 314 ++++++++++++++++++++++ solr/licenses/opennlp-NOTICE.txt | 101 +++++++ 3 files changed, 436 insertions(+) create mode 100644 solr/licenses/onnxruntime-LICENSE-MIT.txt create mode 100644 solr/licenses/opennlp-LICENSE-ASL.txt create mode 100644 solr/licenses/opennlp-NOTICE.txt diff --git a/solr/licenses/onnxruntime-LICENSE-MIT.txt b/solr/licenses/onnxruntime-LICENSE-MIT.txt new file mode 100644 index 00000000000..48bc6bb4996 --- /dev/null +++ b/solr/licenses/onnxruntime-LICENSE-MIT.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) Microsoft Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/solr/licenses/opennlp-LICENSE-ASL.txt b/solr/licenses/opennlp-LICENSE-ASL.txt new file mode 100644 index 00000000000..d1f4d5cc086 --- /dev/null +++ b/solr/licenses/opennlp-LICENSE-ASL.txt @@ -0,0 +1,314 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +EXTERNAL COMPONENTS + +Apache PDFBox includes a number of components with separate copyright notices +and license terms. Your use of these components is subject to the terms and +conditions of the following licenses. + +Contributions made to the original PDFBox project: + + Copyright (c) 2002-2007, www.pdfbox.org + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of pdfbox; nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + +Adobe Font Metrics (AFM) for PDF Core 14 Fonts + + This file and the 14 PostScript(R) AFM files it accompanies may be used, + copied, and distributed for any purpose and without charge, with or without + modification, provided that all copyright notices are retained; that the + AFM files are not distributed without this file; that all modifications + to this file or any of the AFM files are prominently noted in the modified + file(s); and that this paragraph is not modified. Adobe Systems has no + responsibility or obligation to support the use of the AFM files. + +CMaps for PDF Fonts (http://opensource.adobe.com/wiki/display/cmap/Downloads) + + Copyright 1990-2009 Adobe Systems Incorporated. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + Neither the name of Adobe Systems Incorporated nor the names of its + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. + +Glyphlist (http://www.adobe.com/devnet/opentype/archives/glyph.html) + + Copyright (c) 1997,1998,2002,2007 Adobe Systems Incorporated + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this documentation file to use, copy, publish, distribute, + sublicense, and/or sell copies of the documentation, and to permit + others to do the same, provided that: + - No modification, editing or other alteration of this document is + allowed; and + - The above copyright notice and this permission notice shall be + included in all copies of the documentation. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this documentation file, to create their own derivative works + from the content of this document to use, copy, publish, distribute, + sublicense, and/or sell the derivative works, and to permit others to do + the same, provided that the derived work is not represented as being a + copy or version of this document. + + Adobe shall not be liable to any party for any loss of revenue or profit + or for indirect, incidental, special, consequential, or other similar + damages, whether based on tort (including without limitation negligence + or strict liability), contract or other legal or equitable grounds even + if Adobe has been advised or had reason to know of the possibility of + such damages. The Adobe materials are provided on an "AS IS" basis. + Adobe specifically disclaims all express, statutory, or implied + warranties relating to the Adobe materials, including but not limited to + those concerning merchantability or fitness for a particular purpose or + non-infringement of any third party rights regarding the Adobe + materials. + diff --git a/solr/licenses/opennlp-NOTICE.txt b/solr/licenses/opennlp-NOTICE.txt new file mode 100644 index 00000000000..5897a68a412 --- /dev/null +++ b/solr/licenses/opennlp-NOTICE.txt @@ -0,0 +1,101 @@ +Apache OpenNLP +Copyright 2021-2023 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +============================================================================ + +The snowball stemmers in +opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball +were developed by Martin Porter and Richard Boulton. +The full snowball package is available from +http://snowball.tartarus.org/ + +============================================================================ + +Wordpiece tokenizer +https://github.com/robrua/easy-bert + +The MIT License (MIT) + +Copyright (c) 2019 Rob Rua + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +============================================================================ + +ONNX Runtime + +MIT License + +Copyright (c) Microsoft Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +============================================================================ + +SLF4J API +https://github.com/qos-ch/slf4j + +MIT License + +Copyright (c) 2004-2022 QOS.ch Sarl (Switzerland) +All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +============================================================================ + +jackson-databind +https://github.com/FasterXML/jackson-databind +The Apache Software License, Version 2.0 From a31ef8e6a56d8aee400de64ca12892ec5f665eb7 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Tue, 9 Jan 2024 14:30:18 -0500 Subject: [PATCH 19/27] lint --- .../processor/DocumentCategorizerUpdateProcessorFactory.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java b/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java index 949207ccdad..50bf908b1f4 100644 --- a/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java +++ b/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java @@ -85,6 +85,7 @@ public class DocumentCategorizerUpdateProcessorFactory extends UpdateRequestProc * @see #pattern */ private String dest = null; + /** * @see #dest */ @@ -129,7 +130,7 @@ public void init(NamedList args) { throw new SolrException(SERVER_ERROR, "Init param '" + MODEL_PARAM + "' must be a "); } model = modelParam.toString(); - System.out.println("In OpenNLP doccat - model: " + model); + log.info("In OpenNLP doccat - model: {}"); Object vocabParam = args.remove(VOCAB_PARAM); if (null == vocabParam) { From 4b7528a29b18f652a2f4f554b26ed523eac5f5fc Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Tue, 9 Jan 2024 17:43:58 -0500 Subject: [PATCH 20/27] precommit warning --- .../processor/DocumentCategorizerUpdateProcessorFactory.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java b/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java index 50bf908b1f4..a7bc5ffd1c5 100644 --- a/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java +++ b/solr/modules/analysis-extras/src/java/org/apache/solr/update/processor/DocumentCategorizerUpdateProcessorFactory.java @@ -130,7 +130,7 @@ public void init(NamedList args) { throw new SolrException(SERVER_ERROR, "Init param '" + MODEL_PARAM + "' must be a "); } model = modelParam.toString(); - log.info("In OpenNLP doccat - model: {}"); + log.info("In OpenNLP doccat - model: {}", model); Object vocabParam = args.remove(VOCAB_PARAM); if (null == vocabParam) { From bdee5683674e92f092b6646e3b79c72aeb55269b Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Tue, 13 Feb 2024 15:35:00 +0000 Subject: [PATCH 21/27] upgrade OpenNLP from 2.3.1 to 2.3.2 (to match Lucene main branch) --- solr/licenses/onnxruntime-1.15.0.jar.sha1 | 1 - solr/licenses/onnxruntime-1.16.3.jar.sha1 | 1 + solr/licenses/opennlp-dl-2.3.1.jar.sha1 | 1 - solr/licenses/opennlp-dl-2.3.2.jar.sha1 | 1 + solr/licenses/opennlp-tools-2.3.1.jar.sha1 | 1 - solr/licenses/opennlp-tools-2.3.2.jar.sha1 | 1 + versions.lock | 8 ++++---- versions.props | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) delete mode 100644 solr/licenses/onnxruntime-1.15.0.jar.sha1 create mode 100644 solr/licenses/onnxruntime-1.16.3.jar.sha1 delete mode 100644 solr/licenses/opennlp-dl-2.3.1.jar.sha1 create mode 100644 solr/licenses/opennlp-dl-2.3.2.jar.sha1 delete mode 100644 solr/licenses/opennlp-tools-2.3.1.jar.sha1 create mode 100644 solr/licenses/opennlp-tools-2.3.2.jar.sha1 diff --git a/solr/licenses/onnxruntime-1.15.0.jar.sha1 b/solr/licenses/onnxruntime-1.15.0.jar.sha1 deleted file mode 100644 index 28b4f0672ce..00000000000 --- a/solr/licenses/onnxruntime-1.15.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6db39caba947384ce09c3071e84cb73437a77e74 diff --git a/solr/licenses/onnxruntime-1.16.3.jar.sha1 b/solr/licenses/onnxruntime-1.16.3.jar.sha1 new file mode 100644 index 00000000000..a003ecc953f --- /dev/null +++ b/solr/licenses/onnxruntime-1.16.3.jar.sha1 @@ -0,0 +1 @@ +ac414a0cbb03a36cf96b0467091263b8538c70df diff --git a/solr/licenses/opennlp-dl-2.3.1.jar.sha1 b/solr/licenses/opennlp-dl-2.3.1.jar.sha1 deleted file mode 100644 index 12621381b08..00000000000 --- a/solr/licenses/opennlp-dl-2.3.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -8ff28619e6a377fe467b47274f39fd1fc9b2c303 diff --git a/solr/licenses/opennlp-dl-2.3.2.jar.sha1 b/solr/licenses/opennlp-dl-2.3.2.jar.sha1 new file mode 100644 index 00000000000..d0dd4e0d515 --- /dev/null +++ b/solr/licenses/opennlp-dl-2.3.2.jar.sha1 @@ -0,0 +1 @@ +2196a4ffb5517bf04bb118a7646acd6147dafa60 diff --git a/solr/licenses/opennlp-tools-2.3.1.jar.sha1 b/solr/licenses/opennlp-tools-2.3.1.jar.sha1 deleted file mode 100644 index b900c01bb8c..00000000000 --- a/solr/licenses/opennlp-tools-2.3.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -40a6c39a0911ed1fb17171ea043b8bf673bd71b5 diff --git a/solr/licenses/opennlp-tools-2.3.2.jar.sha1 b/solr/licenses/opennlp-tools-2.3.2.jar.sha1 new file mode 100644 index 00000000000..94b2924f8fa --- /dev/null +++ b/solr/licenses/opennlp-tools-2.3.2.jar.sha1 @@ -0,0 +1 @@ +d739edba1e729691ed5ab80e1ccf330555a02ea7 diff --git a/versions.lock b/versions.lock index 9c4a1a9133f..cfb1c65e899 100644 --- a/versions.lock +++ b/versions.lock @@ -9,7 +9,7 @@ com.epam:parso:2.0.14 (1 constraints: 8e0c750e) com.fasterxml.jackson:jackson-bom:2.16.1 (12 constraints: a2fcae61) com.fasterxml.jackson.core:jackson-annotations:2.16.1 (10 constraints: f3c2f2fe) com.fasterxml.jackson.core:jackson-core:2.16.1 (13 constraints: 97063bf8) -com.fasterxml.jackson.core:jackson-databind:2.16.1 (19 constraints: eb745417) +com.fasterxml.jackson.core:jackson-databind:2.16.1 (19 constraints: ea747116) com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:2.16.1 (2 constraints: 631c9cf1) com.fasterxml.jackson.dataformat:jackson-dataformat-smile:2.16.1 (1 constraints: ba0ead66) com.fasterxml.jackson.module:jackson-module-jakarta-xmlbind-annotations:2.16.1 (2 constraints: aa24d1e0) @@ -67,7 +67,7 @@ com.jayway.jsonpath:json-path:2.8.0 (2 constraints: 6c12952c) com.lmax:disruptor:3.4.4 (1 constraints: 0d050a36) com.mchange:c3p0:0.9.5.5 (1 constraints: c80c571b) com.mchange:mchange-commons-java:0.2.19 (1 constraints: 84075b75) -com.microsoft.onnxruntime:onnxruntime:1.15.0 (1 constraints: 040d5d21) +com.microsoft.onnxruntime:onnxruntime:1.16.3 (1 constraints: 080d6321) com.pff:java-libpst:0.9.3 (1 constraints: 630cfa01) com.rometools:rome:1.18.0 (1 constraints: 910c870e) com.rometools:rome-utils:1.18.0 (1 constraints: 10095d96) @@ -236,8 +236,8 @@ org.apache.lucene:lucene-spatial-extras:9.9.2 (1 constraints: 16053536) org.apache.lucene:lucene-spatial3d:9.9.2 (1 constraints: c310c4b9) org.apache.lucene:lucene-suggest:9.9.2 (1 constraints: 16053536) org.apache.lucene:lucene-test-framework:9.9.2 (1 constraints: 16053536) -org.apache.opennlp:opennlp-dl:2.3.1 (1 constraints: 0805ff35) -org.apache.opennlp:opennlp-tools:2.3.1 (4 constraints: d52fbea3) +org.apache.opennlp:opennlp-dl:2.3.2 (1 constraints: 09050036) +org.apache.opennlp:opennlp-tools:2.3.2 (4 constraints: d72f62a4) org.apache.pdfbox:fontbox:2.0.26 (1 constraints: 180b72d8) org.apache.pdfbox:jbig2-imageio:3.0.4 (1 constraints: 5e0cef01) org.apache.pdfbox:jempbox:1.8.16 (1 constraints: 970c910e) diff --git a/versions.props b/versions.props index 0e95806f10d..61c8106b9ed 100644 --- a/versions.props +++ b/versions.props @@ -49,7 +49,7 @@ org.apache.httpcomponents:httpmime=4.5.14 org.apache.kerby:*=1.0.1 org.apache.logging.log4j:*=2.21.0 org.apache.lucene:*=9.9.2 -org.apache.opennlp:opennlp*=2.3.1 +org.apache.opennlp:opennlp*=2.3.2 org.apache.tika:*=1.28.5 org.apache.tomcat:annotations-api=6.0.53 org.apache.zookeeper:*=3.9.1 From 541dceb8420dc5a25d95e6e6c1f1835676a3bc94 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Tue, 13 Feb 2024 15:48:59 +0000 Subject: [PATCH 22/27] tentative: minimum Java17 for this PR --- .../main/java/org/apache/lucene/gradle/WrapperDownloader.java | 4 ++-- gradlew | 2 +- gradlew.bat | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/buildSrc/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java b/buildSrc/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java index e6930af7c74..5123291bb83 100644 --- a/buildSrc/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java +++ b/buildSrc/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java @@ -61,8 +61,8 @@ public static void main(String[] args) { public static void checkVersion() { int major = Runtime.getRuntime().version().feature(); - if (major < 11 || major > 21) { - throw new IllegalStateException("java version must be between 11 and 21, your version: " + major); + if (major < 17 || major > 21) { + throw new IllegalStateException("java version must be between 17 and 21, your version: " + major); } } diff --git a/gradlew b/gradlew index 308a3239001..c4770259bf7 100755 --- a/gradlew +++ b/gradlew @@ -160,7 +160,7 @@ GRADLE_WRAPPER_JAR="$APP_HOME/gradle/wrapper/gradle-wrapper.jar" "$JAVACMD" $JAVA_OPTS --source 11 "$APP_HOME/buildSrc/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java" "$GRADLE_WRAPPER_JAR" WRAPPER_STATUS=$? if [ "$WRAPPER_STATUS" -eq 1 ]; then - echo "ERROR: Something went wrong. Make sure you're using Java version between 11 and 21." + echo "ERROR: Something went wrong. Make sure you're using Java version between 17 and 21." exit $WRAPPER_STATUS elif [ "$WRAPPER_STATUS" -ne 0 ]; then exit $WRAPPER_STATUS diff --git a/gradlew.bat b/gradlew.bat index cb69a0ab1a7..ca7ae1a1741 100644 --- a/gradlew.bat +++ b/gradlew.bat @@ -106,7 +106,7 @@ goto fail :failWithJvmMessage @rem https://github.com/apache/lucene/pull/819 -echo Error: Something went wrong. Make sure you're using Java version between 11 and 21. +echo Error: Something went wrong. Make sure you're using Java version between 17 and 21. :fail rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of From 633391ee3ccb70de60b3f9a04372ab305efd2652 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Tue, 13 Feb 2024 16:03:30 +0000 Subject: [PATCH 23/27] Update gradle-precommit.yml - Java 11 --> 17 --- .github/workflows/gradle-precommit.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gradle-precommit.yml b/.github/workflows/gradle-precommit.yml index 6b4634736f2..a37f5bce1db 100644 --- a/.github/workflows/gradle-precommit.yml +++ b/.github/workflows/gradle-precommit.yml @@ -18,11 +18,11 @@ jobs: # Setup - uses: actions/checkout@v2 - - name: Set up JDK 11 + - name: Set up JDK 17 uses: actions/setup-java@v2 with: distribution: 'temurin' - java-version: 11 + java-version: 17 java-package: jdk - name: Grant execute permission for gradlew From e2fe1feed5b3ddf9b9c016ca3f68404fca337285 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Tue, 13 Feb 2024 16:11:41 +0000 Subject: [PATCH 24/27] Update solrj-test.yml - Java 11 --> 17 --- .github/workflows/solrj-test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/solrj-test.yml b/.github/workflows/solrj-test.yml index 0941fcd41c7..4dda5b58f1b 100644 --- a/.github/workflows/solrj-test.yml +++ b/.github/workflows/solrj-test.yml @@ -20,11 +20,11 @@ jobs: steps: # Setup - uses: actions/checkout@v2 - - name: Set up JDK 11 + - name: Set up JDK 17 uses: actions/setup-java@v2 with: distribution: 'temurin' - java-version: 11 + java-version: 17 java-package: jdk - name: Grant execute permission for gradlew run: chmod +x gradlew From 855b5ea9063e564d1d903fa936d027e865746b50 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Tue, 13 Feb 2024 16:14:16 +0000 Subject: [PATCH 25/27] Update docker-test.yml - Java 11 --> 17 --- .github/workflows/docker-test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker-test.yml b/.github/workflows/docker-test.yml index dc8cc9df728..2a7d2504dbf 100644 --- a/.github/workflows/docker-test.yml +++ b/.github/workflows/docker-test.yml @@ -25,11 +25,11 @@ jobs: steps: # Setup - uses: actions/checkout@v2 - - name: Set up JDK 11 + - name: Set up JDK 17 uses: actions/setup-java@v2 with: distribution: 'temurin' - java-version: 11 + java-version: 17 java-package: jdk - name: Install ACL run: sudo apt-get install acl From 9c16a8bd103d31d39c99c9a12b512362c34d3c52 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Tue, 13 Feb 2024 16:14:45 +0000 Subject: [PATCH 26/27] Update bin-solr-test.yml - Java 11 --> 17 --- .github/workflows/bin-solr-test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/bin-solr-test.yml b/.github/workflows/bin-solr-test.yml index a832bdcc541..57047326023 100644 --- a/.github/workflows/bin-solr-test.yml +++ b/.github/workflows/bin-solr-test.yml @@ -21,11 +21,11 @@ jobs: steps: # Setup - uses: actions/checkout@v2 - - name: Set up JDK 11 + - name: Set up JDK 17 uses: actions/setup-java@v2 with: distribution: 'temurin' - java-version: 11 + java-version: 17 java-package: jdk - name: Grant execute permission for gradlew run: chmod +x gradlew From 476a7d4252fd0a3b4aefde7c88b675338cdd6a93 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Tue, 13 Feb 2024 16:33:48 +0000 Subject: [PATCH 27/27] undo 'tentative: minimum Java17 for this PR' -- see PR 1510 instead --- .github/workflows/bin-solr-test.yml | 4 ++-- .github/workflows/docker-test.yml | 4 ++-- .github/workflows/gradle-precommit.yml | 4 ++-- .github/workflows/solrj-test.yml | 4 ++-- .../main/java/org/apache/lucene/gradle/WrapperDownloader.java | 4 ++-- gradlew | 2 +- gradlew.bat | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/bin-solr-test.yml b/.github/workflows/bin-solr-test.yml index 57047326023..a832bdcc541 100644 --- a/.github/workflows/bin-solr-test.yml +++ b/.github/workflows/bin-solr-test.yml @@ -21,11 +21,11 @@ jobs: steps: # Setup - uses: actions/checkout@v2 - - name: Set up JDK 17 + - name: Set up JDK 11 uses: actions/setup-java@v2 with: distribution: 'temurin' - java-version: 17 + java-version: 11 java-package: jdk - name: Grant execute permission for gradlew run: chmod +x gradlew diff --git a/.github/workflows/docker-test.yml b/.github/workflows/docker-test.yml index 2a7d2504dbf..dc8cc9df728 100644 --- a/.github/workflows/docker-test.yml +++ b/.github/workflows/docker-test.yml @@ -25,11 +25,11 @@ jobs: steps: # Setup - uses: actions/checkout@v2 - - name: Set up JDK 17 + - name: Set up JDK 11 uses: actions/setup-java@v2 with: distribution: 'temurin' - java-version: 17 + java-version: 11 java-package: jdk - name: Install ACL run: sudo apt-get install acl diff --git a/.github/workflows/gradle-precommit.yml b/.github/workflows/gradle-precommit.yml index a37f5bce1db..6b4634736f2 100644 --- a/.github/workflows/gradle-precommit.yml +++ b/.github/workflows/gradle-precommit.yml @@ -18,11 +18,11 @@ jobs: # Setup - uses: actions/checkout@v2 - - name: Set up JDK 17 + - name: Set up JDK 11 uses: actions/setup-java@v2 with: distribution: 'temurin' - java-version: 17 + java-version: 11 java-package: jdk - name: Grant execute permission for gradlew diff --git a/.github/workflows/solrj-test.yml b/.github/workflows/solrj-test.yml index 4dda5b58f1b..0941fcd41c7 100644 --- a/.github/workflows/solrj-test.yml +++ b/.github/workflows/solrj-test.yml @@ -20,11 +20,11 @@ jobs: steps: # Setup - uses: actions/checkout@v2 - - name: Set up JDK 17 + - name: Set up JDK 11 uses: actions/setup-java@v2 with: distribution: 'temurin' - java-version: 17 + java-version: 11 java-package: jdk - name: Grant execute permission for gradlew run: chmod +x gradlew diff --git a/buildSrc/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java b/buildSrc/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java index 5123291bb83..e6930af7c74 100644 --- a/buildSrc/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java +++ b/buildSrc/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java @@ -61,8 +61,8 @@ public static void main(String[] args) { public static void checkVersion() { int major = Runtime.getRuntime().version().feature(); - if (major < 17 || major > 21) { - throw new IllegalStateException("java version must be between 17 and 21, your version: " + major); + if (major < 11 || major > 21) { + throw new IllegalStateException("java version must be between 11 and 21, your version: " + major); } } diff --git a/gradlew b/gradlew index c4770259bf7..308a3239001 100755 --- a/gradlew +++ b/gradlew @@ -160,7 +160,7 @@ GRADLE_WRAPPER_JAR="$APP_HOME/gradle/wrapper/gradle-wrapper.jar" "$JAVACMD" $JAVA_OPTS --source 11 "$APP_HOME/buildSrc/src/main/java/org/apache/lucene/gradle/WrapperDownloader.java" "$GRADLE_WRAPPER_JAR" WRAPPER_STATUS=$? if [ "$WRAPPER_STATUS" -eq 1 ]; then - echo "ERROR: Something went wrong. Make sure you're using Java version between 17 and 21." + echo "ERROR: Something went wrong. Make sure you're using Java version between 11 and 21." exit $WRAPPER_STATUS elif [ "$WRAPPER_STATUS" -ne 0 ]; then exit $WRAPPER_STATUS diff --git a/gradlew.bat b/gradlew.bat index ca7ae1a1741..cb69a0ab1a7 100644 --- a/gradlew.bat +++ b/gradlew.bat @@ -106,7 +106,7 @@ goto fail :failWithJvmMessage @rem https://github.com/apache/lucene/pull/819 -echo Error: Something went wrong. Make sure you're using Java version between 17 and 21. +echo Error: Something went wrong. Make sure you're using Java version between 11 and 21. :fail rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of