Skip to content

Commit

Permalink
Support for adding custom tika configuration (#15)
Browse files Browse the repository at this point in the history
  • Loading branch information
ahilmathew authored Apr 8, 2024
1 parent 0f1ad49 commit 8278ff6
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 25 deletions.
51 changes: 36 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,19 @@ will work with the version of Tika you are installing.
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->


- [Requirements](#requirements)
- [Installing](#installing)
- [Install released version using Helm repository](#install-released-version-using-helm-repository)
- [Install development version using master branch](#install-development-version-using-master-branch)
- [Upgrading](#upgrading)
- [Configuration](#configuration)
- [Deprecated](#deprecated)
- [FAQ](#faq)
- [Contributing](#contributing)
- [tika-helm](#tika-helm)
- [Requirements](#requirements)
- [Installing](#installing)
- [Install released version using Helm repository](#install-released-version-using-helm-repository)
- [Install development version using master branch](#install-development-version-using-master-branch)
- [Custom configuration for tika](#custom-configuration-for-tika)
- [Upgrading](#upgrading)
- [Configuration](#configuration)
- [Deprecated](#deprecated)
- [Contributing](#contributing)
- [More Information](#more-information)
- [Authors](#authors)
- [License](#license)

<!-- END doctoc generated TOC please keep comment here to allow auto update -->
<!-- Use this to update TOC: -->
Expand Down Expand Up @@ -85,21 +89,38 @@ while true; do kubectl --namespace tika-test port-forward $POD_NAME 9998:$CONTAI
* Install it:
- with Helm 3: `helm install tika . --set image.tag=latest-full`

### Custom configuration for tika

To use custom [configuration](https://tika.apache.org/2.9.1/configuring.html) values for Apache Tika, set the `tikaConfig` key in `values.yaml`.
Example:

```yaml
tikaConfig: |
  <?xml version="1.0" encoding="UTF-8"?>
  <properties>
    <parsers>
      <!-- Default Parser for most things, except for 2 mime types -->
      <parser class="org.apache.tika.parser.DefaultParser">
        <mime-exclude>image/jpeg</mime-exclude>
        <mime-exclude>application/pdf</mime-exclude>
      </parser>
    </parsers>
  </properties>
```

## Upgrading

Please check `artifacthub.io/changes` in `Chart.yaml` before upgrading.

## Configuration

| Parameter | Description | Default |
|--------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------|
| `...` | ... | ... |
| Parameter | Description | Default |
| --------- | ----------- | ------- |
| `...` | ... | ... |

### Deprecated

| Parameter | Description | Default |
|----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------|---------|
| `...` | ... | `...` |
| Parameter | Description | Default |
| --------- | ----------- | ------- |
| `...` | ... | `...` |

## Contributing

Expand Down
27 changes: 27 additions & 0 deletions templates/configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

---
{{- if .Values.tikaConfig }}
# ConfigMap carrying the user-supplied Tika configuration.
# Rendered only when the `tikaConfig` value is set. The name must stay in sync
# with the configMap volume referenced in templates/deployment.yaml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Chart.Name }}-config
data:
  # The template indents the value itself (indent 4), so the action below must
  # start at column 0 for every line to land inside the block scalar.
  tika-config.xml: |-
{{ .Values.tikaConfig | indent 4 }}
{{- end }}
13 changes: 13 additions & 0 deletions templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,28 @@ spec:
volumes:
- name: sec-ctx-vol
emptyDir: {}
{{- if .Values.tikaConfig }}
        # Volume exposing the custom Tika configuration from the chart's
        # ConfigMap. The file is only passed to Tika as a config path, so it is
        # projected read-only (0444) rather than world-writable/executable.
        # defaultMode must stay an unquoted (octal) integer: Kubernetes expects
        # a number here, not a string.
        - name: tika-config
          configMap:
            name: {{ .Chart.Name }}-config
            defaultMode: 0444
{{- end }}
containers:
- name: {{ .Chart.Name }}
{{- if .Values.tikaConfig }}
args: ["-c" , "/tika-config/tika-config.xml"]
{{- end }}
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
volumeMounts:
- name: sec-ctx-vol
mountPath: /tmp
{{- if .Values.tikaConfig }}
- name: tika-config
mountPath: /tika-config
{{- end }}
ports:
- name: http
containerPort: {{ .Values.service.port }}
Expand Down
41 changes: 31 additions & 10 deletions values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ image:
repository: apache/tika
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: "2.9.0.0-full"
tag: '2.9.0.0-full'

imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
nameOverride: ''
fullnameOverride: ''

serviceAccount:
# Specifies whether a service account should be created
Expand All @@ -36,18 +36,19 @@ serviceAccount:
annotations: {}
# The name of the service account to use.
# If not set and create is true, a name is generated using the fullname template
name: ""
name: ''

podAnnotations: {}

podSecurityContext: {}
podSecurityContext:
{}
# fsGroup: 2000

securityContext:
allowPrivilegeEscalation: true
capabilities:
drop:
- ALL
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 35002
Expand All @@ -59,7 +60,8 @@ service:

ingress:
enabled: false
annotations: {}
annotations:
{}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
Expand Down Expand Up @@ -90,10 +92,10 @@ resources:
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
limits:
cpu: "2"
cpu: '2'
memory: 2000Mi
requests:
cpu: "1"
cpu: '1'
memory: 1500Mi

autoscaling:
Expand All @@ -118,4 +120,23 @@ networkPolicy:
allowExternal: false

config:
base_url: "http://localhost/"
base_url: 'http://localhost/'
# Uncomment the block below to supply a custom tika-config.xml.
# See https://tika.apache.org/2.9.1/configuring.html for details on configuring Apache Tika.
# tikaConfig: |
# <?xml version="1.0" encoding="UTF-8"?>
# <properties>
# <parsers>
# <!-- Default Parser for most things, except for 2 mime types, and never
# use the Executable Parser -->
# <parser class="org.apache.tika.parser.DefaultParser">
# <mime-exclude>image/jpeg</mime-exclude>
# <mime-exclude>application/pdf</mime-exclude>
# <parser-exclude class="org.apache.tika.parser.executable.ExecutableParser"/>
# </parser>
# <!-- Use a different parser for PDF -->
# <parser class="org.apache.tika.parser.EmptyParser">
# <mime>application/pdf</mime>
# </parser>
# </parsers>
# </properties>

0 comments on commit 8278ff6

Please sign in to comment.