From a40ebee64d82749d9c3a6a4ae0bbae2e0be6b503 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Wed, 20 Nov 2024 02:17:28 +0100 Subject: [PATCH] Add Filestream integration (#11332) This commit adds the Filestream integration --- .github/CODEOWNERS | 1 + packages/filestream/_dev/build/build.yml | 4 + packages/filestream/_dev/build/docs/README.md | 38 +++ .../_dev/deploy/docker/docker-compose.yml | 8 + .../docker/sample_logs/test-filestream.log | 201 +++++++++++++ packages/filestream/changelog.yml | 5 + .../test/system/test-filestream-config.yml | 8 + .../generic/agent/stream/filestream.yml.hbs | 146 +++++++++ .../generic/fields/base-fields.yml | 20 ++ .../data_stream/generic/fields/beats.yml | 6 + .../data_stream/generic/fields/ecs.yml | 12 + .../data_stream/generic/fields/filestream.yml | 13 + .../data_stream/generic/manifest.yml | 283 ++++++++++++++++++ .../data_stream/generic/sample_event.json | 65 ++++ packages/filestream/docs/README.md | 38 +++ packages/filestream/img/icon.svg | 4 + packages/filestream/manifest.yml | 26 ++ 17 files changed, 878 insertions(+) create mode 100644 packages/filestream/_dev/build/build.yml create mode 100644 packages/filestream/_dev/build/docs/README.md create mode 100644 packages/filestream/_dev/deploy/docker/docker-compose.yml create mode 100644 packages/filestream/_dev/deploy/docker/sample_logs/test-filestream.log create mode 100644 packages/filestream/changelog.yml create mode 100644 packages/filestream/data_stream/generic/_dev/test/system/test-filestream-config.yml create mode 100644 packages/filestream/data_stream/generic/agent/stream/filestream.yml.hbs create mode 100644 packages/filestream/data_stream/generic/fields/base-fields.yml create mode 100644 packages/filestream/data_stream/generic/fields/beats.yml create mode 100644 packages/filestream/data_stream/generic/fields/ecs.yml create mode 100644 packages/filestream/data_stream/generic/fields/filestream.yml create mode 100644 
packages/filestream/data_stream/generic/manifest.yml create mode 100644 packages/filestream/data_stream/generic/sample_event.json create mode 100644 packages/filestream/docs/README.md create mode 100644 packages/filestream/img/icon.svg create mode 100644 packages/filestream/manifest.yml diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 43d9c667c1a..6714d3b9549 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -187,6 +187,7 @@ /packages/f5 @elastic/security-service-integrations /packages/f5_bigip @elastic/security-service-integrations /packages/falco @elastic/security-service-integrations +/packages/filestream @elastic/elastic-agent-data-plane /packages/fim @elastic/sec-linux-platform /packages/fireeye @elastic/security-service-integrations /packages/first_epss @elastic/security-service-integrations diff --git a/packages/filestream/_dev/build/build.yml b/packages/filestream/_dev/build/build.yml new file mode 100644 index 00000000000..84034dcea1e --- /dev/null +++ b/packages/filestream/_dev/build/build.yml @@ -0,0 +1,4 @@ +dependencies: + ecs: + reference: git@v8.8.0 + import_mappings: true diff --git a/packages/filestream/_dev/build/docs/README.md b/packages/filestream/_dev/build/docs/README.md new file mode 100644 index 00000000000..c873dd0e478 --- /dev/null +++ b/packages/filestream/_dev/build/docs/README.md @@ -0,0 +1,38 @@ +# Custom Filestream Log integration + +The `filestream` custom input is used to read lines from active log files. It is the +new, improved alternative to the `log` input. It comes with various improvements +to the existing input: + +1. Checking of `close_*` options happens out of band. Thus, if an output is blocked, +Elastic Agent can close the reader and avoid keeping too many files open. + +2. The order of `parsers` is configurable. So it is possible to parse JSON lines and then +aggregate the contents into a multiline event. + +3. Some position updates and metadata changes no longer depend on the publishing pipeline. 
+If the pipeline is blocked, some changes are still applied to the registry. + +4. Only the most recent updates are serialized to the registry. In contrast, the `log` input +has to serialize the complete registry on each ACK from the outputs. This makes the registry updates +much quicker with this input. + +5. The input ensures that only offset updates are written to the registry's append-only log. +The `log` input writes the complete file state. + +6. Stale entries can be removed from the registry, even if there is no active input. + +7. The fingerprint file identity is used by default. + +More information can be found on the {{ url "filebeat-input-filestream" "Filestream documentation page" }}. + +As Filestream configures a new input, configuring it to collect data +from a file that was previously collected by the Custom Logs integration +will result in duplicate data. You may wish to configure +`ignore_older` or temporarily set `ignore_inactive: since_first_start` +to limit the amount of duplicate data collected. + +If the Custom Logs integration is removed and the Custom Filestream +Logs is added in the same policy change, there is a risk of data being +missed between the last entry ingested by the Custom Logs and the +first one ingested by the Custom Filestream Logs.
diff --git a/packages/filestream/_dev/deploy/docker/docker-compose.yml b/packages/filestream/_dev/deploy/docker/docker-compose.yml new file mode 100644 index 00000000000..e2788c97a30 --- /dev/null +++ b/packages/filestream/_dev/deploy/docker/docker-compose.yml @@ -0,0 +1,8 @@ +version: '2.3' +services: + filestream-logfile: + image: alpine + volumes: + - ./sample_logs:/sample_logs:ro + - ${SERVICE_LOGS_DIR}:/var/log + command: /bin/sh -c "cp /sample_logs/* /var/log/" diff --git a/packages/filestream/_dev/deploy/docker/sample_logs/test-filestream.log b/packages/filestream/_dev/deploy/docker/sample_logs/test-filestream.log new file mode 100644 index 00000000000..c732a598f18 --- /dev/null +++ b/packages/filestream/_dev/deploy/docker/sample_logs/test-filestream.log @@ -0,0 +1,201 @@ +214.248.225.154 - - [28/Oct/2024:11:43:05 -0400] "GET /whiteboard/enhance/utilize/generate HTTP/1.1" 100 20297 +67.87.15.150 - spencer4224 [28/Oct/2024:11:43:05 -0400] "PATCH /engage/exploit HTTP/2.0" 501 26094 +44.14.248.249 - grimes5362 [28/Oct/2024:11:43:05 -0400] "PUT /viral HTTP/1.0" 204 22602 +236.45.12.77 - - [28/Oct/2024:11:43:05 -0400] "GET /mindshare/integrated/markets HTTP/1.0" 400 6262 +73.208.135.168 - - [28/Oct/2024:11:43:05 -0400] "POST /enable/enable/leading-edge HTTP/1.0" 201 11352 +16.181.70.54 - - [28/Oct/2024:11:43:05 -0400] "POST /synergistic/granular HTTP/2.0" 100 16148 +166.91.191.192 - wolff1541 [28/Oct/2024:11:43:05 -0400] "PUT /24%2f7 HTTP/2.0" 201 10223 +69.93.175.50 - prosacco2318 [28/Oct/2024:11:43:05 -0400] "HEAD /engineer/bricks-and-clicks/enterprise HTTP/1.1" 304 16039 +91.139.114.101 - daniel6373 [28/Oct/2024:11:43:05 -0400] "DELETE /revolutionary/monetize/deliverables HTTP/1.0" 301 7006 +253.149.37.243 - - [28/Oct/2024:11:43:05 -0400] "PATCH /innovate/systems/mission-critical HTTP/1.1" 204 26320 +228.185.84.86 - - [28/Oct/2024:11:43:05 -0400] "PATCH /killer HTTP/1.0" 503 21032 +85.108.205.194 - schmidt1753 [28/Oct/2024:11:43:05 -0400] "PATCH 
/portals/eyeballs/infomediaries/innovate HTTP/2.0" 100 21868 +30.149.85.220 - osinski4741 [28/Oct/2024:11:43:05 -0400] "PUT /harness/integrate HTTP/2.0" 100 20344 +30.255.23.38 - sporer8612 [28/Oct/2024:11:43:05 -0400] "POST /exploit HTTP/2.0" 403 19601 +166.13.244.14 - fay5650 [28/Oct/2024:11:43:05 -0400] "PATCH /facilitate/users/extensible HTTP/1.1" 201 13459 +221.247.102.111 - heidenreich2812 [28/Oct/2024:11:43:05 -0400] "PATCH /envisioneer/users/communities HTTP/1.0" 406 5467 +230.147.69.162 - wisozk2837 [28/Oct/2024:11:43:05 -0400] "GET /expedite/functionalities HTTP/1.0" 416 29687 +9.26.150.217 - - [28/Oct/2024:11:43:05 -0400] "PATCH /e-commerce/niches HTTP/1.0" 405 2846 +107.232.151.236 - - [28/Oct/2024:11:43:05 -0400] "HEAD /disintermediate/exploit/iterate HTTP/1.1" 403 10893 +31.234.118.147 - - [28/Oct/2024:11:43:05 -0400] "PATCH /integrate HTTP/2.0" 401 26889 +209.100.29.200 - schoen2016 [28/Oct/2024:11:43:05 -0400] "GET /convergence/initiatives HTTP/1.0" 203 24519 +2.215.95.149 - - [28/Oct/2024:11:43:05 -0400] "POST /visionary/web-readiness/deliverables HTTP/2.0" 100 15468 +235.237.107.159 - - [28/Oct/2024:11:43:05 -0400] "GET /experiences HTTP/2.0" 200 28170 +116.140.10.141 - - [28/Oct/2024:11:43:05 -0400] "POST /sexy/redefine/synthesize/strategic HTTP/2.0" 416 10052 +200.4.44.254 - - [28/Oct/2024:11:43:05 -0400] "PUT /viral HTTP/1.0" 405 11706 +231.77.10.110 - fahey7030 [28/Oct/2024:11:43:05 -0400] "DELETE /next-generation/out-of-the-box/world-class/cutting-edge HTTP/2.0" 403 3203 +67.246.122.23 - - [28/Oct/2024:11:43:05 -0400] "PUT /e-commerce HTTP/2.0" 416 1138 +55.187.214.167 - cruickshank2888 [28/Oct/2024:11:43:05 -0400] "GET /enable/disintermediate/solutions HTTP/1.0" 100 4895 +101.224.81.106 - daniel3724 [28/Oct/2024:11:43:05 -0400] "HEAD /drive/revolutionary/one-to-one/transparent HTTP/1.0" 416 18694 +33.176.208.107 - - [28/Oct/2024:11:43:05 -0400] "GET /mission-critical/networks/user-centric/visualize HTTP/1.1" 301 11319 +72.170.16.151 - 
borer3807 [28/Oct/2024:11:43:05 -0400] "PUT /synergize/vertical/expedite/embrace HTTP/1.0" 304 8674 +5.11.218.114 - huels2446 [28/Oct/2024:11:43:05 -0400] "DELETE /transparent/roi HTTP/1.1" 304 17706 +19.73.112.30 - lowe6322 [28/Oct/2024:11:43:05 -0400] "PATCH /proactive/enable/deploy HTTP/2.0" 400 22001 +32.72.108.85 - - [28/Oct/2024:11:43:05 -0400] "PUT /streamline/synergize/enable HTTP/1.1" 400 6169 +52.5.52.45 - - [28/Oct/2024:11:43:05 -0400] "POST /users/magnetic/synergize HTTP/1.0" 205 774 +100.122.254.89 - huels7545 [28/Oct/2024:11:43:05 -0400] "GET /bleeding-edge HTTP/2.0" 100 17464 +197.135.19.123 - king6537 [28/Oct/2024:11:43:05 -0400] "DELETE /engineer/envisioneer HTTP/1.0" 302 18057 +132.87.153.166 - - [28/Oct/2024:11:43:05 -0400] "GET /wireless/next-generation/action-items HTTP/1.0" 503 24612 +254.129.28.136 - - [28/Oct/2024:11:43:05 -0400] "DELETE /magnetic/enhance/innovative HTTP/1.1" 500 3058 +156.216.73.88 - marks7766 [28/Oct/2024:11:43:05 -0400] "HEAD /embrace/robust/empower/dynamic HTTP/2.0" 401 9343 +120.91.134.238 - haley5675 [28/Oct/2024:11:43:05 -0400] "PUT /distributed/distributed/platforms/one-to-one HTTP/2.0" 502 13901 +117.107.133.182 - - [28/Oct/2024:11:43:05 -0400] "GET /e-business HTTP/1.1" 302 24950 +148.158.205.188 - quitzon8104 [28/Oct/2024:11:43:05 -0400] "POST /killer/extend HTTP/1.1" 403 25349 +15.11.186.13 - marks8401 [28/Oct/2024:11:43:05 -0400] "POST /next-generation/innovative/cross-platform HTTP/1.0" 504 14462 +120.31.134.117 - morissette1467 [28/Oct/2024:11:43:05 -0400] "PATCH /interactive/scale/engineer HTTP/1.1" 502 6004 +210.6.224.154 - - [28/Oct/2024:11:43:05 -0400] "DELETE /web+services HTTP/1.0" 100 10226 +46.26.51.56 - hermann1032 [28/Oct/2024:11:43:05 -0400] "PUT /infrastructures/enable/enable/vortals HTTP/1.1" 204 22841 +111.35.137.24 - stehr5743 [28/Oct/2024:11:43:05 -0400] "PATCH /disintermediate/vortals/reintermediate/ubiquitous HTTP/1.0" 416 6374 +50.9.184.25 - weimann3762 [28/Oct/2024:11:43:05 -0400] "PATCH 
/mindshare HTTP/1.1" 403 11147 +96.205.13.247 - - [28/Oct/2024:11:43:05 -0400] "POST /disintermediate/schemas/e-business/intuitive HTTP/1.0" 416 8255 +187.73.167.28 - - [28/Oct/2024:11:43:05 -0400] "PUT /roi/eyeballs/dynamic HTTP/1.1" 304 16758 +190.192.91.189 - - [28/Oct/2024:11:43:05 -0400] "POST /granular HTTP/1.1" 406 7438 +192.218.40.227 - kunde6360 [28/Oct/2024:11:43:05 -0400] "POST /sexy/24%2f7 HTTP/1.0" 504 387 +148.176.81.69 - - [28/Oct/2024:11:43:05 -0400] "PUT /innovate/next-generation HTTP/1.1" 416 24754 +137.67.194.171 - hayes3482 [28/Oct/2024:11:43:05 -0400] "PUT /dot-com/holistic HTTP/1.0" 304 8329 +10.146.240.250 - bradtke1263 [28/Oct/2024:11:43:05 -0400] "DELETE /rich HTTP/1.1" 205 1181 +231.184.96.47 - smitham6816 [28/Oct/2024:11:43:05 -0400] "GET /innovative/back-end/visualize HTTP/1.1" 406 4171 +57.244.102.55 - schmidt1525 [28/Oct/2024:11:43:05 -0400] "PATCH /seamless HTTP/2.0" 503 28955 +37.153.235.170 - mosciski8805 [28/Oct/2024:11:43:05 -0400] "DELETE /morph/convergence/bricks-and-clicks HTTP/1.0" 404 17095 +249.253.248.159 - beer3874 [28/Oct/2024:11:43:05 -0400] "PATCH /envisioneer/rich/benchmark/clicks-and-mortar HTTP/1.0" 200 23104 +112.141.64.157 - - [28/Oct/2024:11:43:05 -0400] "DELETE /metrics/open-source/roi HTTP/2.0" 205 17864 +206.129.76.38 - johnston8358 [28/Oct/2024:11:43:05 -0400] "GET /integrated/cross-media/transparent/scale HTTP/1.0" 404 1808 +220.164.69.246 - - [28/Oct/2024:11:43:05 -0400] "POST /strategic/strategic/b2b HTTP/1.0" 501 5179 +69.195.107.53 - moen4434 [28/Oct/2024:11:43:05 -0400] "PATCH /matrix/markets/viral HTTP/2.0" 416 17421 +133.42.130.233 - - [28/Oct/2024:11:43:05 -0400] "HEAD /b2c HTTP/1.0" 502 20256 +97.30.210.226 - kshlerin7817 [28/Oct/2024:11:43:05 -0400] "GET /redefine/value-added/sticky/mindshare HTTP/1.1" 406 19157 +40.242.45.38 - crona7454 [28/Oct/2024:11:43:05 -0400] "PUT /iterate/e-markets/sexy/plug-and-play HTTP/1.0" 502 11057 +161.196.184.29 - schmeler6270 [28/Oct/2024:11:43:05 -0400] "HEAD 
/bleeding-edge/generate/networks HTTP/2.0" 200 18966 +225.194.191.121 - legros6112 [28/Oct/2024:11:43:05 -0400] "POST /value-added HTTP/1.1" 501 29171 +7.236.142.221 - jenkins8726 [28/Oct/2024:11:43:05 -0400] "DELETE /niches/scale HTTP/2.0" 500 15988 +126.203.13.63 - schultz7303 [28/Oct/2024:11:43:05 -0400] "POST /systems/clicks-and-mortar HTTP/1.1" 403 8519 +188.109.77.66 - - [28/Oct/2024:11:43:05 -0400] "DELETE /ubiquitous HTTP/2.0" 200 4394 +92.73.224.117 - - [28/Oct/2024:11:43:05 -0400] "POST /world-class HTTP/2.0" 501 22809 +226.16.167.158 - schaefer3138 [28/Oct/2024:11:43:05 -0400] "HEAD /interfaces/communities/innovative HTTP/1.1" 404 3793 +80.183.106.115 - medhurst8327 [28/Oct/2024:11:43:05 -0400] "DELETE /infomediaries/proactive HTTP/2.0" 204 12247 +171.248.159.151 - - [28/Oct/2024:11:43:05 -0400] "POST /engineer HTTP/1.0" 301 7782 +197.212.199.75 - - [28/Oct/2024:11:43:05 -0400] "HEAD /whiteboard/sticky/back-end HTTP/1.0" 502 19845 +87.243.139.22 - - [28/Oct/2024:11:43:05 -0400] "HEAD /infomediaries/enable/value-added HTTP/1.1" 201 24336 +25.158.205.246 - - [28/Oct/2024:11:43:05 -0400] "POST /convergence/unleash HTTP/1.1" 403 17832 +19.229.179.119 - metz1774 [28/Oct/2024:11:43:05 -0400] "PUT /embrace/web-readiness/whiteboard HTTP/1.0" 200 29680 +89.121.75.162 - - [28/Oct/2024:11:43:05 -0400] "GET /models/synthesize HTTP/2.0" 304 13951 +85.205.57.226 - - [28/Oct/2024:11:43:05 -0400] "PUT /synergistic/end-to-end/convergence HTTP/1.0" 503 20696 +110.132.55.244 - - [28/Oct/2024:11:43:05 -0400] "GET /generate/dot-com/innovate HTTP/1.1" 200 10360 +91.62.55.142 - - [28/Oct/2024:11:43:05 -0400] "DELETE /ubiquitous/recontextualize/integrate HTTP/2.0" 404 7334 +241.97.70.90 - - [28/Oct/2024:11:43:05 -0400] "DELETE /drive/b2c/leverage/engineer HTTP/1.1" 100 17798 +201.54.249.55 - - [28/Oct/2024:11:43:05 -0400] "DELETE /optimize HTTP/2.0" 403 3239 +177.81.236.69 - hettinger4814 [28/Oct/2024:11:43:05 -0400] "GET /grow/applications/whiteboard HTTP/1.0" 501 5110 
+237.228.249.79 - - [28/Oct/2024:11:43:05 -0400] "HEAD /bricks-and-clicks HTTP/1.1" 416 18105 +159.255.30.56 - - [28/Oct/2024:11:43:05 -0400] "HEAD /cross-media/aggregate/paradigms HTTP/2.0" 201 27927 +83.30.51.81 - - [28/Oct/2024:11:43:05 -0400] "HEAD /disintermediate/viral HTTP/1.1" 205 17337 +160.229.158.184 - - [28/Oct/2024:11:43:05 -0400] "PATCH /web-readiness/cutting-edge/utilize/architectures HTTP/2.0" 406 8004 +126.35.41.120 - - [28/Oct/2024:11:43:05 -0400] "PUT /web+services HTTP/1.0" 400 22200 +202.117.241.106 - - [28/Oct/2024:11:43:05 -0400] "PATCH /methodologies/reinvent HTTP/1.1" 400 21556 +125.26.125.195 - daniel4578 [28/Oct/2024:11:43:05 -0400] "POST /revolutionize/e-enable HTTP/1.0" 400 22173 +2.51.74.89 - - [28/Oct/2024:11:43:05 -0400] "HEAD /infrastructures/eyeballs/monetize HTTP/2.0" 205 13563 +137.45.235.157 - - [28/Oct/2024:11:43:05 -0400] "PUT /cross-platform HTTP/2.0" 416 17775 +226.57.212.87 - roob6810 [28/Oct/2024:11:43:05 -0400] "DELETE /web-enabled HTTP/1.1" 302 9647 +98.199.9.131 - jakubowski5411 [28/Oct/2024:11:43:05 -0400] "HEAD /e-enable/e-markets/benchmark HTTP/1.1" 201 26136 +237.71.63.100 - ward3237 [28/Oct/2024:11:43:05 -0400] "PUT /whiteboard/communities/incubate HTTP/2.0" 404 8185 +86.146.76.252 - bartell8210 [28/Oct/2024:11:43:05 -0400] "GET /mindshare/streamline HTTP/1.1" 302 17471 +235.66.4.90 - - [28/Oct/2024:11:43:05 -0400] "POST /transform HTTP/1.0" 201 2631 +31.91.162.105 - veum1721 [28/Oct/2024:11:43:05 -0400] "POST /exploit HTTP/1.0" 500 21958 +229.209.158.246 - - [28/Oct/2024:11:43:05 -0400] "PUT /paradigms HTTP/1.0" 404 1699 +204.244.210.18 - - [28/Oct/2024:11:43:05 -0400] "GET /e-services HTTP/1.0" 203 7228 +114.149.28.36 - - [28/Oct/2024:11:43:05 -0400] "HEAD /synergies/mindshare/deploy HTTP/1.0" 406 19296 +207.19.146.244 - dicki6423 [28/Oct/2024:11:43:05 -0400] "HEAD /reinvent/e-commerce/transparent/next-generation HTTP/1.0" 501 9713 +153.12.252.54 - - [28/Oct/2024:11:43:05 -0400] "PUT /facilitate HTTP/2.0" 405 
22952 +61.73.87.142 - koch5164 [28/Oct/2024:11:43:05 -0400] "DELETE /e-services/cross-platform/clicks-and-mortar/convergence HTTP/2.0" 504 28262 +144.241.56.225 - feeney1412 [28/Oct/2024:11:43:05 -0400] "HEAD /applications/relationships/communities HTTP/2.0" 205 5027 +46.67.215.151 - - [28/Oct/2024:11:43:05 -0400] "PATCH /channels/engage/infrastructures HTTP/2.0" 406 2816 +87.178.118.132 - jenkins6551 [28/Oct/2024:11:43:05 -0400] "GET /functionalities HTTP/1.0" 500 22983 +194.5.137.66 - rohan3165 [28/Oct/2024:11:43:05 -0400] "HEAD /deliverables/enterprise/proactive/sexy HTTP/1.0" 504 6801 +54.166.248.82 - - [28/Oct/2024:11:43:05 -0400] "POST /networks/killer/iterate/24%2f365 HTTP/2.0" 405 25323 +68.170.52.215 - - [28/Oct/2024:11:43:05 -0400] "PUT /bleeding-edge/synthesize/relationships HTTP/1.1" 403 28757 +39.129.251.108 - - [28/Oct/2024:11:43:05 -0400] "POST /enterprise HTTP/1.0" 504 21890 +149.93.65.151 - eichmann1223 [28/Oct/2024:11:43:05 -0400] "PATCH /benchmark/e-commerce/transition HTTP/1.0" 204 16993 +183.139.55.179 - - [28/Oct/2024:11:43:05 -0400] "PATCH /e-commerce/grow HTTP/1.0" 205 21764 +34.8.222.109 - reichel4854 [28/Oct/2024:11:43:05 -0400] "PATCH /enterprise/dynamic HTTP/1.1" 100 25572 +157.68.142.153 - - [28/Oct/2024:11:43:05 -0400] "HEAD /empower/redefine HTTP/2.0" 204 6301 +166.250.12.136 - armstrong6208 [28/Oct/2024:11:43:05 -0400] "DELETE /e-business HTTP/1.1" 400 5057 +75.168.145.227 - lebsack3228 [28/Oct/2024:11:43:05 -0400] "PATCH /solutions/e-markets/repurpose/methodologies HTTP/1.0" 503 17007 +235.118.208.55 - waelchi2177 [28/Oct/2024:11:43:05 -0400] "GET /monetize/content/brand HTTP/1.0" 302 17991 +23.148.90.172 - schuster3382 [28/Oct/2024:11:43:05 -0400] "PATCH /deploy HTTP/1.1" 201 4865 +166.43.138.44 - bins8065 [28/Oct/2024:11:43:05 -0400] "POST /roi/real-time/eyeballs HTTP/2.0" 400 23969 +215.173.10.131 - - [28/Oct/2024:11:43:05 -0400] "GET /empower/envisioneer/deploy/users HTTP/1.0" 200 12412 +76.92.102.53 - - [28/Oct/2024:11:43:05 
-0400] "DELETE /back-end/value-added/embrace HTTP/1.1" 416 1902 +99.217.139.166 - - [28/Oct/2024:11:43:05 -0400] "GET /end-to-end/networks HTTP/1.1" 302 21205 +146.177.240.46 - - [28/Oct/2024:11:43:05 -0400] "POST /scalable/integrated/bleeding-edge HTTP/1.1" 100 22397 +51.46.239.116 - - [28/Oct/2024:11:43:05 -0400] "GET /dynamic/visualize HTTP/2.0" 501 255 +43.178.237.148 - jacobson5552 [28/Oct/2024:11:43:05 -0400] "POST /utilize/holistic/embrace/initiatives HTTP/2.0" 201 14199 +212.7.11.21 - hilll5473 [28/Oct/2024:11:43:05 -0400] "DELETE /dot-com/platforms HTTP/1.0" 416 29570 +232.121.223.249 - hilpert6448 [28/Oct/2024:11:43:05 -0400] "GET /aggregate/engineer/seize HTTP/2.0" 203 16329 +65.184.69.31 - - [28/Oct/2024:11:43:05 -0400] "HEAD /partnerships/e-business HTTP/1.0" 500 24261 +182.245.254.8 - - [28/Oct/2024:11:43:05 -0400] "PUT /monetize/vertical HTTP/1.0" 302 27634 +13.170.186.211 - - [28/Oct/2024:11:43:05 -0400] "GET /distributed/infomediaries/distributed HTTP/1.1" 302 13029 +150.165.156.43 - schaden1331 [28/Oct/2024:11:43:05 -0400] "DELETE /convergence/sexy HTTP/1.1" 304 12917 +69.119.77.250 - - [28/Oct/2024:11:43:05 -0400] "HEAD /relationships HTTP/1.1" 401 16380 +252.79.109.241 - - [28/Oct/2024:11:43:05 -0400] "PUT /transform/infrastructures HTTP/1.1" 416 24618 +6.203.191.255 - - [28/Oct/2024:11:43:05 -0400] "PATCH /reintermediate/architect/paradigms HTTP/1.1" 100 20670 +112.108.154.154 - - [28/Oct/2024:11:43:05 -0400] "HEAD /experiences/enterprise/paradigms/incentivize HTTP/2.0" 301 2015 +34.131.84.128 - oreilly8814 [28/Oct/2024:11:43:05 -0400] "PATCH /e-markets/cutting-edge/best-of-breed HTTP/1.1" 400 26712 +4.164.202.23 - - [28/Oct/2024:11:43:05 -0400] "POST /revolutionize/interactive HTTP/1.1" 302 3967 +191.11.199.228 - oberbrunner7433 [28/Oct/2024:11:43:05 -0400] "PUT /evolve/front-end HTTP/2.0" 416 7724 +118.145.205.108 - vonrueden1677 [28/Oct/2024:11:43:05 -0400] "DELETE /wireless/global/innovative/grow HTTP/2.0" 500 1601 +49.172.193.207 - 
simonis1442 [28/Oct/2024:11:43:05 -0400] "POST /e-business/vortals HTTP/2.0" 501 23392 +232.87.173.70 - terry8108 [28/Oct/2024:11:43:05 -0400] "HEAD /vertical HTTP/2.0" 403 22713 +137.115.57.37 - - [28/Oct/2024:11:43:05 -0400] "PUT /e-commerce/relationships/24%2f365 HTTP/1.0" 400 3453 +19.85.191.235 - - [28/Oct/2024:11:43:05 -0400] "POST /24%2f7/relationships/relationships HTTP/2.0" 416 21944 +34.211.232.117 - lebsack4570 [28/Oct/2024:11:43:05 -0400] "HEAD /web-enabled/disintermediate HTTP/1.0" 405 8271 +192.125.103.30 - grimes4261 [28/Oct/2024:11:43:05 -0400] "PATCH /metrics/leading-edge HTTP/1.0" 401 26907 +26.45.27.126 - keebler2821 [28/Oct/2024:11:43:05 -0400] "POST /streamline HTTP/2.0" 403 7819 +99.81.87.8 - - [28/Oct/2024:11:43:05 -0400] "GET /vertical/recontextualize HTTP/2.0" 403 18932 +226.65.198.79 - - [28/Oct/2024:11:43:05 -0400] "HEAD /open-source/communities/engineer HTTP/2.0" 503 7869 +165.234.250.122 - douglas7655 [28/Oct/2024:11:43:05 -0400] "PUT /models HTTP/1.0" 500 10260 +149.3.123.141 - - [28/Oct/2024:11:43:05 -0400] "DELETE /integrate HTTP/1.0" 500 22877 +156.26.83.228 - - [28/Oct/2024:11:43:05 -0400] "PATCH /24%2f7 HTTP/1.0" 501 6843 +233.185.19.86 - - [28/Oct/2024:11:43:05 -0400] "DELETE /experiences/dynamic/b2c/enable HTTP/2.0" 401 16961 +73.239.130.183 - - [28/Oct/2024:11:43:05 -0400] "HEAD /experiences HTTP/1.0" 203 29292 +234.147.194.109 - - [28/Oct/2024:11:43:05 -0400] "PUT /streamline HTTP/2.0" 416 1435 +36.127.12.229 - - [28/Oct/2024:11:43:05 -0400] "PUT /magnetic HTTP/2.0" 100 4042 +113.142.10.14 - - [28/Oct/2024:11:43:05 -0400] "GET /innovate/bleeding-edge/iterate/synergistic HTTP/1.0" 502 11853 +254.32.185.217 - dicki2143 [28/Oct/2024:11:43:05 -0400] "DELETE /applications HTTP/2.0" 400 5260 +16.152.27.28 - - [28/Oct/2024:11:43:05 -0400] "PATCH /enhance/markets/proactive HTTP/1.0" 403 18791 +113.197.69.122 - - [28/Oct/2024:11:43:05 -0400] "PATCH /global HTTP/1.1" 405 7418 +34.239.255.199 - schinner6573 [28/Oct/2024:11:43:05 -0400] 
"DELETE /extensible HTTP/1.0" 404 3195 +247.126.96.49 - kovacek2458 [28/Oct/2024:11:43:05 -0400] "GET /open-source HTTP/1.0" 301 17339 +177.21.41.13 - beer2817 [28/Oct/2024:11:43:05 -0400] "PUT /web-readiness/integrate HTTP/1.0" 301 25609 +121.7.10.64 - crist6368 [28/Oct/2024:11:43:05 -0400] "POST /networks/magnetic/aggregate/seize HTTP/1.1" 405 176 +2.181.94.140 - - [28/Oct/2024:11:43:05 -0400] "HEAD /e-business/communities/methodologies HTTP/1.1" 502 26021 +141.51.50.246 - purdy8556 [28/Oct/2024:11:43:05 -0400] "DELETE /cross-media HTTP/2.0" 504 24584 +111.167.199.209 - - [28/Oct/2024:11:43:05 -0400] "HEAD /interfaces/proactive/applications/engineer HTTP/2.0" 204 5448 +243.163.196.253 - - [28/Oct/2024:11:43:05 -0400] "PUT /e-commerce/channels HTTP/1.0" 504 28579 +66.196.205.137 - - [28/Oct/2024:11:43:05 -0400] "DELETE /web-readiness/incubate/leverage HTTP/1.1" 406 17064 +204.120.35.39 - strosin8840 [28/Oct/2024:11:43:05 -0400] "GET /integrate/sticky/facilitate/visionary HTTP/1.0" 205 21089 +224.165.173.148 - yundt7831 [28/Oct/2024:11:43:05 -0400] "POST /ubiquitous HTTP/1.1" 304 11821 +147.49.238.32 - - [28/Oct/2024:11:43:05 -0400] "HEAD /cross-platform/web-enabled HTTP/1.1" 504 20764 +19.14.144.65 - boehm5306 [28/Oct/2024:11:43:05 -0400] "POST /best-of-breed HTTP/2.0" 416 4944 +51.157.50.180 - damore6438 [28/Oct/2024:11:43:05 -0400] "DELETE /whiteboard/mission-critical/synergize HTTP/1.0" 403 5557 +92.33.231.110 - corkery8217 [28/Oct/2024:11:43:05 -0400] "PUT /front-end/productize/b2b HTTP/1.0" 201 16600 +178.214.252.164 - dietrich6307 [28/Oct/2024:11:43:05 -0400] "PUT /infrastructures/innovate HTTP/2.0" 400 10733 +139.169.70.151 - - [28/Oct/2024:11:43:05 -0400] "DELETE /morph/out-of-the-box/web-enabled/b2c HTTP/1.0" 504 8950 +176.60.210.40 - - [28/Oct/2024:11:43:05 -0400] "POST /vortals/customized HTTP/1.0" 400 26581 +238.22.176.96 - hodkiewicz6736 [28/Oct/2024:11:43:05 -0400] "PATCH /robust/rich/e-markets/cross-platform HTTP/1.1" 406 23022 +14.114.248.11 - - 
[28/Oct/2024:11:43:05 -0400] "GET /intuitive HTTP/1.1" 401 19935 +70.163.153.151 - - [28/Oct/2024:11:43:05 -0400] "GET /frictionless/global HTTP/1.0" 302 8844 +249.107.246.67 - - [28/Oct/2024:11:43:05 -0400] "DELETE /deliver/incubate/whiteboard HTTP/1.0" 503 18881 +115.78.50.96 - kuphal1113 [28/Oct/2024:11:43:05 -0400] "HEAD /synergies HTTP/2.0" 416 6256 +104.118.11.152 - - [28/Oct/2024:11:43:05 -0400] "DELETE /one-to-one/convergence/revolutionize/granular HTTP/1.1" 500 4374 +137.175.20.72 - satterfield7060 [28/Oct/2024:11:43:05 -0400] "PATCH /magnetic/killer HTTP/2.0" 203 19318 +48.105.140.140 - pfannerstill4646 [28/Oct/2024:11:43:05 -0400] "DELETE /synergies/platforms HTTP/2.0" 301 26462 +56.40.176.139 - moen2368 [28/Oct/2024:11:43:05 -0400] "PATCH /proactive/b2b HTTP/2.0" 301 5083 +96.117.203.23 - - [28/Oct/2024:11:43:05 -0400] "GET /morph HTTP/1.0" 204 20356 +160.129.214.251 - senger4573 [28/Oct/2024:11:43:05 -0400] "PUT /portals/infomediaries/synergize/drive HTTP/1.1" 203 23946 +90.54.182.122 - - [28/Oct/2024:11:43:05 -0400] "PUT /paradigms/global/clicks-and-mortar HTTP/1.0" 205 12861 +70.161.32.5 - - [28/Oct/2024:11:43:05 -0400] "POST /innovative/extensible HTTP/2.0" 503 27622 +223.151.80.175 - walter4043 [28/Oct/2024:11:43:05 -0400] "DELETE /empower/global/transition/synergize HTTP/1.1" 416 15885 +16.10.120.64 - rogahn6514 [28/Oct/2024:11:43:05 -0400] "POST /incubate HTTP/1.1" 501 29339 +248.30.33.178 - - [28/Oct/2024:11:43:05 -0400] "PUT /seamless/drive/one-to-one/empower HTTP/1.1" 400 9870 +187.162.232.119 - - [28/Oct/2024:11:43:05 -0400] "HEAD /frictionless HTTP/1.0" 201 25161 +190.3.2.207 - - [28/Oct/2024:11:43:05 -0400] "POST /empower/productize/collaborative HTTP/1.1" 201 13446 +150.236.182.111 - - [28/Oct/2024:11:43:05 -0400] "GET /one-to-one/value-added/e-markets/action-items HTTP/2.0" 401 3108 diff --git a/packages/filestream/changelog.yml b/packages/filestream/changelog.yml new file mode 100644 index 00000000000..81d18bd6de6 --- /dev/null +++ 
b/packages/filestream/changelog.yml @@ -0,0 +1,5 @@ +- version: "0.0.1" + changes: + - description: Initial Release + type: enhancement + link: https://github.com/elastic/integrations/pull/11332 diff --git a/packages/filestream/data_stream/generic/_dev/test/system/test-filestream-config.yml b/packages/filestream/data_stream/generic/_dev/test/system/test-filestream-config.yml new file mode 100644 index 00000000000..838ea7713f2 --- /dev/null +++ b/packages/filestream/data_stream/generic/_dev/test/system/test-filestream-config.yml @@ -0,0 +1,8 @@ +service: filestream-logfile +input: filestream +data_stream: + vars: + paths: + - "{{SERVICE_LOGS_DIR}}/test-filestream.log" +assert: + hit_count: 201 diff --git a/packages/filestream/data_stream/generic/agent/stream/filestream.yml.hbs b/packages/filestream/data_stream/generic/agent/stream/filestream.yml.hbs new file mode 100644 index 00000000000..e9d8426e903 --- /dev/null +++ b/packages/filestream/data_stream/generic/agent/stream/filestream.yml.hbs @@ -0,0 +1,147 @@ +data_stream: + dataset: {{data_stream.dataset}} +paths: +{{#each paths as |path i|}} + - {{path}} +{{/each}} + +{{#if pipeline}} +pipeline: {{pipeline}} +{{/if}} + +{{#if recursive_glob}} +prospector.scanner.recursive_glob: {{recursive_glob}} +{{/if}} + +{{#if exclude_files}} +prospector.scanner.exclude_files: +{{#each exclude_files as |exclude_file i|}} + - {{exclude_file}} +{{/each}} +{{/if}} + +{{#if include_files}} +prospector.scanner.include_files: +{{#each include_files as |include_file i|}} + - {{include_file}} +{{/each}} +{{/if}} + +{{#if symlinks}} +prospector.scanner.symlinks: {{symlinks}} +{{/if}} + +{{#if resend_on_touch}} +prospector.scanner.resend_on_touch: {{resend_on_touch}} +{{/if}} + +{{#if check_interval}} +prospector.scanner.check_interval: {{check_interval}} +{{/if}} + +{{#if ignore_older}} +ignore_older: {{ignore_older}} +{{/if}} + +{{#if ignore_inactive}} +ignore_inactive: {{ignore_inactive}} +{{/if}} + +{{#if
close_on_state_changed_inactive}} +close.on_state_change.inactive: {{close_on_state_changed_inactive}} +{{/if}} + +{{#if close_on_state_changed_renamed}} +close.on_state_change.renamed: {{close_on_state_changed_renamed}} +{{/if}} + +{{#if close_on_state_changed_removed}} +close.on_state_change.removed: {{close_on_state_changed_removed}} +{{/if}} + +{{#if close_reader_eof}} +close.reader.on_eof: {{close_reader_eof}} +{{/if}} + +{{#if close_reader_after_interval}} +close.reader.after_interval: {{close_reader_after_interval}} +{{/if}} + +{{#if clean_inactive}} +clean_inactive: {{clean_inactive}} +{{/if}} + +{{#if clean_removed}} +clean_removed: {{clean_removed}} +{{/if}} + +{{#if backoff_init}} +backoff.init: {{backoff_init}} +{{/if}} + +{{#if backoff_max}} +backoff.max: {{backoff_max}} +{{/if}} + +{{#if rotation_external_strategy_copytruncate}} +rotation.external.strategy.copytruncate: {{rotation_external_strategy_copytruncate}} +{{/if}} + +{{#if encoding}} +encoding: {{encoding}} +{{/if}} + +{{#if exclude_lines}} +exclude_lines: +{{#each exclude_lines as |exclude_line i|}} + - {{exclude_line}} +{{/each}} +{{/if}} + +{{#if include_lines}} +include_lines: +{{#each include_lines as |include_line i|}} + - {{include_line}} +{{/each}} +{{/if}} + +{{#if buffer_size}} +buffer_size: {{buffer_size}} +{{/if}} + +{{#if message_max_bytes}} +message_max_bytes: {{message_max_bytes}} +{{/if}} + +{{#if parsers}} +parsers: +{{parsers}} +{{/if}} + +{{#if tags}} +tags: +{{#each tags as |tag i|}} + - {{tag}} +{{/each}} +{{/if}} + +{{#contains "forwarded" tags}} +publisher_pipeline.disable_host: true +{{/contains}} + +{{#if processors}} +processors: +{{processors}} +{{/if}} + +{{#if harvester_limit }} +harvester_limit: {{harvester_limit}} +{{/if}} + +{{#if fingerprint }} +# Fingerprint offset/length are prospector.scanner settings; file_identity only selects the identity. +prospector.scanner.fingerprint.enabled: true +file_identity.fingerprint.enabled: true +prospector.scanner.fingerprint.offset: {{ fingerprint_offset }} +prospector.scanner.fingerprint.length: {{ fingerprint_length }} +{{/if}} diff
--git a/packages/filestream/data_stream/generic/fields/base-fields.yml b/packages/filestream/data_stream/generic/fields/base-fields.yml new file mode 100644 index 00000000000..97cf5aa8438 --- /dev/null +++ b/packages/filestream/data_stream/generic/fields/base-fields.yml @@ -0,0 +1,20 @@ +- name: data_stream.type + type: constant_keyword + description: Data stream type. +- name: data_stream.dataset + type: constant_keyword + description: Data stream dataset. +- name: data_stream.namespace + type: constant_keyword + description: Data stream namespace. +- name: event.module + type: constant_keyword + description: Event module + value: filestream +- name: event.dataset + type: constant_keyword + description: Event dataset + value: filestream.generic +- name: "@timestamp" + type: date + description: Event timestamp. diff --git a/packages/filestream/data_stream/generic/fields/beats.yml b/packages/filestream/data_stream/generic/fields/beats.yml new file mode 100644 index 00000000000..ede69588554 --- /dev/null +++ b/packages/filestream/data_stream/generic/fields/beats.yml @@ -0,0 +1,6 @@ +- name: input.type + description: Type of Filebeat input. 
+ type: keyword +- name: tags + type: keyword + description: User defined tags diff --git a/packages/filestream/data_stream/generic/fields/ecs.yml b/packages/filestream/data_stream/generic/fields/ecs.yml new file mode 100644 index 00000000000..10f307537ab --- /dev/null +++ b/packages/filestream/data_stream/generic/fields/ecs.yml @@ -0,0 +1,12 @@ +- name: ecs.version + external: ecs +- name: log.file.path + external: ecs +- name: log.offset + description: Current log offset +- name: log.level + external: ecs +- name: message + external: ecs +- name: event.original + external: ecs diff --git a/packages/filestream/data_stream/generic/fields/filestream.yml b/packages/filestream/data_stream/generic/fields/filestream.yml new file mode 100644 index 00000000000..c01a203aa5f --- /dev/null +++ b/packages/filestream/data_stream/generic/fields/filestream.yml @@ -0,0 +1,13 @@ +- name: log.file.inode + type: keyword + description: | + inode of the ingested file. +- name: log.file.device_id + type: keyword + description: | + device ID from the device where the file is. +- name: log.file.fingerprint + type: keyword + index: false + description: | + The fingerprint of the file when using the fingerprint file identity. diff --git a/packages/filestream/data_stream/generic/manifest.yml b/packages/filestream/data_stream/generic/manifest.yml new file mode 100644 index 00000000000..6715388a27a --- /dev/null +++ b/packages/filestream/data_stream/generic/manifest.yml @@ -0,0 +1,283 @@ +title: Custom Filestream Logs +type: logs +streams: + - input: filestream + description: Collect log data from filestream with Elastic Agent. + title: Custom Filestream Logs + template_path: filestream.yml.hbs + vars: + - name: paths + type: text + title: Paths + multi: true + required: true + show_user: true + default: + - /var/log/*.log + - name: data_stream.dataset + type: text + title: Dataset name + description: | + Dataset to write data to. Changing the dataset will send the data to a different index. 
You can't use `-` in the name of a dataset and only valid characters for [Elasticsearch index names](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html). + default: filestream.generic + required: true + show_user: true + - name: pipeline + type: text + title: Ingest Pipeline + description: | + The Ingest Node pipeline ID to be used by the integration. + required: false + show_user: true + - name: parsers + type: yaml + title: Parsers + description: | + This option expects a list of parsers that the log line has to go through. For more information see [Parsers](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-filestream.html#_parsers) + required: false + show_user: true + multi: false + default: | + #- ndjson: + # target: "" + # message_key: msg + #- multiline: + # type: count + # count_lines: 3 + - name: exclude_files + type: text + title: Exclude Files + description: | + A list of regular expressions to match the files that you want Elastic Agent to ignore. By default no files are excluded. + required: false + show_user: true + multi: true + default: + - '\.gz$' + - name: include_files + type: text + title: Include Files + description: | + A list of regular expressions to match the files that you want Elastic Agent to include. If a list of regexes is provided, only the files that are allowed by the patterns are harvested. + required: false + show_user: true + multi: true + - name: processors + type: yaml + title: Processors + multi: false + required: false + show_user: false + description: | + Processors are used to reduce the number of fields in the exported event or to enhance the event with metadata. This executes in the agent before the logs are parsed. See [Processors](https://www.elastic.co/guide/en/beats/filebeat/current/filtering-and-enhancing-data.html) for details. 
+ - name: tags + type: text + title: Tags + description: Tags to include in the published event + required: false + multi: true + show_user: true + - name: encoding + type: text + title: Encoding + description: | + The file encoding to use for reading data that contains international characters. For a full list of valid encodings, see the [Documentation](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-filestream.html#_encoding_2) + required: false + show_user: false + - name: recursive_glob + type: bool + title: Recursive Glob + description: | + Enable expanding ** into recursive glob patterns. With this feature enabled, the rightmost ** in each path is expanded into a fixed number of glob patterns. For example: /foo/** expands to /foo, /foo/*, /foo/*/*, and so on. If enabled it expands a single ** into a 8-level deep * pattern. + This feature is enabled by default. Set prospector.scanner.recursive_glob to false to disable it. + required: false + show_user: false + default: true + - name: symlinks + type: bool + title: Enable symlinks + description: | + The symlinks option allows Elastic Agent to harvest symlinks in addition to regular files. When harvesting symlinks, Elastic Agent opens and reads the original file even though it reports the path of the symlink. + ** Because this option may lead to data loss, it is disabled by default. ** + required: false + show_user: false + - name: resend_on_touch + type: bool + title: Resend on touch + description: | + If this option is enabled a file is resent if its size has not changed but its modification time has changed to a later time than before. It is disabled by default to avoid accidentally resending files. + required: false + show_user: false + - name: check_interval + type: text + title: Check Interval + description: | + How often Elastic Agent checks for new files in the paths that are specified for harvesting. 
For example, specify 1s to scan the directory as frequently as possible without causing Elastic Agent to scan too frequently. ** We do not recommend setting this value below 1s. ** + required: false + show_user: false + - name: ignore_older + type: text + title: Ignore Older + description: | + If this option is enabled, Elastic Agent ignores any files that were modified before the specified timespan. You can use time strings like 2h (2 hours) and 5m (5 minutes). The default is 0, which disables the setting. + You must set Ignore Older to be greater than On State Change Inactive. + For more information, please see the [Documentation](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-filestream.html#filebeat-input-filestream-ignore-older) + required: false + show_user: false + - name: ignore_inactive + type: text + title: Ignore Inactive + description: | + If this option is enabled, Elastic Agent ignores every file that has not been updated since the selected time. Possible options are since_first_start and since_last_start. + required: false + show_user: false + - name: close_on_state_changed_inactive + type: text + title: Close on State Changed Inactive + description: | + When this option is enabled, Elastic Agent closes the file handle if a file has not been harvested for the specified duration. The counter for the defined period starts when the last log line was read by the harvester. It is not based on the modification time of the file. If the closed file changes again, a new harvester is started and the latest changes will be picked up after Check Interval has elapsed. + required: false + show_user: false + - name: close_on_state_changed_renamed + type: bool + title: Close on State Changed Renamed + description: | + ** Only use this option if you understand that data loss is a potential side effect. ** + When this option is enabled, Elastic Agent closes the file handler when a file is renamed. This happens, for example, when rotating files.
By default, the harvester stays open and keeps reading the file because the file handler does not depend on the file name. + required: false + show_user: false + - name: close_on_state_changed_removed + type: bool + title: Close on State Changed Removed + description: | + When this option is enabled, Elastic Agent closes the harvester when a file is removed. Normally a file should only be removed after it’s inactive for the duration specified by close.on_state_change.inactive. + required: false + show_user: false + - name: close_reader_eof + type: bool + title: Close Reader EOF + description: | + ** Only use this option if you understand that data loss is a potential side effect. ** + When this option is enabled, Elastic Agent closes a file as soon as the end of a file is reached. This is useful when your files are only written once and not updated from time to time. For example, this happens when you are writing every single log event to a new file. This option is disabled by default. + required: false + show_user: false + - name: close_reader_after_interval + type: text + title: Close Reader After Interval + description: | + ** Only use this option if you understand that data loss is a potential side effect. Another side effect is that multiline events might not be completely sent before the timeout expires. ** + This option is particularly useful in case the output is blocked, which makes Elastic Agent keep open file handlers even for files that were deleted from the disk. + For more information see the [documentation](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-filestream.html#filebeat-input-filestream-close-timeout). + required: false + show_user: false + - name: clean_inactive + type: text + title: Clean Inactive + default: -1 + description: | + ** Only use this option if you understand that data loss is a potential side effect. 
** + When this option is enabled, Elastic Agent removes the state of a file after the specified period of inactivity has elapsed. + E.g. "30m". Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". By default cleaning inactive states is disabled, -1 is used to disable it. + required: false + show_user: false + - name: clean_removed + type: bool + title: Clean Removed + description: | + When this option is enabled, Elastic Agent cleans files from the registry if they cannot be found on disk anymore under the last known name. + ** You must disable this option if you also disable Close Removed. ** + required: false + show_user: false + - name: harvester_limit + title: Harvester Limit + type: integer + default: 0 + description: | + The harvester_limit option limits the number of harvesters + that are started in parallel for one input. This directly + relates to the maximum number of file handlers that are + opened. The default is 0 (no limit). + - name: backoff_init + type: text + title: Backoff Init + description: | + The backoff option defines how long Elastic Agent waits before checking a file again after EOF is reached. The default is 1s. + required: false + show_user: false + - name: backoff_max + type: text + title: Backoff Max + description: | + The maximum time for Elastic Agent to wait before checking a file again after EOF is reached. The default is 10s. + ** Requirement: Set Backoff Max to be greater than or equal to Backoff Init and less than or equal to Check Interval (Backoff Init <= Backoff Max <= Check Interval). ** + required: false + show_user: false + - name: fingerprint + title: Fingerprint file identity + type: bool + default: true + description: | + ** Changing file_identity methods between runs may result in + duplicated events in the output. ** + Uses a fingerprint generated from the first few bytes (1k is + the default, this can be configured via Fingerprint offset + and length) to identify a file instead of inode + device ID.
+ Refer to https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-filestream.html#_file_identity_2 + for more details. + - name: fingerprint_offset + title: Fingerprint offset + type: integer + default: 0 + description: | + Offset from the beginning of the file to start calculating + the fingerprint. The default is 0. Only used when the + fingerprint file identity is selected. + required: false + show_user: false + - name: fingerprint_length + title: Fingerprint length + type: integer + default: 1024 + description: | + The number of bytes used to calculate the fingerprint. The + default is 1024. Only used when the fingerprint file + identity is selected. + required: false + show_user: false + - name: rotation_external_strategy_copytruncate + type: yaml + title: Rotation Strategy + description: "If the log rotating application copies the contents of the active file and then truncates the original file, use these options to help Elastic Agent to read files correctly.\nSet the option suffix_regex so Elastic Agent can tell active and rotated files apart. \nThere are two supported suffix types in the input: numeric and date.\n" + required: false + multi: false + show_user: false + - name: exclude_lines + type: text + title: Exclude Lines + description: | + A list of regular expressions to match the lines that you want Elastic Agent to exclude. Elastic Agent drops any lines that match a regular expression in the list. By default, no lines are dropped. Empty lines are ignored. + required: false + show_user: false + multi: true + - name: include_lines + type: text + title: Include Lines + description: | + A list of regular expressions to match the lines that you want Elastic Agent to include. Elastic Agent exports only the lines that match a regular expression in the list. By default, all lines are exported. Empty lines are ignored.
+ required: false + show_user: false + multi: true + - name: buffer_size + type: text + title: Buffer Size + description: | + The size in bytes of the buffer that each harvester uses when fetching a file. The default is 16384. + required: false + show_user: false + - name: message_max_bytes + type: text + title: Message Max Bytes + description: | + The maximum number of bytes that a single log message can have. All bytes after message_max_bytes are discarded and not sent. The default is 10MB (10485760). + required: false + show_user: false diff --git a/packages/filestream/data_stream/generic/sample_event.json b/packages/filestream/data_stream/generic/sample_event.json new file mode 100644 index 00000000000..de46f2545a6 --- /dev/null +++ b/packages/filestream/data_stream/generic/sample_event.json @@ -0,0 +1,65 @@ +{ + "@timestamp": "2024-10-28T15:55:20.433Z", + "agent": { + "ephemeral_id": "5535d71f-017a-4e16-9d82-0dcfc19f4118", + "id": "c3fc72e7-1d83-46eb-a6b8-3af9fddf4582", + "name": "elastic-agent-25415", + "type": "filebeat", + "version": "8.15.1" + }, + "data_stream": { + "dataset": "filestream.generic", + "namespace": "77736", + "type": "logs" + }, + "ecs": { + "version": "8.0.0" + }, + "elastic_agent": { + "id": "c3fc72e7-1d83-46eb-a6b8-3af9fddf4582", + "snapshot": false, + "version": "8.15.1" + }, + "event": { + "agent_id_status": "verified", + "dataset": "filestream.generic", + "ingested": "2024-10-28T15:55:22Z" + }, + "host": { + "architecture": "x86_64", + "containerized": false, + "hostname": "elastic-agent-25415", + "id": "0fba6dd9e2a445ca80a4261bd56fec54", + "ip": [ + "172.22.0.2", + "172.20.0.4" + ], + "mac": [ + "02-42-AC-14-00-04", + "02-42-AC-16-00-02" + ], + "name": "elastic-agent-25415", + "os": { + "codename": "focal", + "family": "debian", + "kernel": "6.6.8-arch1-1", + "name": "Ubuntu", + "platform": "ubuntu", + "type": "linux", + "version": "20.04.6 LTS (Focal Fossa)" + } + }, + "input": { + "type": "filestream" + }, + "log": { + "file": {
"device_id": "65024", + "fingerprint": "85fde22ceb5920e0273dcfd0bdb4f5740b9cd9f7e58235c7471badf25bc0a400", + "inode": "12486512", + "path": "/tmp/service_logs/test-filestream.log" + }, + "offset": 0 + }, + "message": "214.248.225.154 - - [28/Oct/2024:11:43:05 -0400] \"GET /whiteboard/enhance/utilize/generate HTTP/1.1\" 100 20297" +} \ No newline at end of file diff --git a/packages/filestream/docs/README.md b/packages/filestream/docs/README.md new file mode 100644 index 00000000000..bcc1dd00d35 --- /dev/null +++ b/packages/filestream/docs/README.md @@ -0,0 +1,38 @@ +# Custom Filestream Log integration + +The `filestream` custom input is used to read lines from active log files. It is the +new, improved alternative to the `log` input. It comes with various improvements +to the existing input: + +1. Checking of `close_*` options happens out of band. Thus, if an output is blocked, +Elastic Agent can close the reader and avoid keeping too many files open. + +2. The order of `parsers` is configurable. So it is possible to parse JSON lines and then +aggregate the contents into a multiline event. + +3. Some position updates and metadata changes no longer depend on the publishing pipeline. +If the pipeline is blocked some changes are still applied to the registry. + +4. Only the most recent updates are serialized to the registry. In contrast, the `log` input +has to serialize the complete registry on each ACK from the outputs. This makes the registry updates +much quicker with this input. + +5. The input ensures that only offsets updates are written to the registry append only log. +The `log` writes the complete file state. + +6. Stale entries can be removed from the registry, even if there is no active input. + +7. The fingerprint file identity is used by default. 
+ +More information can be found on the [Filestream documentation page](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-filestream.html) + +As Filestream configures a new input, configuring it to collect data +from a file that was previously collected by the Custom Logs integration +will result in duplicate data. You may wish to configure +`ignore_older` or temporarily set `ignore_inactive: since_first_start` +to limit the amount of duplicate data collected. + +If the Custom Logs integration is removed and the Custom Filestream +Logs is added in the same policy change, there is a risk of data being +missed between the last entry ingested by the Custom Logs and the +first one ingested by the Custom Filestream Logs. diff --git a/packages/filestream/img/icon.svg b/packages/filestream/img/icon.svg new file mode 100644 index 00000000000..173fdec5072 --- /dev/null +++ b/packages/filestream/img/icon.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/packages/filestream/manifest.yml b/packages/filestream/manifest.yml new file mode 100644 index 00000000000..40df1d01a5f --- /dev/null +++ b/packages/filestream/manifest.yml @@ -0,0 +1,26 @@ +format_version: 3.1.5 +name: filestream +title: Custom Filestream Logs +description: Collect log data using filestream with Elastic Agent. +type: integration +version: 0.0.1 +conditions: + kibana: + version: ^8.15.0 +categories: + - custom + - custom_logs +policy_templates: + - name: filestream + title: Custom Filestream Logs + description: Collect log data from filestream with Elastic Agent. + inputs: + - type: filestream + title: Custom Filestream Logs + description: Collect log data from filestream with Elastic Agent. +icons: + - src: "/img/icon.svg" + type: "image/svg+xml" +owner: + github: elastic/elastic-agent-data-plane + type: elastic