From abdb48dd836fd85bac97629eca1fae247c6b2ae0 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 29 Apr 2021 19:33:17 +0000 Subject: [PATCH 001/126] couple little tweaks --- website/docs/deploy/docker_install_simple.md | 2 +- website/docs/devel/documentation_guide.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/website/docs/deploy/docker_install_simple.md b/website/docs/deploy/docker_install_simple.md index 264e5953..6110c4e7 100644 --- a/website/docs/deploy/docker_install_simple.md +++ b/website/docs/deploy/docker_install_simple.md @@ -97,7 +97,7 @@ docker-compose up -d sflow-collector netflow-collector If the collector(s) are running properly, you should see nfcapd files in subdirectories of data/input_data/, and they should have sizes of more than a few hundred bytes. (See Troubleshooting if you have problems.) -### Running the Collectors and Pipeline +## Running the Collectors and Pipeline {@import ../components/docker_pipeline.md} diff --git a/website/docs/devel/documentation_guide.md b/website/docs/devel/documentation_guide.md index 42a37954..06c1c4a8 100644 --- a/website/docs/devel/documentation_guide.md +++ b/website/docs/devel/documentation_guide.md @@ -80,6 +80,8 @@ $ USE_SSH="true" GIT_USER="your-username" yarn deploy ``` replacing your-username. This sets a couple env vars then runs 'yarn deploy' which runs 'docusaurus deploy' (see package.json) which pushes the static website created to url: "https://netsage-project.github.io" (see docusaurus.config.js) +NOTE: You need to have created ssh keys on the host you are running this on and added them to your github account. + ### Removing a version To remove version 1.2.6 for example. From cc64bb23705390ca6cb4d72449e416b978f60893 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Mon, 3 May 2021 15:58:14 +0000 Subject: [PATCH 002/126] Added util/prcoess_caida_file.pl --- util/process_caida_file.pl | 80 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 util/process_caida_file.pl diff --git a/util/process_caida_file.pl b/util/process_caida_file.pl new file mode 100644 index 00000000..6bc264be --- /dev/null +++ b/util/process_caida_file.pl @@ -0,0 +1,80 @@ +#!/usr/bin/perl +use strict; +use warnings; +use Data::Dumper; +# +# This script is used to process as-org database files downloaded from caida.org +# into the format the netsage pipeline requires. +# +# First, get the txt file from caida (these are released quarterly) +# eg, $ wget https://publicdata.caida.org/datasets/as-organizations/20210401.as-org2info.txt.gz +# and $ gunzip 20210401.as-org2info.txt.gz +# +# Then run this script +# eg, $ process-caida-file.pl 20210401.as-org2info.txt +# CAIDA-test.csv will be created. +# +# Save the final file with a name like CAIDA-2021-0401-lookup.csv +# Do a test run to be sure things are ok, as much as possible. +# +# Finally, +# Copy it to scienceregistry.grnoc.iu.edu - /usr/share/resourcedb/www/exported/CAIDA-org-lookup.csv +# so cron jobs on pipeline hosts will start downloading it. +# Note that it won't be used in the pipeline until logstash restarts. IU hosts have a cron job to restart logstash. +# (Docker instances will download it periodically but they don't currently restart logstash automatically.) +# +my $input_file = $ARGV[0]; +if (! -e $input_file) { die ("$input_file was not found\n"); } +if (! open( INFILE, '<', $input_file) ) { die ("Error opening $input_file\n"); }; +print ("Processing $input_file\n"); + +my $output_file = "caida-test.csv"; +if (! 
open( OUTFILE, '>', $output_file) ) { die ("Error opening $output_file\n"); }; +print ("Writing $output_file\n"); + +my $orgs; +my $asn_orgs; +my $section = "headers"; +while (my $line = <INFILE>) { + chomp $line; + next if ($section eq "headers" and $line !~ /format:/); + if ($section eq "headers" and $line =~ /format:/) { + $section = "orgs"; + next; + } + if ($section eq "orgs" and $line =~ /format:/) { + $section = "asns"; + next; + } + + # have to escape the | separator! + my @parts = split('\|', $line); + + if ($section eq "orgs") { + # $orgs with key org-id = org-name + $orgs->{$parts[0]} = $parts[2]; + } + + if ($section eq "asns") { + # $asn_orgs with key asn = org-name + $asn_orgs->{$parts[0]} = $orgs->{$parts[3]}; + } +} + +# sort by ASN +my @sorted_asns = sort {$a <=> $b} keys %$asn_orgs; + +foreach my $asn (@sorted_asns) { + my $org = $asn_orgs->{$asn}; + # handle missing orgs, quotes, backslashes, and commas in org names + if (! $org) { $org = "Unknown"; } + $org =~ s/\\/ /g; + $org =~ s/"/""/g; +# if ($org =~ /[,"]/) { $org = '"'.$org.'"'; } + $org = '"'.$org.'"'; + + # ASNs are keys in the translate filter and they definitely need to be strings in quotes + $asn = '"'.$asn.'"'; + + print (OUTFILE $asn.','.$org."\n"); +} From 0b701fbcc1e6aca228529d3720d9692b58d27eef Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Mon, 3 May 2021 18:23:16 +0000 Subject: [PATCH 003/126] added @pipeline_ver to logstash fields --- conf-logstash/98-post-process.conf | 1 + grnoc-netsage-deidentifier.spec | 1 + website/docs/devel/tag.md | 4 ++-- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/conf-logstash/98-post-process.conf b/conf-logstash/98-post-process.conf index dd70c55d..9efb1da0 100644 --- a/conf-logstash/98-post-process.conf +++ b/conf-logstash/98-post-process.conf @@ -5,6 +5,7 @@ filter { code => ' event.set( "@exit_time", Time.now ); event.set( "@processing_time", event.get("@exit_time") - event.get("@ingest_time") ); + event.set( "@pipeline_ver", "1.2.10" ); ' tag_on_exception => '_rubyexception in 98-outputs, failed to set @processing_time' } diff --git a/grnoc-netsage-deidentifier.spec b/grnoc-netsage-deidentifier.spec index 78104187..ed5ef56c 100644 --- a/grnoc-netsage-deidentifier.spec +++ b/grnoc-netsage-deidentifier.spec @@ -1,5 +1,6 @@ Summary: GRNOC NetSage Flow-Processing Pipeline Name: grnoc-netsage-deidentifier + # update Version here, in conf-logstash/98-post-process.conf, lib/GRNOC/NetSage/Deidentifier.pm Version: 1.2.9 Release: 1%{?dist} License: GRNOC Group: Measurement diff --git a/website/docs/devel/tag.md b/website/docs/devel/tag.md index 65611923..040de851 100644 --- a/website/docs/devel/tag.md +++ b/website/docs/devel/tag.md @@ -1,7 +1,7 @@ --- id: docker_dev_tag -title: Tagging a Release -sidebar_label: How to Tag a New Release +title: How to Tag a New Release +sidebar_label: Tagging a Release --- To tag a new release, first update the version number and Changes file, build the rpm, etc. and upgrade on bare-metal hosts using yum. If all works fine, do the following steps to create new Docker images.
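For reference, the util/process_caida_file.pl script added in the patch above writes a two-column CSV in which both the ASN key and the organization name are double-quoted (with any embedded quotes doubled), since the ASNs are used as string keys in the logstash translate filter. The lines below are a sketch of that output format only; the ASNs and organization names are made-up placeholders, not real CAIDA data.

```
"1","Example Research Network"
"64496","Example Org, With Comma ""And Quotes"""
```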
From 596bfac39c2c511335f1da2b2a955176c1d3607e Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 5 May 2021 18:18:48 +0000 Subject: [PATCH 004/126] Documentation updates --- website/docs/components/docker_env.md | 6 +++--- website/docs/deploy/docker_install_advanced.md | 7 ------- website/docs/deploy/docker_install_simple.md | 16 ++++++---------- .../{components => deploy}/docker_upgrade.md | 7 +++++++ website/sidebars.js | 1 + 5 files changed, 17 insertions(+), 20 deletions(-) rename website/docs/{components => deploy}/docker_upgrade.md (94%) diff --git a/website/docs/components/docker_env.md b/website/docs/components/docker_env.md index 0aae416c..845e433b 100644 --- a/website/docs/components/docker_env.md +++ b/website/docs/components/docker_env.md @@ -11,8 +11,8 @@ netflowSensorName=my netflow sensor name Simply change the names to unique identifiers (with spaces or not, no quotes) and you're good to go. :::note -These names uniquely identify the source of the data. In elasticsearch, they are saved in the `meta.sensor_id` field and will be shown in Grafana dashboards. Choose names that are meaningful and unique. -For example, your sensor names might be "RNDNet New York Sflow" and "RNDNet Boston Netflow" or "RNDNet NY-London 1" and "RNDNet NY-London 2". Whatever makes sense in your situation. +These names uniquely identify the source of the data and will be shown in the Grafana dashboards. In elasticsearch, they are saved in the `meta.sensor_id` field. Choose names that are meaningful and unique. +For example, your sensor names might be "RNDNet New York Sflow" and "RNDNet Boston Netflow" or "RNDNet New York - London 1" and "RNDNet New York - London 2". Whatever makes sense in your situation. ::: - If you don't set a sensor name, the default docker hostname, which changes each time you run the pipeline, will be used. @@ -22,7 +22,7 @@ For example, your sensor names might be "RNDNet New York Sflow" and "RNDNet Bost Other settings of note in this file include the following. You will not necessarily need to change these, but be aware. -**rabbit_output_host**: this defines where the final data will land after going through the pipeline. By default, the last rabbit queue will be on `rabbit`, ie, the local rabbitMQ server running in its docker container. Enter a hostname to send to a remote rabbitMQ server (also the correct username, password, and queue key/name). +**rabbit_output_host**: this defines where the final data will land after going through the pipeline. By default, the last rabbit queue will be on `rabbit`, ie, the local rabbitMQ server running in its docker container. Enter a hostname to send to a remote rabbitMQ server (also the correct username, password, and queue key/name). (For NetSage, another logstash pipeline on a remote server moves flows from this final rabbit queue into Elasticsearch.) The following Logstash Aggregation Filter settings are exposed in case you wish to use different values. (See comments in the \*-aggregation.conf file.) The aggregation filter stitches together long-lasting flows that are seen in multiple nfcapd files, matching by the 5-tuple (source and destination IPs, ports, and protocol) plus sensor name. 
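To make the aggregation-filter settings mentioned above concrete, the corresponding entries in the .env file end up looking something like the sketch below. The exact key names and defaults should be taken from env.example for your release; the values shown here (the 630-second inactivity timeout for 5-minute nfcapd files, a 24-hour maximum flow duration, and an aggregation-maps file under the required /data/ directory) are illustrative assumptions, not prescribed values.

```ini
aggregation_maps_path=/data/logstash-aggregation-maps
inactivity_timeout=630
max_flow_timeout=86400
```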
diff --git a/website/docs/deploy/docker_install_advanced.md b/website/docs/deploy/docker_install_advanced.md index b0c7fb12..259571d2 100644 --- a/website/docs/deploy/docker_install_advanced.md +++ b/website/docs/deploy/docker_install_advanced.md @@ -150,13 +150,6 @@ You will also need to uncomment these lines in docker-compose.override.yml: ``` - -## To Process Tstat Data -Tstat data is not collected by nfdump/sfcapd/nfcapd or read by an Importer. Instead, the flow data is sent directly from the router or switch to the logstash pipeline's ingest rabbit queue (named "netsage_deidentifier_raw"). So, when following the Docker Simple guide, the sections related to configuring and starting up the collectors and Importer will not pertain to the tstat sensors. The .env file still needs to be set up though. - -Setting up Tstat is outside the scope of this document, but see the Netsage project Tstat-Transport which contains client programs that can send tstat data to a rabbit queue. See [https://github.com/netsage-project/tstat-transport.git](https://github.com/netsage-project/tstat-transport.git). Basically, you need to have Tstat send data directly to the same rabbit queue that the importers write sflow and netflow data to and that the logstash pipeline reads from. - - ## To Customize Java Settings / Increase Memory Available for Lostash If you need to modify the amount of memory logstash can use or any other java settings, diff --git a/website/docs/deploy/docker_install_simple.md b/website/docs/deploy/docker_install_simple.md index 6110c4e7..bf3d6856 100644 --- a/website/docs/deploy/docker_install_simple.md +++ b/website/docs/deploy/docker_install_simple.md @@ -10,10 +10,10 @@ The Docker containers included in the installation are - sflow-collector (receives sflow data and writes nfcapd files) - netflow-collector (receives netflow data and writes nfcapd files) - importer (reads nfcapd files and puts flows into a local rabbit queue) - - logstash (logstash pipeline that processes flows and sends them to, by default, netsage-elk1.grnoc.iu.edu) + - logstash (logstash pipeline that processes flows and sends them to their final destination, by default a local rabbit queue) - ofelia (cron-like downloading of files used by the logstash pipeline) -The code and configs for the importer and logstash pipeline can be viewed in this github repo (netsage-project/netsage-pipeline). See netsage-project/docker-nfdump-collector for code related to the collectors. +The code and configs for the importer and logstash pipeline can be viewed in the netsage-project/netsage-pipeline github repo. See netsage-project/docker-nfdump-collector for code related to the collectors. ### 1. Set up Data Sources @@ -23,12 +23,12 @@ The data processing pipeline needs data to ingest in order to do anything, of co - netflow - tstat -At least one of these must be set up on a sensor to provide the incoming flow data. +At least one of these must be set up on a sensor (flow exporter/router), to provide the incoming flow data. You can do this step later, but it will helpful to have it working first. Sflow and netflow data should be exported to the pipeline host where there are collectors (nfcapd and/or sfcapd processes) ready to receive it (see below). To use the default settings, send sflow to port 9998 and netflow to port 9999. On the pipeline host, allow incoming traffic from the flow exporters, of course. -Tstat data should be sent directly to the logstash input RabbitMQ queue on the pipeline host. 
No collector is needed for tstat data. From there, logstash will grab the data and process it the same way as it processes sflow/netflow data. (See the Docker Advanced guide.) +Tstat data should be sent directly to the logstash input rabbit queue "netsage_deidentifier_raw" on the pipeline host. No collector is needed for tstat data. See the netsage-project/tstat-transport repo. (From there, logstash will grab the data and process it the same way as it processes sflow/netflow data. (See the Docker Advanced guide.) ### 2. Clone the Netsage Pipeline Project @@ -58,10 +58,10 @@ cp docker-compose.override_example.yml docker-compose.override.yml By default docker will bring up a single netflow collector and a single sflow collector. If this matches your case, you don't need to make any changes to the docker-compose.override_example.yml. If you have only one collector, remove or comment out the section for the one not needed so the collector doesn't run and simply create empty nfcapd files. :::note -If you only have one collector, you should remove or comment out the section for the collector that is not used. +If you only have one collector, you should remove or comment out the section for the collector that is not used, so it doesn't run and just create empty files. ::: -This file also specifies port numbers, and directories for nfcapd files. By default, the sflow collector will listen to udp traffic on localhost:9998, while the netflow collector will listen on port 9999, and data will be written to `/data/input_data/`. Each collector is namespaced by its type so the sflow collector will write data to `/data/input_data/sflow/` and the netflow collector will write data to `/data/input_data/netflow/`. +This file also specifies port numbers, and directories for nfcapd files. By default, the sflow collector will listen to udp traffic on localhost:9998, while the netflow collector will listen on port 9999, and data will be written to `/data/input_data/`. Each collector is namespaced by its type so the sflow collector will write data to `/data/input_data/sflow/` and the netflow collector will write data to `/data/input_data/netflow/`. Change these only if required. Other lines in this file you can ignore for now. @@ -101,7 +101,3 @@ If the collector(s) are running properly, you should see nfcapd files in subdire {@import ../components/docker_pipeline.md} - -## Upgrading - -{@import ../components/docker_upgrade.md} diff --git a/website/docs/components/docker_upgrade.md b/website/docs/deploy/docker_upgrade.md similarity index 94% rename from website/docs/components/docker_upgrade.md rename to website/docs/deploy/docker_upgrade.md index 708bed07..98ffbc6b 100644 --- a/website/docs/components/docker_upgrade.md +++ b/website/docs/deploy/docker_upgrade.md @@ -1,3 +1,10 @@ +--- +id: docker_upgrade +title: Upgrading +sidebar_label: Docker Pipeline Upgrades +--- + +To upgrade a previous installment of the Dockerized pipeline, perform the following steps. 
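At a high level, the upgrade cycle detailed in the steps below amounts to stopping the containers, moving the checked-out code to the new release and reconciling any override/env file changes, and then starting the containers again. The following is only a condensed sketch assuming the standard docker-compose layout from the install guide; the sections below are the authoritative steps.

```sh
docker-compose down        # stop the running pipeline containers
git checkout {tag}         # switch the code to the new release tag
docker-compose pull        # fetch updated images (up -d will also pull any that are missing)
docker-compose up -d       # start the pipeline on the new release
```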
### Shut things down diff --git a/website/sidebars.js b/website/sidebars.js index 9fc9f34b..6bae0e65 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -13,6 +13,7 @@ module.exports = { "deploy/bare_metal_install", "deploy/docker_install_simple", "deploy/docker_install_advanced", + "deploy/docker_upgrade", "deploy/docker_troubleshoot", ], Development: [ From f4a98567d30675b9fecfeb5112132e474d106721 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 5 May 2021 14:27:35 -0400 Subject: [PATCH 005/126] menu wording change --- website/docs/deploy/docker_upgrade.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/deploy/docker_upgrade.md b/website/docs/deploy/docker_upgrade.md index 98ffbc6b..5579c904 100644 --- a/website/docs/deploy/docker_upgrade.md +++ b/website/docs/deploy/docker_upgrade.md @@ -1,7 +1,7 @@ --- id: docker_upgrade title: Upgrading -sidebar_label: Docker Pipeline Upgrades +sidebar_label: Docker Upgrading --- To upgrade a previous installment of the Dockerized pipeline, perform the following steps. From 2d84ae53a55e0121e6f2d64e687ab2034fd6dcf2 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 5 May 2021 18:42:34 +0000 Subject: [PATCH 006/126] Stop dropping 0.0.0.0 flows --- conf-logstash/10-preliminaries.conf | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/conf-logstash/10-preliminaries.conf b/conf-logstash/10-preliminaries.conf index f9b6c967..bcb7b5b0 100644 --- a/conf-logstash/10-preliminaries.conf +++ b/conf-logstash/10-preliminaries.conf @@ -3,11 +3,10 @@ filter { # 1. Drop flows to or from private addresses (or other ranges we want to drop) - # Apr 2021 - dropping 0.0.0.0/32 flows for now. There is an nfdump bug affecting MPLS flows. (src and dst) cidr { id => "10-1" address => [ "%{[meta][src_ip]}" ] - network => [ "0.0.0.0/32", "10.0.0.0/8", "100.64.0.0/10", "192.168.0.0/16", "172.16.0.0/12", "fc00::/7", "169.254.0.0/16", "fe80::/10", "::1/128" ] + network => [ "10.0.0.0/8", "100.64.0.0/10", "192.168.0.0/16", "172.16.0.0/12", "fc00::/7", "169.254.0.0/16", "fe80::/10", "::1/128" ] add_field => { "@private_src" => "yes" } } # can skip dst if src is private @@ -15,7 +14,7 @@ filter { cidr { id => "10-2" address => [ "%{[meta][dst_ip]}" ] - network => [ "0.0.0.0/32", "10.0.0.0/8", "100.64.0.0/10", "192.168.0.0/16", "172.16.0.0/12", "fc00::/7", "169.254.0.0/16", "fe80::/10", "::1/128" ] + network => [ "10.0.0.0/8", "100.64.0.0/10", "192.168.0.0/16", "172.16.0.0/12", "fc00::/7", "169.254.0.0/16", "fe80::/10", "::1/128" ] add_field => { "@private_dst" => "yes" } } } @@ -23,8 +22,7 @@ filter { drop { } } - # 2. Add ingest_time here in case aggregation isn't done (eg, for tstat). - # (Copy makes a string; have to convert it to a date.) + # 2. 
Add @ingest_time field (useful for debugging) mutate { id => "10-3" add_field => { '@ingest_time' => "%{@timestamp}" } From 1a7829b4561012ae40dfdfd13d939f7c65c86f2f Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Fri, 7 May 2021 16:56:54 +0000 Subject: [PATCH 007/126] use nfdump 1.6.23, ampath regexes not needed anymore, doc updates --- conf-logstash/support/sensor_groups.json | 1 - conf-logstash/support/sensor_types.json | 1 - docker-compose.override_example.yml | 11 ++++++----- website/docs/deploy/docker_upgrade.md | 14 ++++++++------ 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/conf-logstash/support/sensor_groups.json b/conf-logstash/support/sensor_groups.json index 28ed5f24..1ac85667 100644 --- a/conf-logstash/support/sensor_groups.json +++ b/conf-logstash/support/sensor_groups.json @@ -1,5 +1,4 @@ { - "^AMPATH.*": "AMPATH", "^.*cenic.*": "CENIC", "^FRGP.*": "FRGP", "^GEANT.*": "GEANT", diff --git a/conf-logstash/support/sensor_types.json b/conf-logstash/support/sensor_types.json index 206f4491..026ef616 100644 --- a/conf-logstash/support/sensor_types.json +++ b/conf-logstash/support/sensor_types.json @@ -6,7 +6,6 @@ "^NEAAR.*": "Circuit", "^NEA3R.*": "Circuit", "^TransPAC.*": "Circuit", - "^AMPATH.*$": "Exchange Point", "^SingAREN.*$": "Exchange Point", "^.*pacificwave\\.net$": "Exchange Point", "^.*pnw-gigapop\\.net$": "Exchange Point", diff --git a/docker-compose.override_example.yml b/docker-compose.override_example.yml index 2d943578..75c276df 100644 --- a/docker-compose.override_example.yml +++ b/docker-compose.override_example.yml @@ -1,20 +1,21 @@ version: "3.7" services: + logstash: image: netsage/pipeline_logstash:latest ## If you need to allocate more than 1GB (default) override the JMV options # volumes: # - ./userConfig/jvm.options:/usr/share/logstash/config/jvm.options + importer: image: netsage/pipeline_importer:latest - ## If you are using custom collectors you need to create this file and specify any addition ENV flags to identify the collector source + ## If you add additional collectors, you need to uncomment the following line and modify the file to add additional collections # volumes: # - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml - ## Please remove or add any additional collectors here. Default setup should work fine without any custom config. - ## You may remove any collectors that are not needed. + ## Add any additional collectors here. You may remove any collectors that are not needed. sflow-collector: - image: netsage/nfdump-collector:alpine-nightly + image: netsage/nfdump-collector:alpine-1.6.23 restart: always command: sfcapd -T all -l /data -S 1 -w -z -p 9998 volumes: @@ -22,7 +23,7 @@ services: ports: - "9998:9998/udp" netflow-collector: - image: netsage/nfdump-collector:alpine-nightly + image: netsage/nfdump-collector:alpine-1.6.23 command: nfcapd -T all -l /data -S 1 -w -z -p 9999 ports: - "9999:9999/udp" diff --git a/website/docs/deploy/docker_upgrade.md b/website/docs/deploy/docker_upgrade.md index 5579c904..a598caac 100644 --- a/website/docs/deploy/docker_upgrade.md +++ b/website/docs/deploy/docker_upgrade.md @@ -30,19 +30,21 @@ Example: ```git commit -a -m "Saving local state"; git checkout -b feature/backup; git checkout master``` ::: -### Check/Update Files -- Compare the new docker-compose.override_example.yml file to your docker-compose.override.yml to see if a new version of Docker is required. Look for, eg, version: "3.7" at the top. 
If the version number is different, change it in your docker-compose.override.yml file and upgrade Docker manually. +### Check/Update Override Files +Occasionally, the required version of Docker or nfdump may change, which will necessitate editing your override and/or env files. -- In the same files, see if the version of nfdump has changed. Look for lines like "image: netsage/nfdump-collector:1.6.18". If there has been a change, update the version in the override file. (You do not need to actually perform any update yourself.) +- Compare the new `docker-compose.override_example.yml` file to your `docker-compose.override.yml` to see if a new version of Docker is required. Look for, eg, `version: "3.7"` at the top. If the version number is different, change it in your docker-compose.override.yml file and upgrade Docker manually. + +- Also check to see if the version of nfdump has changed. Look for lines like `image: netsage/nfdump-collector:1.6.18`. Make sure the version in your override file matches what is the example file. (You do not need to actually perform any upgrade yourself. This will ensure the correct version is pulled from Docker Hub.) Note that you do not need to update the versions of the importer or logstash images. That will be done for you in the "select release version" stop coming up. -- Also compare your .env file with the new env.example file to see if any new lines or sections have been added. Copy new lines into your .env file, making any appropriate changes to example values. +- Also compare your `.env` file with the new `env.example` file to see if any new lines or sections have been added. If there have been any changes relevant to your deployment, eg, new options you want to use, copy the changes into your .env file. -- If you used the Docker Advanced guide to make a netsage_override.xml file, compare it to netsage_shared.xml to see if there are any changes. This is unlikely. +- If you used the Docker Advanced guide to make a `netsage_override.xml` file, compare it to `netsage_shared.xml` to see if there are any changes. This is unlikely. ### Select Release Version -Run these two commands to select the new release you want to run. In the first, replace "{tag}" by the version to run (eg, v1.2.8). When asked by the second, select the same version as the tag you checked out. +Run these two commands to select the new release you want to run. In the first, replace "{tag}" by the version to run (eg, v1.2.10). When asked by the second, select the same version as the tag you checked out. ```sh git checkout -b {tag} git pull From 2715bd370f7293d4dee39e90333aa0628ed91694 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Fri, 7 May 2021 17:21:47 +0000 Subject: [PATCH 008/126] Changed version no. to 1.2.10 --- CHANGES.md | 24 ++++++++++++++++++++---- grnoc-netsage-deidentifier.spec | 2 +- lib/GRNOC/NetSage/Deidentifier.pm | 2 +- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 708a664b..1fa71fd7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,8 +1,23 @@ +------------------------------------------------------ +## GRNOC NetSage Deidentfier 1.2.10 -- May 10 2021 +------------------------------------------------------ +Usage note: With this release, we will move to using nfdump v1.6.23. +This includes a fix for IPs not being parsed in MPLS flows, as well as the fix for missing ASNs from April. + * docker-compose.override_example.yml has been updated to refer to this version. 
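As a point of reference, the collector entries in docker-compose.override_example.yml now point at the 1.6.23 collector image, so an existing docker-compose.override.yml can be updated to match. The fragment below mirrors the example file shipped with this release; service names and other settings may differ in a customized override file.

```yaml
  sflow-collector:
    image: netsage/nfdump-collector:alpine-1.6.23
  netflow-collector:
    image: netsage/nfdump-collector:alpine-1.6.23
```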
+ +Features: + * 15-sensor-specific-changes.conf can now be used to drop all flows from a certain sensor except those from listed ifindexes. + * 0.0.0.0 flows are no longer dropped + * Will now tag flows with the pipeline version number (@pipeline_ver) + * Added a sript (to util/) that can be used to process as-org files from CAIDA into the ASN lookup files that we need. + * Documentation updates + ------------------------------------------------------ ## GRNOC NetSage Deidentfier 1.2.9 -- Apr 7 2021 ------------------------------------------------------ -Usage note: With this release, we are also moving to using a version of nfdump built from github master which includes commits through Feb 20, 2021. This includes a fix for incorrect ASNs being added to flows when the ASN data is actually missing. - * To go along with this, docker-compose.override_example.yml refers to a "nightly" tag of nfdump (this is not updated nightly!) +Usage note: With this release, we are also moving to using a version of nfdump built from github master which includes +commits through Feb 20, 2021. This includes a fix for incorrect ASNs being added to flows when the ASN data is actually missing. + * To go along with this, docker-compose.override_example.yml refers to a "nightly" tag of nfdump (this is not actually updated nightly!) Features: * The installed version of 15-sensor-specific-changes.conf now accomodates environment variables for @@ -17,13 +32,14 @@ Bugs * Flow-filter changes have been made to accomodate changes to simp * Flows with IPs of 0.0.0.0 are dropped * For Docker installs, rabbit host name will be fixed - * Docusaurus and some packages flagged by depndabot were upgraded + * Docusaurus and some packages flagged by dependabot were upgraded ------------------------------------------------------ ## GRNOC NetSage Deidentfier 1.2.8 -- Jan 28 2021 ------------------------------------------------------ Features: - * Added 15-sensor-specific-changes.conf with multiplication by mirroring-sampling rate for a pacificwave sensor and changing of the sensor name for NEAAR flows using a certain ifindex. + * Added 15-sensor-specific-changes.conf with multiplication by mirroring-sampling rate for a pacificwave sensor and changing of + the sensor name for NEAAR flows using a certain ifindex. * Started saving ifindexes to ES (at least for now) * Added consideration of continents to possibly get a country_scope value when a country is missing. 
* Stopped saving old 'projects' array field to ES diff --git a/grnoc-netsage-deidentifier.spec b/grnoc-netsage-deidentifier.spec index ed5ef56c..ab855932 100644 --- a/grnoc-netsage-deidentifier.spec +++ b/grnoc-netsage-deidentifier.spec @@ -1,7 +1,7 @@ Summary: GRNOC NetSage Flow-Processing Pipeline Name: grnoc-netsage-deidentifier # update Version here, in conf-logstash/98-post-process.conf, lib/GRNOC/NetSage/Deidentifier.pm -Version: 1.2.9 +Version: 1.2.10 Release: 1%{?dist} License: GRNOC Group: Measurement diff --git a/lib/GRNOC/NetSage/Deidentifier.pm b/lib/GRNOC/NetSage/Deidentifier.pm index 395bec49..48112e4b 100644 --- a/lib/GRNOC/NetSage/Deidentifier.pm +++ b/lib/GRNOC/NetSage/Deidentifier.pm @@ -3,7 +3,7 @@ package GRNOC::NetSage::Deidentifier; use strict; use warnings; -our $VERSION = "1.2.9"; +our $VERSION = "1.2.10"; 1; From 024eefe9d32c26674e10e044010fa3f489254b95 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Fri, 28 May 2021 18:58:31 +0000 Subject: [PATCH 009/126] Created 1.2.10 versioned docs --- website/docusaurus.config.js | 2 +- .../version-1.2.10/components/docker_env.md | 41 +++ .../components/docker_first_steps.md | 26 ++ .../components/docker_pipeline.md | 26 ++ .../deploy/bare_metal_install.md | 299 ++++++++++++++++++ .../version-1.2.10/deploy/choosing.md | 25 ++ .../deploy/docker_install_advanced.md | 196 ++++++++++++ .../deploy/docker_install_simple.md | 103 ++++++ .../deploy/docker_troubleshooting.md | 61 ++++ .../version-1.2.10/deploy/docker_upgrade.md | 79 +++++ .../version-1.2.10/devel/docker.md | 83 +++++ .../devel/documentation_guide.md | 142 +++++++++ .../version-1.2.10/devel/pipeline_dataset.md | 34 ++ .../version-1.2.10/devel/tag.md | 46 +++ .../version-1.2.10/pipeline/elastic_search.md | 123 +++++++ .../version-1.2.10/pipeline/importer.md | 14 + .../version-1.2.10/pipeline/intro.md | 37 +++ .../version-1.2.10/pipeline/logstash.md | 127 ++++++++ .../version-1.2.10/pipeline/nfdump.md | 17 + .../version-1.2.10/pipeline/tstat.md | 16 + .../version-1.2.10-sidebars.json | 89 ++++++ website/versions.json | 1 + 22 files changed, 1586 insertions(+), 1 deletion(-) create mode 100644 website/versioned_docs/version-1.2.10/components/docker_env.md create mode 100644 website/versioned_docs/version-1.2.10/components/docker_first_steps.md create mode 100644 website/versioned_docs/version-1.2.10/components/docker_pipeline.md create mode 100644 website/versioned_docs/version-1.2.10/deploy/bare_metal_install.md create mode 100644 website/versioned_docs/version-1.2.10/deploy/choosing.md create mode 100644 website/versioned_docs/version-1.2.10/deploy/docker_install_advanced.md create mode 100644 website/versioned_docs/version-1.2.10/deploy/docker_install_simple.md create mode 100644 website/versioned_docs/version-1.2.10/deploy/docker_troubleshooting.md create mode 100644 website/versioned_docs/version-1.2.10/deploy/docker_upgrade.md create mode 100644 website/versioned_docs/version-1.2.10/devel/docker.md create mode 100644 website/versioned_docs/version-1.2.10/devel/documentation_guide.md create mode 100644 website/versioned_docs/version-1.2.10/devel/pipeline_dataset.md create mode 100644 website/versioned_docs/version-1.2.10/devel/tag.md create mode 100644 website/versioned_docs/version-1.2.10/pipeline/elastic_search.md create mode 100644 website/versioned_docs/version-1.2.10/pipeline/importer.md create mode 100644 website/versioned_docs/version-1.2.10/pipeline/intro.md create mode 100644 website/versioned_docs/version-1.2.10/pipeline/logstash.md create mode 100644 
website/versioned_docs/version-1.2.10/pipeline/nfdump.md create mode 100644 website/versioned_docs/version-1.2.10/pipeline/tstat.md create mode 100644 website/versioned_sidebars/version-1.2.10-sidebars.json diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js index 123409c5..ffc9afd6 100644 --- a/website/docusaurus.config.js +++ b/website/docusaurus.config.js @@ -55,7 +55,7 @@ module.exports = { remarkPlugins: [require("remark-import-partial")], sidebarPath: require.resolve("./sidebars.js"), // Please change this to your repo. - lastVersion: "1.2.9", + lastVersion: "1.2.10", versions: { current: { label: `master (unreleased)`, diff --git a/website/versioned_docs/version-1.2.10/components/docker_env.md b/website/versioned_docs/version-1.2.10/components/docker_env.md new file mode 100644 index 00000000..845e433b --- /dev/null +++ b/website/versioned_docs/version-1.2.10/components/docker_env.md @@ -0,0 +1,41 @@ +Please copy `env.example` to `.env` +```sh +cp env.example .env +``` + +then edit the .env file to set the sensor names +```sh +sflowSensorName=my sflow sensor name +netflowSensorName=my netflow sensor name +``` +Simply change the names to unique identifiers (with spaces or not, no quotes) and you're good to go. + +:::note +These names uniquely identify the source of the data and will be shown in the Grafana dashboards. In elasticsearch, they are saved in the `meta.sensor_id` field. Choose names that are meaningful and unique. +For example, your sensor names might be "RNDNet New York Sflow" and "RNDNet Boston Netflow" or "RNDNet New York - London 1" and "RNDNet New York - London 2". Whatever makes sense in your situation. +::: + + - If you don't set a sensor name, the default docker hostname, which changes each time you run the pipeline, will be used. + - If you have only one collector, remove or comment out the line for the one you are not using. + - If you have more than one of the same type of collector, see the "Docker Advanced" documentation. + + +Other settings of note in this file include the following. You will not necessarily need to change these, but be aware. + +**rabbit_output_host**: this defines where the final data will land after going through the pipeline. By default, the last rabbit queue will be on `rabbit`, ie, the local rabbitMQ server running in its docker container. Enter a hostname to send to a remote rabbitMQ server (also the correct username, password, and queue key/name). (For NetSage, another logstash pipeline on a remote server moves flows from this final rabbit queue into Elasticsearch.) + +The following Logstash Aggregation Filter settings are exposed in case you wish to use different values. +(See comments in the \*-aggregation.conf file.) The aggregation filter stitches together long-lasting flows that are seen in multiple nfcapd files, matching by the 5-tuple (source and destination IPs, ports, and protocol) plus sensor name. + +**Aggregation_maps_path**: the name of the file to which logstash will write in-progress aggregation data when logstash shuts down. When logstash starts up again, it will read this file in and resume aggregating. The filename is configurable for complex situations, but /data/ is required. + +**Inactivity_timeout**: If more than inactivity_timeout seconds have passed between the 'start' of a flow and the 'start' +of the LAST matching flow, OR if no matching flow has coming in for inactivity_timeout seconds +on the clock, assume the flow has ended. 
+ +:::note +Nfcapd files are typically written every 5 minutes. Netsage uses an inactivity_timeout = 630 sec = 10.5 min for 5-min files; 960 sec = 16 min for 15-min files. (For 5-min files, this allows one 5 min gap or period during which the no. of bits transferred don't meet the cutoff) +::: + +**max_flow_timeout**: If a long-lasting flow is still aggregating when this timeout is reached, arbitrarily cut it off and start a new flow. The default is 24 hours. + diff --git a/website/versioned_docs/version-1.2.10/components/docker_first_steps.md b/website/versioned_docs/version-1.2.10/components/docker_first_steps.md new file mode 100644 index 00000000..9a75fb05 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/components/docker_first_steps.md @@ -0,0 +1,26 @@ +#### saving this for now in case I need to put it back ####### + +Then checkout the latest version of the code. If you are a developer you'll want the latest version from master, otherwise please use make sure +you've checked out the latest tagged version. + +For example, +```sh +## Normal Deployment, eg, checkout version 1.2.8 +$ git fetch +$ git checkout v1.2.8 -b v1.2.8 + +## Developers +$ git fetch +$ git reset --hard origin/master +``` + +:::warning +git reset --hard will obliterate any changes. On initial installation, you should not have any, but if you do wish to save any state, please make sure you commit and backup to a feature branch before continuing + +Example: +```git commit -a -m "Saving local state"; git checkout -b feature/backup; git checkout master``` +::: + + +All instructions that follow assume these first steps were performed succesfully. If not, you'll likely run into errors down the line if the code doesn't match up with the instructions provided. + diff --git a/website/versioned_docs/version-1.2.10/components/docker_pipeline.md b/website/versioned_docs/version-1.2.10/components/docker_pipeline.md new file mode 100644 index 00000000..c2628348 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/components/docker_pipeline.md @@ -0,0 +1,26 @@ +Start up the pipeline (all containers) using: + +```sh +docker-compose up -d +``` + +This will also restart any containers/processes that have died. "-d" runs containers in the background. + +You can see the status of the containers and whether any have died (exited) using +```sh +docker-compose ps +``` + +To check the logs for each of the containers, run + +```sh +docker-compose logs +``` + +Add `-f` or, e.g., `-f logstash` to see new log messages as they arrive. `--timestamps`, `--tail`, and `--since` are also useful -- look up details in Docker documentation. + +To shut down the pipeline (all containers) use + +```sh +docker-compose down +``` diff --git a/website/versioned_docs/version-1.2.10/deploy/bare_metal_install.md b/website/versioned_docs/version-1.2.10/deploy/bare_metal_install.md new file mode 100644 index 00000000..c0c21510 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/deploy/bare_metal_install.md @@ -0,0 +1,299 @@ +--- +id: bare_metal_install +title: Manual Installation Guide +sidebar_label: Manual Installation +--- + +This document covers installing the NetSage Flow Processing Pipeline manually on a new machine (without using Docker). Steps should be followed below in order unless you know for sure what you are doing. This document assumes a RedHat Linux environment or one of its derivatives. + +## Data sources + +The Processing pipeline needs data to ingest in order to do anything. There are two types of data that can be consumed. 
+ +1. sflow or netflow +2. tstat + +At least one of these must be set up on a sensor to provide the incoming flow data. + +Sflow and netflow data should be sent to ports on the pipeline host where nfcapd and/or sfcapd are ready to receive it. + +Tstat data should be sent directly to the logstash input RabbitMQ queue (the same one that the Importer writes to, if it is used). From there, the data will be processed the same as sflow/netflow data. + +## Installing the Prerequisites + +### Installing nfdump + +The nfdump package provides nfcapd and sfcapd processes which recieve flow data and write nfcapd files. +The Importer also uses nfdump. If you are only collecting tstat data, you do not need nfdump. + + +Nfdump is _not_ listed as a dependency of the Pipeline RPM package, as in a lot cases people are running special builds of nfdump -- but make sure you install it before you try running the Netflow Importer. If in doubt, `yum install nfdump` should work. +Flow data exported by some routers require a newer version of nfdump than the one in the CentOS repos; in these cases, it may be necessary to manually compile and install the lastest nfdump. + +:::note +It is recommended to check the version of nfdump used in the Docker installation and use the same or newer in order to be sure that any fixes for impactful issues are included. +::: + + +If desired, you can also install nfsen, which has a UI for viewing flow data and can manage starting and stopping all the nfcapd/sfcapd processes for you.The nfsen.conf file has a section in which to configure all the sources. + +### Installing RabbitMQ + +The pipeline requires a RabbitMQ server. Typically, this runs on the same server as the pipeline itself, but if need be, you can separate them (for this reason, the Rabbit server is not automatically installed with the pipeline package). + +```sh +[root@host ~]# yum install rabbitmq-server + +``` + +Typically, the default configuration will work. Perform any desired Rabbit configuration, then, start RabbitMQ: + +```sh +[root@host ~]# /sbin/service rabbitmq-server start + or # systemctl start rabbitmq-server.service +``` + +### Installing Logstash + +See the logstash documentation. We are currently using Version 7.10. + +### Installing the EPEL repo + +Some of our dependencies come from the EPEL repo. To install this: + +``` +[root@host ~]# yum install epel-release +``` + +### Installing the GlobalNOC Open Source repo + +The Pipeline package (and its dependencies that are not in EPEL) are in the GlobalNOC Open Source Repo. + +For Red Hat/CentOS 6, create `/etc/yum.repos.d/grnoc6.repo` with the following content. + +``` +[grnoc6] +name=GlobalNOC Public el6 Packages - $basearch +baseurl=https://repo-public.grnoc.iu.edu/repo/6/$basearch +enabled=1 +gpgcheck=1 +gpgkey=https://repo-public.grnoc.iu.edu/repo/RPM-GPG-KEY-GRNOC6 +``` + +For Red Hat/CentOS 7, create `/etc/yum.repos.d/grnoc7.repo` with the following content. + +``` +[grnoc7] +name=GlobalNOC Public el7 Packages - $basearch +baseurl=https://repo-public.grnoc.iu.edu/repo/7/$basearch +enabled=1 +gpgcheck=1 +gpgkey=https://repo-public.grnoc.iu.edu/repo/RPM-GPG-KEY-GRNOC7 +``` + +The first time you install packages from the repo, you will have to accept the GlobalNOC repo key. + +## Installing the Pipeline (Importer and Logstash configs) + +Install it like this: + +``` +[root@host ~]# yum install grnoc-netsage-deidentifier +``` + +Pipeline components: + +1. Flow Filter - GlobalNOC uses this for Cenic data to filter out some flows. Not needed otherwise. 
+2. Netsage Netflow Importer - required to read nfcapd files from sflow and netflow importers. (If using tstat flow sensors only, this is not needed.) +3. Logstash - be sure the number of logstash pipeline workers in /etc/logstash/logstash.yml is set to 1 or flow stitching/aggregation will not work right! +4. Logstash configs - these are executed in alphabetical order. See the Logstash doc. At a minimum, the input, output, and aggregation configs have parameters that you will need to update or confirm. + +Nothing will automatically start after installation as we need to move on to configuration. + +## Importer Configuration + +Configuration files of interest are + - /etc/grnoc/netsage/deidentifier/netsage_shared.xml - Shared config file allowing configuration of collections, and Rabbit connection information + - /etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml - other settings + - /etc/grnoc/netsage/deidentifier/logging.conf - logging config + - /etc/grnoc/netsage/deidentifier/logging-debug.conf - logging config with debug enabled + +### Setting up the shared config file + +`/etc/grnoc/netsage/deidentifier/netsage_shared.xml` + +There used to be many perl-based pipeline components and daemons. At this point, only the importer is left, the rest having been replaced by logstash. The shared config file, which was formerly used by all the perl components, is read before reading the individual importer config file. + +The most important part of the shared configuration file is the definition of collections. Each sflow or netflow sensor will have its own collection stanza. Here is one such stanza, a netflow example. Instance and router-address can be left commented out. + +``` + + + /path/to/netflow-files/ + + + Netflow Sensor 1 + + + sflow + + + + + + + + +``` + +Having multiple collections in one importer can sometimes cause issues for aggregation, as looping through the collections one at a time adds to the time between the flows, affecting timeouts. You can also set up multiple Importers with differently named shared and importer config files and separate init.d files. + +There is also RabbitMQ connection information in the shared config, though queue names are set in the Importer config. (The Importer does not read from a rabbit queue, but other old components did, so both input and output are set.) + +Ideally, flows should be deidentified before they leave the host on which the data is stored. If flows that have not be deidentified need to be pushed to another node for some reason, the Rabbit connection must be encrypted with SSL. + +If you're running a default RabbitMQ config, which is open only to 'localhost' as guest/guest, you won't need to change anything here. + +``` + + + 127.0.0.1 + 5672 + guest + guest + 0 + 100 + / + 1 + + + + 127.0.0.1 + 5672 + guest + guest + 0 + 100 + / + 1 + +``` + +### Setting up the Importer config file + +`/etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml` + +This file has a few more setting specific to the Importer component which you may like to adjust. + + - Rabbit_output has the name of the output queue. This should be the same as that of the logstash input queue. + - (The Importer does not actually use an input rabbit queue, so we add a "fake" one here.) + - Min-bytes is a threshold applied to flows aggregated within one nfcapd file. Flows smaller than this will be discarded. + - Min-file-age is used to be sure files are complete before being read. 
+ - Cull-enable and cull-ttl can be used to have nfcapd files older than some number of days automatically deleted. + - Pid-file is where the pid file should be written. Be sure this matches what is used in the init.d file. + - Keep num-processes set to 1. + +```xml + + + + + + netsage_deidentifier_netflow_fake + 2 + + + + 3 + netsage_deidentifier_raw + + + + + 100 + + + 1 + + + + + + /var/cache/netsage/netflow_importer.cache + + + + 100000000 + + + 10m + + + + + + + + + + + + + /var/run/netsage-netflow-importer-daemon.pid + + + +``` + +## Logstash Setup Notes + +Standard logstash filter config files are provided with this package. Most should be used as-is, but the input and output configs may be modified for your use. + +The aggregation filter also has settings that may be changed as well - check the two timeouts and the aggregation maps path. + +When upgrading, these logstash configs will not be overwritten. Be sure any changes get copied into the production configs. + +FOR FLOW STITCHING/AGGREGATION - IMPORTANT! +Flow stitching (ie, aggregation) will NOT work properly with more than ONE logstash pipeline worker! +Be sure to set "pipeline.workers: 1" in /etc/logstash/logstash.yml and/or /etc/logstash/pipelines.yml. When running logstash on the command line, use "-w 1". + +## Start Logstash + +```sh +[root@host ~]# /sbin/service logstash start + or # systemctl start logstash.service +``` +It will take couple minutes to start. Log files are normally /var/log/messages and /var/log/logstash/logstash-plain.log. + +When logstash is stopped, any flows currently "in the aggregator" will be written out to /tmp/logstash-aggregation-maps (or the path/file set in 40-aggregation.conf). These will be read in and deleted when logstash is started again. + +## Start the Importer + +Typically, the daemons are started and stopped via init script (CentOS 6) or systemd (CentOS 7). They can also be run manually. The daemons all support these flags: + +`--config [file]` - specify which config file to read + +`--sharedconfig [file]` - specify which shared config file to read + +`--logging [file]` - the logging config + +`--nofork` - run in foreground (do not daemonize) + +```sh +[root@host ~]# /sbin/service netsage-netflow-importer start + or # systemctl start netsage-netflow-importer.service +``` +The Importer will create a deamon process and a worker process. When stopping the service, the worker process might take a few minutes to quit. If it does not quit, kill it by hand. + + +## Cron jobs + +Sample cron files are provided. Please review and uncomment their contents. These periodically download MaxMind, CAIDA, and Science Registry files, and also restart logstash. Logstash needs to be restarted in order for any updated files to be read in. + + + diff --git a/website/versioned_docs/version-1.2.10/deploy/choosing.md b/website/versioned_docs/version-1.2.10/deploy/choosing.md new file mode 100644 index 00000000..43ae4429 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/deploy/choosing.md @@ -0,0 +1,25 @@ +--- +id: choose_install +title: Choosing an Installation Procedure +sidebar_label: Choose Install +--- + +## Manual or BareMetal Installation + +The Manual (baremetal) Installation Guide will walk you through installing the pipeline using your own server infrastructure and requires you to maintain all the components involved. + +It will likely be a bit better when it comes to performance, and have greater flexibility, but there is also more complexity involved in configuring and setting up. 
+ +If you are the ultimate consumer of the data then setting up a baremetal version might be worth doing. Or at least the final rabbitMQ that will be holding the data since it'll likely need to handle a large dataset. + +## Dockerized Version + +The Docker version makes it trivial to bring up the pipeline for both a developer and consumer. The work is mostly already done for you. It should be a simple matter of configuring a few env settings and everything should 'just' work. + +If you are simply using the pipeline to deliver the anonymized network stats for someone else's consumption, then using the docker pipeline would be preferred. + +## Choose your adventure + +- [Manual/Server Installation](bare_metal_install) +- [Simple Docker](docker_install_simple.md) - 1 netflow sensor and/or 1 sflow sensor +- [Advanced Docker](docker_install_advanced.md) - options that allow for more complex configurations diff --git a/website/versioned_docs/version-1.2.10/deploy/docker_install_advanced.md b/website/versioned_docs/version-1.2.10/deploy/docker_install_advanced.md new file mode 100644 index 00000000..259571d2 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/deploy/docker_install_advanced.md @@ -0,0 +1,196 @@ +--- +id: docker_install_advanced +title: Docker Advanced Options Guide +sidebar_label: Docker Advanced Options +--- + +If the basic Docker Installation does not meet your needs, the following customizations will allow for more complex situations. Find the section(s) which apply to you. + +*Please first read the Docker Installation guide in detail. This guide will build on top of that.* + + +## To Add an Additional Sflow or Netflow Collector + +If you have more than 1 sflow and/or 1 netflow sensor, you will need to create more collectors and modify the importer config file. The following instructions describe the steps needed to add one additional sensor. + +Any number of sensors can be accommodated, although if there are more than a few being processed by the same Importer, you may run into issues where long-lasting flows from sensor A time out in the aggregation step while waiting for flows from sensors B to D to be processed. (Another option might be to run more than one Docker deployment.) + + +### a. Edit docker-compose.override.yml + +The pattern to add a flow collector is always the same. To add an sflow collector called example-collector, edit the docker-compose.override.yml file and add + +```yaml + example-collector: + image: netsage/nfdump-collector:1.6.18 + restart: always + command: sfcapd -T all -l /data -S 1 -w -z -p 9997 + volumes: + - ./data/input_data/example:/data + ports: + - "9997:9997/udp" +``` + +- collector-name: should be updated to something that has some meaning, in our example "example-collector". +- command: choose between sfcapd for sflow and nfcapd for netflow, and at the end of the command, specify the port to watch for incoming flow data. (Unless your flow exporter is already set up to use a different port, you can use the default ports and configure the exporters on the routers to match.) +- ports: make sure the port here matches the port you've set in the command. Naturally all ports have to be unique for this host and the +router should be configured to export data to the same port. (If the port on your docker container is different than the port on your host/local machine, use container_port:host_port.) +- volumes: specify where to write the nfcapd files. Make sure the path is unique and in ./data/.
In this case, we're writing to ./data/input_data/example. Change the last part of the path to something meaningful. + +You will also need to uncomment these lines: + +```yaml + volumes: + - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml +``` + + +### b. Edit netsage_override.xml + +To make the Pipeline Importer aware of the new data to process, you will need to create a custom Importer configuration: netsage_override.xml. This will replace the usual config file netsage_shared.xml. + +```sh +cp compose/importer/netsage_shared.xml userConfig/netsage_override.xml +``` + +Edit netsage_override.xml and add a "collection" section for the new sensor as in the following example. The flow-path should match the path set above in docker-compose.override.yml. $exampleSensorName is a new "variable"; it will be replaced with a value set in the .env file. For the flow-type, enter "sflow" or "netflow" as appropriate. + +```xml + <collection> + <flow-path>/data/input_data/example/</flow-path> + <sensor>$exampleSensorName</sensor> + <flow-type>sflow</flow-type> + </collection> +``` + +### c. Edit environment file + +Then, in the .env file, add a line that sets a value for the "variable" you referenced above, $exampleSensorName. The value is the name of the sensor which will be saved to elasticsearch and which appears in Netsage Dashboards. Set it to something meaningful and unique. + +```ini +exampleSensorName=Example New York sFlow +``` + + +### d. Running the new collector + +After doing the setup above and selecting the docker version to run, you can start the new collector by running the following line, using the collector name (or by running `docker-compose up -d` to start up all containers): + +```sh +docker-compose up -d example-collector +``` + +:::note +The default version of the collector is 1.6.18. There are other versions released and :latest should point to the latest one, but there is no particular effort made to make sure we released the latest version. You can get a listing of all the current tags [here](https://hub.docker.com/r/netsage/nfdump-collector/tags), and the source used to generate the docker image can be found [here](https://github.com/netsage-project/docker-nfdump-collector). You may use a different version, though there is no particular effort to have an image for every nfdump release. +::: + + +## To Keep Only Flows From Certain Interfaces +If your sensors are exporting all flows, but you only want to keep some of them (eg, only send some of them to NetSage), use this option. The collectors and importer will process all flows, but in the logstash pipeline, those that do not have src_ifindex or dst_ifindex equal to one of the listed interfaces will be dropped. + +In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and list all the ifindex values of flows that should be kept and passed on to NetSage. You may enter one or more ifindex values. For example, + +```sh +ifindex_filter_flag=True +ifindex_filter_keep=123,456 +``` + +In this case, only flows that have src_ifindex = 123 or src_ifindex = 456 or dst_ifindex = 123 or dst_ifindex = 456 will be kept. All others will be dropped. + + +## To Change a Sensor Name Depending on the Interface Used +In some cases, users want to differentiate between flows that enter or exit through specific sensor interfaces. This can be done by editing the env file. + +In the .env file, uncomment the appropriate section and enter the information required.
Be sure "True" is capitalized as shown and all 4 fields are set properly! For example, + +```sh +ifindex_sensor_rename_flag=True +ifindex_sensor_rename_old_name=IU Sflow +ifindex_sensor_rename_new_name=IU Bloomington Sflow +ifindex_sensor_rename_ifindex=10032 +``` + +In this case, any flows from the "IU Sflow" sensor that come through interface 10032 (src_ifindex = 10032 OR dst_ifindex = 10032) will have the sensor name (sensor_id in ElasticSearch) changed from "IU Sflow" to "IU Bloomington Sflow". Currently, only one such rename can be configured in Docker. + +:::note +Please notify the devs at IU in advance, if you need to modify a sensor name, because the regexes used for determining sensor_group and sensor_type may have to be updated. +::: + +## To Do Sampling Rate Corrections in Logstash +When flow sampling is done, the number of bits needs to be corrected for the sampling rate. For example, if you are sampling 1 out of 100 flows and a sample has 55 MB, it is assumed that in reality there would be 100 flows of that size (with that src and dst), so the number of bits is multiplied by 100. Usually the collector (nfcapd or sfcapd process) gets the sampling rate from the incoming data and applies the correction, but in some cases, the sensor may not send the sampling rate, or there may be a complex set-up that requires a manual correction. With netflow, a manual correction can be applied using the '-s' option in the nfsen config or the nfcapd command. For sflow, there is no such option. In either case, the correction can be made in logstash as follows. + +In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 3 fields are set properly! The same correction can be applied to multiple sensors by using a comma-separated list. For example, + +```sh +sampling_correction_flag=True +sampling_correction_sensors=IU Bloomington Sflow, IU Sflow +sampling_correction_factor=512 +``` + +## To Change How Long Nfcapd Files Are Kept +The importer will automatically delete older nfcapd files for you, so that your disks don't fill up. By default, 3 days worth of files will be kept. This can be adjusted by making a netsage_override.xml file: + +```sh +cp compose/importer/netsage_shared.xml userConfig/netsage_override.xml +``` + +At the bottom of the file, edit this section to set the number of days worth of files to keep. Set cull-enable to 0 for no culling. Eg, to save 7 days worth of data: +````xml + + <cull-enable>1</cull-enable> + <cull-ttl>7</cull-ttl> + +```` + +You will also need to uncomment these lines in docker-compose.override.yml: + +```yaml + volumes: + - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml +``` + + +## To Customize Java Settings / Increase Memory Available for Logstash + +If you need to modify the amount of memory logstash can use or any other java settings, +rename the provided example for JVM Options and tweak the settings as desired. + +```sh +cp userConfig/jvm.options_example userConfig/jvm.options +``` + +Also update the docker-compose.override.yml file to uncomment lines in the logstash section. It should look something like this: + +```yaml +logstash: + image: netsage/pipeline_logstash:latest + volumes: + - ./userConfig/jvm.options:/usr/share/logstash/config/jvm.options +``` + +Here are some tips for adjusting the JVM heap size (https://www.elastic.co/guide/en/logstash/current/jvm-settings.html): + +- The recommended heap size for typical ingestion scenarios should be no less than 4GB and no more than 8GB.
## To Bring up Kibana and Elasticsearch Containers + +The file docker-compose.develop.yaml can be used in conjunction with docker-compose.yaml to bring up the optional Kibana and Elasticsearch components. + +This isn't a production pattern, but the tools can be useful at times. Please refer to the [Docker Dev Guide](../devel/docker_dev_guide#optional-elasticsearch-and-kibana). + +## For Data Saved to an NFS Volume + +By default, data is saved to subdirectories in the ./data directory. If you would like to use an NFS mount instead, you will need to either + +1. export the NFS volume as ${PROJECT_DIR}/data (which is the ideal scenario and least intrusive) +2. update the path to the NFS export path in all locations in docker-compose.yml and docker-compose.override.yml + +Note: modifying all the paths in the two files should work, but may not. In one case, it worked to modify only the paths for the collector volumes (eg, - /mnt/nfs/netsagedata/netflow:/data), leaving all others with their default values. + +:::warning +If you choose to update the docker-compose file, keep in mind that those changes will cause a merge conflict on upgrade. +You'll have to manage the volumes exported and ensure all the paths are updated correctly for the next release manually. +::: diff --git a/website/versioned_docs/version-1.2.10/deploy/docker_install_simple.md b/website/versioned_docs/version-1.2.10/deploy/docker_install_simple.md new file mode 100644 index 00000000..bf3d6856 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/deploy/docker_install_simple.md @@ -0,0 +1,103 @@ +--- +id: docker_install_simple +title: Docker Installation Guide +sidebar_label: Docker Installation +--- +In this deployment guide, you will learn how to deploy a basic Netsage setup that includes one sflow and/or one netflow collector. If you have more than one collector of either type, or other special situations, see the Docker Advanced guide. + +The Docker containers included in the installation are + - rabbit (the local RabbitMQ server) + - sflow-collector (receives sflow data and writes nfcapd files) + - netflow-collector (receives netflow data and writes nfcapd files) + - importer (reads nfcapd files and puts flows into a local rabbit queue) + - logstash (logstash pipeline that processes flows and sends them to their final destination, by default a local rabbit queue) + - ofelia (cron-like downloading of files used by the logstash pipeline) + +The code and configs for the importer and logstash pipeline can be viewed in the netsage-project/netsage-pipeline github repo. See netsage-project/docker-nfdump-collector for code related to the collectors. + + +### 1. Set up Data Sources +The data processing pipeline needs data to ingest in order to do anything, of course. There are three types of data that can be consumed.
+ + - sflow + - netflow + - tstat + +At least one of these must be set up on a sensor (flow exporter/router), to provide the incoming flow data. +You can do this step later, but it will helpful to have it working first. + +Sflow and netflow data should be exported to the pipeline host where there are collectors (nfcapd and/or sfcapd processes) ready to receive it (see below). To use the default settings, send sflow to port 9998 and netflow to port 9999. On the pipeline host, allow incoming traffic from the flow exporters, of course. + +Tstat data should be sent directly to the logstash input rabbit queue "netsage_deidentifier_raw" on the pipeline host. No collector is needed for tstat data. See the netsage-project/tstat-transport repo. (From there, logstash will grab the data and process it the same way as it processes sflow/netflow data. (See the Docker Advanced guide.) + +### 2. Clone the Netsage Pipeline Project + +If you haven't already, install [Docker](https://www.docker.com) and [Docker Compose](https://docs.docker.com/compose/install/) and clone this project +```sh +git clone https://github.com/netsage-project/netsage-pipeline.git +``` +(If you are upgrading to a new release, see the Upgrade section below!) + +Then checkout the right version of the code. +```sh +git checkout {tag} +``` +Replace "{tag}" with the release version you intend to use, e.g., "v1.2.8". ("Master" is the development version and is not intended for general use!) +`git status` will confirm which branch you are on, e.g., master or v1.2.8. + +### 3. Create Docker-compose.override.yml + +Information in the `docker-compose.yml` file tells docker which containers (processes) to run and sets various parameters for them. +Settings in the `docker-compose.override.yml` file will overrule and add to those. Note that docker-compose.yml should not be edited since upgrades will replace it. Put all customizations in the override file, since override files will not be overwritten. + +Collector settings may need to be edited by the user, so the information that docker uses to run the collectors is specified (only) in the override file. Therefore, docker-compose_override.example.yml must always be copied to docker-compose_override.yml. + +```sh +cp docker-compose.override_example.yml docker-compose.override.yml +``` + +By default docker will bring up a single netflow collector and a single sflow collector. If this matches your case, you don't need to make any changes to the docker-compose.override_example.yml. If you have only one collector, remove or comment out the section for the one not needed so the collector doesn't run and simply create empty nfcapd files. +:::note +If you only have one collector, you should remove or comment out the section for the collector that is not used, so it doesn't run and just create empty files. +::: + +This file also specifies port numbers, and directories for nfcapd files. By default, the sflow collector will listen to udp traffic on localhost:9998, while the netflow collector will listen on port 9999, and data will be written to `/data/input_data/`. Each collector is namespaced by its type so the sflow collector will write data to `/data/input_data/sflow/` and the netflow collector will write data to `/data/input_data/netflow/`. Change these only if required. + +Other lines in this file you can ignore for now. + +:::note +If you run into issues, try removing all the comments in the override file as they may conflict with the parsing done by docker-compose +::: + + +### 4. 
Create Environment File + +{@import ../components/docker_env.md} + +### 5. Choose Pipeline Version + +Once you've created the docker-compose.override.xml file and finished adjusting it for any customizations, you're ready to select which version Docker should run. + +```sh +./scripts/docker_select_version.sh +``` +When prompted, select the **same version** you checked out earlier. +This script will replace the version numbers of docker images in the docker-compose files with the correct values. + +## Running the Collectors + +After selecting the version to run, you could start the two flow collectors by themselves by running the following line. If you only need one of the collectors, remove the other from this command. + +(Or see the next section for how to start all the containers, including the collectors.) + +```sh +docker-compose up -d sflow-collector netflow-collector +``` + +If the collector(s) are running properly, you should see nfcapd files in subdirectories of data/input_data/, and they should have sizes of more than a few hundred bytes. (See Troubleshooting if you have problems.) + + +## Running the Collectors and Pipeline + +{@import ../components/docker_pipeline.md} + diff --git a/website/versioned_docs/version-1.2.10/deploy/docker_troubleshooting.md b/website/versioned_docs/version-1.2.10/deploy/docker_troubleshooting.md new file mode 100644 index 00000000..1ad608ab --- /dev/null +++ b/website/versioned_docs/version-1.2.10/deploy/docker_troubleshooting.md @@ -0,0 +1,61 @@ +--- +id: docker_troubleshoot +title: Docker Troubleshooting +sidebar_label: Troubleshooting +--- + +## Troubleshooting + +### If you are not seeing flows after installation + +**Troubleshooting checklist:** + +- Make sure you configured your routers to point to the correct address/port where the collector is running.  +- Check iptables on your pipeline host to be sure incoming traffic from the routers is allowed. +- Use `docker-compose ps` to be sure the collectors (and other containers) are running. +- Check to see if nfcapd files are being written. There should be a directory for the year, month, day and files should be larger than a few hundred bytes. If the files exist but are too small, the collector is running but there are no incoming flows. "nfdump -r filename" will show the flows in a file. +- Make sure you created .env and docker-compose.override.yml files and updated the settings accordingly, sensorName especially since that identifies the source of the data. +- Check the logs of the various containers to see if anything jumps out as being invalid.  `docker-compose logs -f $service_label` +- Check the logs to see if logstash is starting successfully. +- If the final rabbit queue is on an external host, check iptables on that host to be sure incoming traffic from your pipeline host is allowed. + +To see if flows are getting into and being read from the rabbit queue on the pipeline host, you can go to `http://localhost:15672` in your favorite web browser. Login as guest with password guest. Look for accumulating messages and/or messages being acknowledged and published. + +### If flow collection stops + +**Logstash or Importer errors:** +- Make sure all containers are running. `docker ps` +- Check the logs of the various containers to see if anything jumps out as being invalid.  `docker-compose logs -f $service_label` +- Check the logs to see if logstash is starting successfully. + +**Disk space:** +- If the pipeline suddenly fails, check to see if the disk is full. 
If it is, first try getting rid of old docker images and containers to free up space: `docker image prune -a` and `docker container prune`. +- Also check to see how much space the nfcapd files are consuming. You may need to add more disk space, or you could try saving fewer days of nfcapd files (see Docker Advanced). + +**Memory:** +- If you are processing a lot of data, docker may need to be allocated more memory. The most +likely culprit is logstash, which is usually allocated only 2GB of RAM. You'll need to update the jvm.options file to grant it more memory. + +Please see the [Docker Advanced guide](docker_install_advanced.md#customize-logstash-settings) for details on how to customize logstash. + +Applying this snippet to logstash may help. For example, to give logstash (java) 3GB, + +```yaml +environment: + - LS_JAVA_OPTS=-Xmx3g +``` + +Alternatively you may also try doing this: + +```yaml +deploy: + resources: + limits: + cpus: "0.50" + memory: 50M + reservations: + cpus: "0.25" + memory: 20M +``` + +Reference: https://docs.docker.com/compose/compose-file/#resources + diff --git a/website/versioned_docs/version-1.2.10/deploy/docker_upgrade.md b/website/versioned_docs/version-1.2.10/deploy/docker_upgrade.md new file mode 100644 index 00000000..a598caac --- /dev/null +++ b/website/versioned_docs/version-1.2.10/deploy/docker_upgrade.md @@ -0,0 +1,79 @@ +--- +id: docker_upgrade +title: Upgrading +sidebar_label: Docker Upgrading +--- + +To upgrade a previous installation of the Dockerized pipeline, perform the following steps. + +### Shut things down + +```sh +cd {netsage-pipeline directory} +docker-compose down +``` +This will stop all the docker containers, including the importer, logstash, and any collectors. Note that incoming flow data will not be saved during the time the collectors are down. + +### Update Source Code + +To upgrade to a new release, just reset and pull changes including the new release from github. Your customized .env and override files will not be overwritten. + +```sh +git reset --hard +git pull origin master +``` + +:::warning +git reset --hard will obliterate any changes you have made to non-override files. If necessary, please make sure you commit and save to a feature branch before continuing. + +Example: +```git commit -a -m "Saving local state"; git checkout -b feature/backup; git checkout master``` +::: + +### Check/Update Override Files +Occasionally, the required version of Docker or nfdump may change, which will necessitate editing your override and/or env files. + +- Compare the new `docker-compose.override_example.yml` file to your `docker-compose.override.yml` to see if a new version of Docker is required. Look for, eg, `version: "3.7"` at the top. If the version number is different, change it in your docker-compose.override.yml file and upgrade Docker manually. + +- Also check to see if the version of nfdump has changed. Look for lines like `image: netsage/nfdump-collector:1.6.18`. Make sure the version in your override file matches what is in the example file. (You do not need to actually perform any upgrade yourself. This will ensure the correct version is pulled from Docker Hub.) +Note that you do not need to update the versions of the importer or logstash images. That will be done for you in the "select release version" step coming up. + +- Also compare your `.env` file with the new `env.example` file to see if any new lines or sections have been added. If there have been any changes relevant to your deployment, eg, new options you want to use, copy the changes into your .env file. + +- If you used the Docker Advanced guide to make a `netsage_override.xml` file, compare it to `netsage_shared.xml` to see if there are any changes. This is unlikely. (A quick way to compare the example files with your copies is sketched below.)
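A quick, optional way to make these comparisons is a plain diff of each example file against your customized copy, for example:

```sh
diff docker-compose.override_example.yml docker-compose.override.yml
diff env.example .env
```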
### Select Release Version + +Run the following commands to select the new release you want to run. In the checkout, replace "{tag}" by the version to run (eg, v1.2.10). When asked by the docker_select_version.sh script, select the same version as the tag you checked out. +```sh +git checkout -b {tag} +git pull +./scripts/docker_select_version.sh +``` +Check to be sure docker-compose.yml and docker-compose.override.yml both now have the version number you selected. + +### Update Docker Containers + +Do not forget this step! Pull new images from Docker Hub. This applies for both development and release versions. + +``` +docker-compose pull +``` + +### Restart all the Docker Containers + +``` +docker-compose up -d +``` + +This will start all the services/containers listed in the docker-compose.yml and docker-compose.override.yml files, including the importer, logstash pipeline, and collectors. + +### Delete old images and containers + +To save space, delete any old images and containers that are not being used. + +``` +docker image prune -a +docker container prune +``` + diff --git a/website/versioned_docs/version-1.2.10/devel/docker.md b/website/versioned_docs/version-1.2.10/devel/docker.md new file mode 100644 index 00000000..76735113 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/devel/docker.md @@ -0,0 +1,83 @@ +--- +id: docker_dev_guide +title: Docker Dev Guide +sidebar_label: Docker Dev Guide +--- + +## Selecting a Version + +You can use the "master" version or a tagged version. +To select a released version, use the docker_select_version.sh script (see the Deployment Guide). +If you wish to use the development version (master branch), simply skip the docker_select_version.sh step. + +## Installing + +See the Deployment Guide to learn how to set up collectors, your environment and override files, etc. + +## Importer + +The importer "shared" config that Docker uses is defined in compose/netsage_shared.xml. **NOTE: If you want to make changes to this file, you will need to rebuild the container.** + +## Build Images + +The images are published on Docker Hub, but if you'd like to incorporate local changes please follow the process below. + +### Build Using Source Code + +If you would like to build the *importer* container using the version of the pipeline scripts found in the GitHub repo, then run the following: + +```sh +docker-compose -f docker-compose.build.yml build + +``` + +NOTE: The importer container includes the config files for the logstash pipeline. + + +## Optional: ElasticSearch and Kibana + +You can optionally store flow data locally in an ElasticSearch container and view the data with Kibana. Local storage can be enabled with the following steps: + +1. Uncomment the following lines in conf-logstash/99-outputs.conf: + +``` +elasticsearch { + hosts => ["elasticsearch"] + index => "netsage_flow-%{+YYYY.MM.dd}" +} +``` + +2. Comment out the `rabbitmq {...}` block in conf-logstash/99-outputs.conf if you do not want to also send logstash output to RabbitMQ. + +3. Run the containers using the following line: `docker-compose -f docker-compose.yml -f docker-compose.develop.yml up -d` (one way to verify that flow data is reaching the local Elasticsearch is sketched below).
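Assuming docker-compose.develop.yml publishes Elasticsearch on localhost:9200 (check its port mapping for your setup), one way to confirm that flow indices are being created is:

```sh
curl -s 'http://localhost:9200/_cat/indices/netsage_flow-*?v'
```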
## Handy Docker Commands + +### Start the Containers + +``` sh +docker-compose up -d +``` + +### Stop the Containers + +``` sh +docker-compose stop && docker-compose rm +``` + +### Enter a Container Shell + +``` sh +docker-compose exec logstash bash #bash shell in logstash container +docker-compose exec importer bash #bash shell in importer container +docker-compose exec rabbit bash #bash shell in rabbit container +``` + +### View Container Logs + +``` sh +docker-compose logs -f #view logs for all containers +docker-compose logs -f logstash #view logs for logstash container +docker-compose logs -f importer #view logs for importer container +docker-compose logs -f rabbit #view logs for rabbit container +``` diff --git a/website/versioned_docs/version-1.2.10/devel/documentation_guide.md b/website/versioned_docs/version-1.2.10/devel/documentation_guide.md new file mode 100644 index 00000000..06c1c4a8 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/devel/documentation_guide.md @@ -0,0 +1,142 @@ +--- +id: docusaurus +title: Revising Documentation +sidebar_label: Docusaurus +--- + +This project's documentation uses Docusaurus. + +Docusaurus converts markdown into html and builds a static website using React UI components, which can be exported to a webserver. + +Yarn is a package manager for JavaScript and replaces the npm client. It is not strictly necessary but highly encouraged. + +To extend the docs, simply create a markdown file and reference its ID in the sidebar config. Please see the related documentation +at the [docusaurus 2](https://v2.docusaurus.io/) project website. + +*THE FOLLOWING INSTRUCTIONS ARE NOT CONFIRMED TO WORK. PLEASE UPDATE WITH CORRECTIONS.* + +## If Not Using Docker +These are instructions for editing and releasing docs without using Docker. + +### Installation + +To get started the first time, install npm, then use that to install yarn: +``` +$ sudo yum install npm +$ sudo npm install -g yarn +``` + +Git clone the netsage pipeline project, then run yarn install to get all the dependencies listed within package.json: +``` +$ cd netsage-pipeline/website +$ yarn install +``` + +### Local Development + +If you are working on your local machine, you can view changes to the docs in a browser as you work. Use the following commands to generate the static website content (gets written into the build directory), then start a local development server and open up a browser window in which to view the docs. Most changes you make will be reflected live without having to restart the server. +``` +$ yarn build +$ yarn start +go to http://localhost:3000 +``` + +### To Make Changes +Whether on a local machine or a linux host, to make changes, edit the files in website/docs/. +When finished, git add, git commit, git push, as usual. +Repeat as needed. + + +### Tagging a New release + +When it's time to release a new version of the Pipeline, you need to create a new version of the docs as well. + +Once the documentation is stable and you don't foresee any new changes, please do the following: + +``` +$ yarn run docusaurus docs:version a.b.c +``` + +replacing a.b.c with the next release version number. +This will create new versioned docs in website/versioned_docs/. + +Then edit docusaurus.config.js and change `lastVersion:` to refer to the new version number.
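For reference, `lastVersion` is an option of the docs plugin inside the classic preset; a sketch of the relevant fragment of docusaurus.config.js (surrounding options omitted, the version number is just an example):

```js
// docusaurus.config.js (fragment, sketch)
presets: [
  [
    '@docusaurus/preset-classic',
    {
      docs: {
        lastVersion: '1.2.10', // serve this version as the default docs
      },
    },
  ],
],
```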
+ +Finally, commit and push the following to github: + * website/versioned_docs/version-a.b.c/ + * website/versioned_sidebars/version-a.b.c.sidebars.json + * versions.json + * docusaurus.config.js + + +### Deploying Docs to github.io +Whether you have created a new set of versioned tags or just want to update the docs in "master", to make changes appear at https://netsage-project.github.io/netsage-pipeline, do the following. + +If Travis or some other CI is working, it will run yarn install and yarn deploy to do this automatically. + +If it is not, do it manually: +``` +$ USE_SSH="true" GIT_USER="your-username" yarn deploy +``` +replacing your-username. This sets a couple env vars then runs 'yarn deploy' which runs 'docusaurus deploy' (see package.json) which pushes the static website created to url: "https://netsage-project.github.io" (see docusaurus.config.js) + +NOTE: You need to have created ssh keys on the host you are running this on and added them to your github account. + +### Removing a version + +To remove version 1.2.6 for example. + +we need to: + + * update versions.json to remove the reference + * remove the versioned_docs/version-1.2.6 + * remove versioned_sidebars/version-1.2.6-sidebars.json + +## If Using Docker + +You may also use a docs Docker container to simplify installation, making changes, and deployment. This method starts a local web server that allows you to see changes to the docs in a browser on your local machine, as they are made. + +### Build and Start the Container + +Git clone the netsage pipeline project then build and start the container. +The Dockerfile in website/ tells how to build an image that runs yarn. Docker-compose.yml brings up a docs container. +``` +$ cd netsage-pipeline/website +$ docker-compose build build_docs +$ docker-compose up -d docs +go to http://localhost:8000/netsage-pipeline/ +``` + +### To Make Changes +Whether on a local machine or a linux host, to make changes, edit the files in website/docs/. +When finished, git add, git commit, git push, as usual. +Repeat as needed. + +### Tagging a New release + +When it's time to release a new version of the Pipeline, you need to create a new version of the docs as well. + +Once the documentation is stable and you don't forsee any new change, please do the following: + +``` +$ docker-compose build build_docs +$ docker-compose run docs yarn run docusaurus docs:version a.b.c +``` +replacing a.b.c with the next release version number. +This will create new versioned docs in website/versioned_docs/. + +Then edit docusaurus.config.js and change `lastVersion:` to refer to the new version number. + +Finally, commit and push the following to github: + * website/versioned_docs/version-a.b.c/ + * website/versioned_sidebars/version-a.b.c.sidebars.json + * versions.json + * docusaurus.config.js + + +### Deploying Docs to github.io +How to do this when using Docker ??? Get into the container ??? + +For now, go a linux server that has yarn installed and +follow the instructions under If Not Using Docker. + diff --git a/website/versioned_docs/version-1.2.10/devel/pipeline_dataset.md b/website/versioned_docs/version-1.2.10/devel/pipeline_dataset.md new file mode 100644 index 00000000..a061957d --- /dev/null +++ b/website/versioned_docs/version-1.2.10/devel/pipeline_dataset.md @@ -0,0 +1,34 @@ +--- +id: dev_dataset +title: Pipeline Replay Dataset +sidebar_label: Replay Dataset +--- + +The Netsage Pipeline processes network data. 
There are some components and patterns we can use to test +the behavior, such as the Ruby unit [tests](https://github.com/netsage-project/netsage-pipeline/tree/master/conf-logstash/ruby/spec) in logstash and the [generator](https://www.elastic.co/guide/en/logstash/current/plugins-inputs-generator.html) plugin, but the best +test is to replay network data and inspect the output in the grafana dashboard. + +Two sample data sets are provided for the two types of collectors we have (Netflow and Sflow). The network data and IPs have been anonymized and should have no identifying information. + +You can download the files from [here](https://drive.google.com/drive/folders/19fzY5EVoKwtYUaiBJq5OxAR82yDY0taG). + +Please take note of which ports the collectors are listening on. Check your docker-compose.override.yml file. If you are using default ports, they should match this [example](https://github.com/netsage-project/netsage-pipeline/blob/master/docker-compose.override_example.yml). + +Currently the default ports are: + - 9998/udp for sflow + - 9999/udp for netflow + +Naturally the collectors have to be running in order for any of this to be usable. You can read more on how to get them running in the [Docker Simple Deployment Guide](../deploy/docker_install_simple.md#running-the-collectors). + +In order to replay the data, use the following commands for netflow and sflow respectively: + +### Netflow + +``` +nfreplay -H 127.0.0.1 -p 9999 -r nfcapd-ilight-anon-20200114 -v 9 -d 1000 +``` + +### Sflow + +Coming soon. nfreplay does not work with the sflow data type. + diff --git a/website/versioned_docs/version-1.2.10/devel/tag.md b/website/versioned_docs/version-1.2.10/devel/tag.md new file mode 100644 index 00000000..040de851 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/devel/tag.md @@ -0,0 +1,46 @@ +--- +id: docker_dev_tag +title: How to Tag a New Release +sidebar_label: Tagging a Release +--- + +To tag a new release, first update the version number and Changes file, build the rpm, etc., and upgrade on bare-metal hosts using yum. If all works fine, do the following steps to create new Docker images. + +## In Github, Create a Release/Tag + +Be sure to copy info from the Changes file into the Release description. + +Do this first ??? + +## To Build and Push an Importer Image Manually + +Git clone the pipeline project and have the ?? branch checked out. + +``` +$ docker-compose build +$ docker login +$ docker push $image:$tag +``` + +This will build the image and push it to Docker Hub. + +The person doing this has to have a Docker Hub account and belong to the Netsage team (3 users are allowed, for the free level). + +## With Automation + + +## Versioned Docs + +A new set of versioned docs also has to be tagged. See the Docusaurus guide. + +Does this have to happen before building the image ?? + +## New Version of Nfdump + +If a new version of nfdump has been released that we need, +???? + +## New Version of Logstash + +If a new version of logstash has been released that we want everyone to use, +??? diff --git a/website/versioned_docs/version-1.2.10/pipeline/elastic_search.md b/website/versioned_docs/version-1.2.10/pipeline/elastic_search.md new file mode 100644 index 00000000..047643da --- /dev/null +++ b/website/versioned_docs/version-1.2.10/pipeline/elastic_search.md @@ -0,0 +1,123 @@ +--- +id: elastic +title: Elasticsearch +sidebar_label: Elasticsearch +--- + +Flow data is ultimately saved to Elasticsearch.
Following are the fields that are used/created in Logstash and that you may see returned by an elasticsearch query. + +### Flow fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|start |Jun 9, 2020 @ 17:39:53.808 | Start time of the flow (first packet seen)| +|end |Jun 9, 2020 @ 17:39:57.699 |End time of the flow (last packet seen)| +|meta.id | a17c4f05420d7ded9eb151ccd293a633 ff226d1752b24e0f4139a87a8b26d779 |Id of the flow (hash of 5-tuple + Sensor name)| +|meta.flow_type |sflow |'sflow', 'netflow', or 'tstat'| +|meta.protocol |tcp |Protocol used| +|meta.sensor_id | snvl2-pw-sw-1-mgmt-2.cenic.net|Sensor name (set in importer config, may not always be a hostname) | +|meta.sensor_group |CENIC |Sensor group, usually the network | +|meta.sensor_type |Regional Network |Sensor type ('Circuit', 'Regional Network', etc) | +|meta.country_scope |Domestic |'Domestic', 'International', or 'Mixed', depending on countries of src and dst| +|meta.is_network_testing | no | 'yes' if discipline is 'CS.Network Testing and Monitoring' or port is one used for PerfSonar: 5001, 5101, or 5201| + +### Source Fields (Destination Fields similarly with "dst") + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|meta.src_ip |171.64.68.x | deidentified IP address| +|meta.src_port |80 |port used | +|meta.src_asn |32 |Source ASN from the flow header or, in some cases, the ANS of the IP from the MaxMind GeoIP ASN database| +|meta.src_organization |Stanford University | organization that owns the AS from the CAIDA ASN-Organization database +|meta.src_location.lat | 37.423 | latitude of the IP from the MaxMind GeoIP City database| +|meta.src_location.lon |-122.164 | longitude of the IP from the MaxMind GeoIP City database| +|meta.src_country_name |United States | country of the IP from the MaxMind GeoIP City database| +|meta.src_continent |North America | continent of the IP the MaxMind GeoIP City database| +|meta.src_ifindex |166 |the index of the interface the flow came into| + +### Source Science Registry Fields (Destination Fields similarly with "dst") +The [Science Registry](https://scienceregistry.netsage.global/rdb/) stores human-curated information about various "resources". Resources are sources and destinations of flows. + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|meta.scireg.src.discipline |MPS.Physics.High Energy |The science discipline that uses the resource (ie IP). Note that not the src MAY not have the same discipline as the dst. | +|meta.scireg.src.role |Storage |Role that the host plays | +|meta.scireg.src.org_name |Boston University (BU) |The organization the manages and/or uses the resource, as listed in the Science Registry| +|meta.scireg.src.org_abbr |Boston U |A shorter name for the organization. 
May not be the official abbreviation.| +|meta.scireg.src.resource |BU - ATLAS |Descriptive resource name from SciReg | +|meta.scireg.src.resource_abbr | |Resource abbreviation (if any)| +|meta.scireg.src.project_names |ATLAS |"Projects" that the resource is part of| +|meta.scireg.src.latitude |37.4178 |Resource's latitude, as listed in the Science Registry| +|meta.scireg.src.longitude |-122.178 |Resource's longitude, as listed in the Science Registry| + +### Source "Preferred" Fields (Destination Fields similarly with "dst") + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|meta.src_preferred_org |Stanford University |If the IP was found in the Science Registry, this is the SciReg organization, otherwise it is the CAIDA organization| +|meta.src_preferred_location.lat |37.417800 | Science Registry value if available, otherwise the MaxMind City DB value| +|meta.src_preferred_location.lon |-122.172000i | Science Registry value if available, otherwise the MaxMind City DB value | + +### Value Fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|values.num_bits |939, 458, 560 |Sum of the number of bits in the (stitched) flow| +|values.num_packets |77, 824 |Sum of the number of packets in the (stitched) flows| +|values.duration |3.891 |Calculated as end minus start.| +|values.bits_per_second |241, 443, 988 |Calculated as num_bits divided by duration | +|values.packets_per_second |20, 001 |Calculated as num_packets divided by duration| + +### Tstat Value Fields + +|name |example | +|-----------------------|-----------------------| +|values.tcp_cwin_max |1549681 | +|values.tcp_cwin_min |17| +|values.tcp_initial_cwin|313| +|values.tcp_max_seg_size|64313| +|values.tcp_min_seg_size|17| +|values.tcp_mss |8960| +|values.tcp_out_seq_pkts|0| +|values.tcp_pkts_dup |0| +|values.tcp_pkts_fc |0| +|values.tcp_pkts_fs |0| +|values.tcp_pkts_reor |0| +|values.tcp_pkts_rto |0| +|values.tcp_pkts_unfs |0| +|values.tcp_pkts_unk |2| +|values.tcp_pkts_unrto |0| +|values.tcp_rexmit_bytes |1678| +|values.tcp_rexmit_pkts |2| +|values.tcp_rtt_avg |0.044| +|values.tcp_rtt_max |39.527| +|values.tcp_rtt_min |0.001| +|values.tcp_rtt_std |0.276| +|values.tcp_sack_cnt | 1| +|values.tcp_win_max |1549681| +|values.tcp_win_min |17| +|values.tcp_window_scale |13| + +### Developer Fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|@ingest_time |Jun 9, 2020 @ 10:03:20.700 | Essentially time the flow went into the logstash pipeline or the time stitching of the flow commenced| +|@timestamp |Jun 9, 2020 @ 18:03:21.703 |The time the flow went into the logstash pipeline for tstat flows, or the time stitching finished and the event was pushed for other flows.| +|@exit_time |Jun 9, 2020 @ 18:03:25.369 |The time the flow exited the pipeline | +|@processing_time |688.31 |@exit_time minus @ingest_time. Useful for seeing how long stitching took. | +|stitched_flows |13 |Number of flows that came into logstash that were stitched together to make this final one. 1 if no flows were stitched together. 0 for tstat flows, which are never stitched. | +|es_doc_id |4f46bef884... |Hash of meta.id and start time. 
May be used as doc id in ES to prevent duplicates, but see Notes elsewhere.| +|tags |maxmind src asn |Various info and error messages| +|trial | 5 |Can be set in 40-aggregation.conf if desired| + +### Elasticsearch Fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|_index | om-ns-netsage-2020.06.14 | name of the index ("database table") | +|_type |_doc | set by ES | +|_id |HRkcm3IByJ9fEnbnCpaY | elasticsearch document id. If es_doc_id is provided, that is used. | +|_score |1 |set by ES query | +|@version |1 | set by ES | + diff --git a/website/versioned_docs/version-1.2.10/pipeline/importer.md b/website/versioned_docs/version-1.2.10/pipeline/importer.md new file mode 100644 index 00000000..24b05c4b --- /dev/null +++ b/website/versioned_docs/version-1.2.10/pipeline/importer.md @@ -0,0 +1,14 @@ +--- +id: importer +title: Importer +sidebar_label: Importer +--- +A netsage-netflow-importer script reads any new nfcapd files that have come in after a configurable delay and writes the results to the "netsage_deidentifier_raw" RabbitMQ queue. +All flow data waits in the queue until it is read in and processed by the logstash pipeline. + +To read nfcapd files, the importer uses an nfdump command with the "-a" option to aggregate raw flows within the file by the "5-tuple," i.e., the source and destination IPs, ports, and protocol. The "-L" option is used to throw out any aggregated flows below a threshold number of bytes. This threshold is specified in the importer config file. + +### Configuration +Configuration files for the importer are netsage_netflow_importer.xml and netsage_shared.xml in /etc/grnoc/netsage/deidentfier/. Comments in the files briefly describe the options. See also the Deployment pages in these docs. + +To avoid re-reading nfcapd files, the importer stores the names of files that have already been read in /var/cache/netsage/netflow_importer.cache. diff --git a/website/versioned_docs/version-1.2.10/pipeline/intro.md b/website/versioned_docs/version-1.2.10/pipeline/intro.md new file mode 100644 index 00000000..f4cce287 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/pipeline/intro.md @@ -0,0 +1,37 @@ +--- +id: intro +title: Intro +sidebar_label: Intro +--- +# The NetSage Pipeline + +## Description + +The Netsage Flow Processing Pipeline is composed of several components for processing network flow data, including importing, deidentification, metadata tagging, flow stitching, etc. +There are many ways the components can be combined, configured, and run. These documents will describe the standard "simple" set up and provide information for more complex configurations. + +## Data Collection + +In Netsage, sensor(s) are network devices configured to collect flow data ([tstat](http://tstat.polito.it/), [sflow](https://www.rfc-editor.org/info/rfc3176), or [netflow](https://www.cisco.com/c/en/us/products/collateral/ios-nx-os-software/ios-netflow/prod_white_paper0900aecd80406232.html)) and send it to a "pipeline host" for processing. + +Tstat flow data can be sent directly to the pipeline ingest RabbitMQ queue on the pipeline host using the Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) tool. This can be installed as usual or via Docker. + +Sflow and netflow data from configured routers should be sent to the pipeline host where it is collected and stored into nfcapd files using [nfdump tools](https://github.com/phaag/nfdump). 
The Netsage project has packaged the nfdump tools into a [Docker container](https://github.com/netsage-project/docker-nfdump-collector) for ease of use. + +## Pipeline Components + +The Netsage Flow Processing Pipeline is made of the following components + + - Importer: Perl scripts on the pipeline host that read nfcapd flow files and send the flow data to a RabbitMQ queue. ([Doc](importer.md), [in github](https://github.com/netsage-project/netsage-pipeline/blob/master/lib/GRNOC/NetSage/Deidentifier/NetflowImporter.pm)) + - [RabbitMQ](https://www.rabbitmq.com/): Used for message passing and queuing of tasks. + - [Logstash](https://www.elastic.co/logstash) pipeline: Performs a variety of operations on the flow data to transform it and add additional information. ([Doc](logstash.md)) + - [Elasticsearch](https://www.elastic.co/what-is/elasticsearch): Used for storing the final flow data. + +## Visualization + +[Grafana](https://grafana.com/oss/grafana/) or [Kibana](https://www.elastic.co/kibana) can be used to visualize the data stored in elasticsearch. Netsage Grafana Dashboards are available [in github](https://github.com/netsage-project/netsage-grafana-configs). + +## Pipeline Installation + +Originally, the pipeline was deployed by installing all of the components individually on one or more servers (the "BareMetal" or "Manual" Install). More recently, we've also added a Docker deployment option. With simple pipelines having just one sflow and/or one netflow sensor (and any number of tstat sensors), the basic "Docker Installation" should suffice. The "Docker Advanced Options" guide will help when there are more sensors and/or other customizations required. + diff --git a/website/versioned_docs/version-1.2.10/pipeline/logstash.md b/website/versioned_docs/version-1.2.10/pipeline/logstash.md new file mode 100644 index 00000000..b27e2ee7 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/pipeline/logstash.md @@ -0,0 +1,127 @@ +--- +id: logstash +title: Logstash Pipeline +sidebar_label: Logstash +--- + +The Logstash portion of the Netsage Pipeline reads in flows from a RabbitMQ queue, performs various transformations and adds additional information to them, then sends them to a location specified in the output logstash config, eventually ending up in an Elasticsearch instance. + +Logstash config files invoke various logstash "filters" and actions. These conf files are located in /etc/logstash/conf.d/. See below for a brief description of what each does and check the files for comments. + +Notes: + - All \*.conf files in conf.d/ are executed in alphabetical order, as if they were one huge file. Those ending in .disabled will not be executed (assuming 'path.config: "/etc/logstash/conf.d/*.conf"' in /etc/logstash/pipelines.yml). + - If actions in a particular .conf file are not needed in your particular case, they can be removed or the file disabled, but check carefully for effects on downstream configs. + - MaxMind, CAIDA, and Science Registry database files required by the geoip and aggregate filters are downloaded from scienceregistry.netsage.global via cron jobs weekly or daily. (MaxMind data can change weekly, CAIDA quarterly, Science Registry information randomly.) **NOTE that new versions won't be used in the pipeline until logstash is restarted.** There is a cron file to do this also, though it's not running in Docker deployments. Similarly for other support files, eg, those used in 90-additional-fields.conf. 
+ - Lookup tables for 55-member-orgs.conf that we have compiled are available from sciencregistry.grnoc.iu.edu. See the cron files provided. These will not be updated often, so you may run the cron jobs or not. You will need to provide lists for other networks yourself or ask us. + +## Logstash Sequence + +### 01-input-rabbit.conf + +Reads flows from a rabbitmq queue. (The ".disabled" extenstion can be removed from other 01-input configs available in conf.d/ to get flows from other sources.) + +### 10-preliminaries.conf + +Drops flows to or from private IP addresses; +converts any timestamps in milliseconds to seconds; +drops events with timestamps more than a year in the past or (10 sec) in the future; +does some data type conversions; +adds @ingest_time (this is mainly for developers). + +### 15-sensor-specific-changes.conf + +Makes any changes to fields needed for specific sensors. This config currently provides 1) the ability to drop all flows that do not use interfaces (ifindexes) in a specfied list, 2) the ability to change the sensor name for flows from a specified sensor which go through a certain interface, and 3) the ability to apply a sampling rate correction manually for named sensors. You may edit the file in a bare-metal installation and specify everything explicitly (upgrades will not overwrite this config) or you may use the environment file specified in the systemd unit file. For Docker installations, use the .env file to specifiy the parameters. By default, this config will do nothing since the flags will be set to False. + +### 20-add_id.conf + +Adds a unique id (evenutally called meta.id) which is a hash of the 5-tuple of the flow (src and dst ips and ports, and protocol) plus the sensor name. This id is used for aggregating (stitching) in the next step. + +### 40-aggregation.conf + +Stitches together flows from different nfcapd files into longer flows, matching them up by meta.id and using a specified inactivity_timeout to decide when to start a new flow. + +Notes: + - By default, 5-minute nfcapd files are assumed and the inactivity_timeout is set to 10.5 minutes. If more than 10.5 min have passed between the start of the current flow and the start of the last matching one, do not stitch them together. + - If your nfcapd files are written every 15 minutes, change the inactivity_timeout to at least 16 minutes. + - There is another "timeout" setting which is basically the maximum duration of a stitched flow (default: 24 hr). + - When logstash shuts down, any flows "in the aggregator" will be written out to aggregate_maps_path (default: /tmp/logstash-aggregation-maps). The file is then read back in when logstash is restarted so aggregation can continue. + - Your logstash pipeline can have only 1 worker or aggregation is not going to work! This is set in the logstash config file. + - Tstat flows come in already complete, so no aggregation is done on those flows. + +### 45-geoip-tagging.conf + +Queries the MaxMind GeoLite2-City database by IP to get src and dst Countries, Continents, Latitudes, and Longitudes; +if the destination IP is in the multicast range, sets the destination Organization, Country, and Continent to "Multicast". + +*This product includes GeoLite2 data created by MaxMind, available from [www.maxmind.com](http://www.maxmind.com).* + +### 50-asn.conf + +Normally with sflow and netflow, flows come in with source and destination ASNs. 
If there is no ASN in the input event; or the input ASN is 0, 4294967295, or 23456, or it is a private ASN, tries to get an ASN by IP from the MaxMind ASN database. +Sets ASN to -1 if it is unavailable for any reason. + +### 53-caida-org.conf + +Uses the current source and destination ASNs to get organization names from the prepared CAIDA ASN-to-Organization lookup file. + +*This product uses a lookup table constructed from the CAIDA AS Organizations Dataset - see [www.caida.org](http://www.caida.org/data/as-organizations).* + +### 55-member-orgs.conf + +Searches any provided lookup tables by IP to obtain member or customer organization names and overwrite the Organization determined previously. +This allows entities which don't own their own ASs to be listed as the src or dst Organization. + +Note: These lookup tables are not stored in github, but an example is provided to show the layout and tables we have can be downloaded via a cron job. + +### 60-scireg-tagging-fakegeoip.conf + +Uses a fake geoip database containing [Science Registry](http://scienceregistry.grnoc.iu.edu) information to tag the flows with source and destination science disciplines and roles, organizations and locations, etc; +removes Registry fields we don't need to save to elasticsearch. + +Notes: + - The [Science Registry](https://scienceregistry.netsage.global/rdb/) stores human-curated information about various "resources". Resources are sources and destinations of flows. + - The Science Registry "fake geoip database" is updated weekly and can be downloaded via wget in a cron job (provided in the installation). + +### 70-deidentify.conf + +Replaces the last octet of IPv4 addresses and the last 4 hextets of IPv6 addresses with x's in order to deidentify them. + +### 80-privatize.org.conf + +Removes information about Australian organizations (or, with modification, any country that has privacy rules that require us not to identify organizations). +If the ASN is one of those listed, completely replaces the IP with x's, sets the location to central Autralia, sets all organizations to "AARNet", removes all Projects. + +### 88-preferred-location-org.conf + +Copies Science Registry organization and location values, if they exist, to the meta.preferred_organization and meta.preferred_location fields. If there are no Science Registry values, the organizations and locations from the CAIDA and MaxMind lookups, respectively, are saved to those fields. + +### 90-additional-fields.conf + +Sets additional quick and easy fields. Supporting mapping or ruby files are used - see support/ and ruby/ in conf.d/. Currently we have (for Netsage's use): + - sensor_group = TACC, AMPATH, etc. (based on matching sensor names to regexes) + - sensor_type = Circuit, Archive, Exchange Point, or Regional Network (based on matching sensor names to regexes) + - country_scope = Domestic, International, or Mixed (based on src and dst countries and possibly continents, where Domestic = US, Puerto Rico, or Guam) + - is_network_testing = yes, no (yes if discipline from the science registry is 'CS.Network Testing and Monitoring' or port = 5001, 5101, or 5201) + - es_doc_id = hash of meta.id and the start time of the flow. If this id is used as the document id in elasticsearch, flows that are mistakenly input more than once will update existing documents rather than be added as duplicates. (NOTE: due to how netflow works, use es_doc_id as the ES document id only for sflow!) + +### 95-cleanup.conf + +Does small misc. 
tasks at the end like rename, remove, or convert fields + +### 98-post-process.conf + +Adds @exit_time and @processing_time (these are mainly for developers) + +### 99-output-rabbit.conf + +Sends results to a final RabbitMQ queue. (".disabled" can be removed from other output configs to send flows to other places) + +### Final Stage + +In the GlobalNOC-Netsage case, the output filter writes the flows to a network-specific RabbitMQ queue on another host and the last stage is a separate logstash pipeline on a 3rd host. The latter reads flows from the final queue using a rabbitmq input filter and sends it into elasticsearch using an elasticsearch output filter with a mapping template which sets data types for the fields. + +## Field names + +The fields used/created in Logstash (and saved to Elasticsearch) are listed in the [Elasticsearch doc](elastic). + + diff --git a/website/versioned_docs/version-1.2.10/pipeline/nfdump.md b/website/versioned_docs/version-1.2.10/pipeline/nfdump.md new file mode 100644 index 00000000..b9519282 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/pipeline/nfdump.md @@ -0,0 +1,17 @@ +--- +id: nfdump +title: Sflow/Netflow Data Collection +sidebar_label: Sflow/Netflow Data +--- + +Sflow and Netflow export can be configured on appropriate network devices. Netsage uses tools in the Nfdump package to collect and process the resulting flow data. The toolset supports netflow v1, v5/v7, v9, IPFIX and SFLOW, IPv4 as well as IPv6. + +## Netsage Usage + +Nfcapd and/or sfcapd processes (from the nfdump package) are used to collect incoming netflow and/or sflow data and save it to disk in nfcapd files. The files are then read by the [importer](importer), which uses an nfdump command, and sent to RabbitMQ. From there, the [logstash](logstash) pipeline ingests the flows and processes them in exactly the same way as it processes tstat flows. The data is eventually saved in elasticsearch and visualized by [grafana dashboards](https://github.com/netsage-project/netsage-grafana-configs). + +One may also use the nfdump command interactively to view the flows in a nfcapd file in a terminal window. + +## Docker Deployment + +The nfdump/nfcapd/sfcapd processes can be invoked locally or using a Docker container. The Docker deployment of the Pipeline uses an nfdump Docker container. (See the Docker Deployment Guide.) The Docker image definitions can be found [HERE](https://github.com/netsage-project/docker-nfdump-collector) diff --git a/website/versioned_docs/version-1.2.10/pipeline/tstat.md b/website/versioned_docs/version-1.2.10/pipeline/tstat.md new file mode 100644 index 00000000..baab97c5 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/pipeline/tstat.md @@ -0,0 +1,16 @@ +--- +id: tstat +title: Tstat Data Collection +sidebar_label: Tstat Data +--- + +## Netsage GitHub Project + +[Tstat](http://tstat.polito.it/) is a passive sniffer that provides insights into traffic patterns. The Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) project provides client programs to parse the captured data and send it to a rabbitmq host where it can then be processed by the [logstash pipeline](logstash), stored in elasticsearch, and finally displayed in our Grafana [dashboards](https://github.com/netsage-project/netsage-grafana-configs). + +## Docker + +Netsage Docker images exist on Docker Hub for tstat and tstat_transport. This is still in a beta state and is in development. 
The initial documentation is available [here](https://github.com/netsage-project/tstat-transport/blob/master/docs/docker.md). + + + diff --git a/website/versioned_sidebars/version-1.2.10-sidebars.json b/website/versioned_sidebars/version-1.2.10-sidebars.json new file mode 100644 index 00000000..a82d1786 --- /dev/null +++ b/website/versioned_sidebars/version-1.2.10-sidebars.json @@ -0,0 +1,89 @@ +{ + "version-1.2.10/Pipeline": [ + { + "collapsed": true, + "type": "category", + "label": "Pipeline", + "items": [ + { + "type": "doc", + "id": "version-1.2.10/pipeline/intro" + }, + { + "type": "doc", + "id": "version-1.2.10/pipeline/tstat" + }, + { + "type": "doc", + "id": "version-1.2.10/pipeline/nfdump" + }, + { + "type": "doc", + "id": "version-1.2.10/pipeline/importer" + }, + { + "type": "doc", + "id": "version-1.2.10/pipeline/logstash" + }, + { + "type": "doc", + "id": "version-1.2.10/pipeline/elastic" + } + ] + }, + { + "collapsed": true, + "type": "category", + "label": "Deployment", + "items": [ + { + "type": "doc", + "id": "version-1.2.10/deploy/choose_install" + }, + { + "type": "doc", + "id": "version-1.2.10/deploy/bare_metal_install" + }, + { + "type": "doc", + "id": "version-1.2.10/deploy/docker_install_simple" + }, + { + "type": "doc", + "id": "version-1.2.10/deploy/docker_install_advanced" + }, + { + "type": "doc", + "id": "version-1.2.10/deploy/docker_upgrade" + }, + { + "type": "doc", + "id": "version-1.2.10/deploy/docker_troubleshoot" + } + ] + }, + { + "collapsed": true, + "type": "category", + "label": "Development", + "items": [ + { + "type": "doc", + "id": "version-1.2.10/devel/dev_dataset" + }, + { + "type": "doc", + "id": "version-1.2.10/devel/docker_dev_guide" + }, + { + "type": "doc", + "id": "version-1.2.10/devel/docusaurus" + }, + { + "type": "doc", + "id": "version-1.2.10/devel/docker_dev_tag" + } + ] + } + ] +} diff --git a/website/versions.json b/website/versions.json index 17ed3fdd..2303a6b2 100644 --- a/website/versions.json +++ b/website/versions.json @@ -1,4 +1,5 @@ [ + "1.2.10", "1.2.9", "1.2.8", "1.2.7", From a37b3657a53ed78b50e13aa518b84fb8f5a628eb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 28 May 2021 19:00:31 +0000 Subject: [PATCH 010/126] Bump dns-packet from 1.3.1 to 1.3.4 in /website Bumps [dns-packet](https://github.com/mafintosh/dns-packet) from 1.3.1 to 1.3.4. 
- [Release notes](https://github.com/mafintosh/dns-packet/releases) - [Changelog](https://github.com/mafintosh/dns-packet/blob/master/CHANGELOG.md) - [Commits](https://github.com/mafintosh/dns-packet/compare/v1.3.1...v1.3.4) Signed-off-by: dependabot[bot] --- website/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index bb962ebe..c1e794b1 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -3780,9 +3780,9 @@ dns-equal@^1.0.0: integrity sha1-s55/HabrCnW6nBcySzR1PEfgZU0= dns-packet@^1.3.1: - version "1.3.1" - resolved "https://registry.yarnpkg.com/dns-packet/-/dns-packet-1.3.1.tgz#12aa426981075be500b910eedcd0b47dd7deda5a" - integrity sha512-0UxfQkMhYAUaZI+xrNZOz/as5KgDU0M/fQ9b6SpkyLbk3GEswDi6PADJVaYJradtRVsRIlF1zLyOodbcTCDzUg== + version "1.3.4" + resolved "https://registry.yarnpkg.com/dns-packet/-/dns-packet-1.3.4.tgz#e3455065824a2507ba886c55a89963bb107dec6f" + integrity sha512-BQ6F4vycLXBvdrJZ6S3gZewt6rcrks9KBgM9vrhW+knGRqc8uEdT7fuCwloc7nny5xNoMJ17HGH0R/6fpo8ECA== dependencies: ip "^1.1.0" safe-buffer "^5.0.1" From 8a20a177c8bb42fba1ff0091f3c873dc686a5401 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 5 Jun 2021 06:07:15 +0000 Subject: [PATCH 011/126] Bump ws from 6.2.1 to 6.2.2 in /website Bumps [ws](https://github.com/websockets/ws) from 6.2.1 to 6.2.2. - [Release notes](https://github.com/websockets/ws/releases) - [Commits](https://github.com/websockets/ws/commits) --- updated-dependencies: - dependency-name: ws dependency-type: indirect ... Signed-off-by: dependabot[bot] --- website/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index bb962ebe..c260aeff 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -10129,9 +10129,9 @@ write-file-atomic@^3.0.0: typedarray-to-buffer "^3.1.5" ws@^6.2.1: - version "6.2.1" - resolved "https://registry.yarnpkg.com/ws/-/ws-6.2.1.tgz#442fdf0a47ed64f59b6a5d8ff130f4748ed524fb" - integrity sha512-GIyAXC2cB7LjvpgMt9EKS2ldqr0MTrORaleiOno6TweZ6r3TKtoFQWay/2PceJ3RuBasOHzXNn5Lrw1X0bEjqA== + version "6.2.2" + resolved "https://registry.yarnpkg.com/ws/-/ws-6.2.2.tgz#dd5cdbd57a9979916097652d78f1cc5faea0c32e" + integrity sha512-zmhltoSR8u1cnDsD43TX59mzoMZsLKqUweyYBAIvTngR3shc0W6aOZylZmq/7hqyVxPdi+5Ud2QInblgyE72fw== dependencies: async-limiter "~1.0.0" From 01e16e1f20ed2a62373acd9669452e8f298136ce Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Jun 2021 20:28:37 +0000 Subject: [PATCH 012/126] Bump prismjs from 1.23.0 to 1.24.0 in /website Bumps [prismjs](https://github.com/PrismJS/prism) from 1.23.0 to 1.24.0. - [Release notes](https://github.com/PrismJS/prism/releases) - [Changelog](https://github.com/PrismJS/prism/blob/master/CHANGELOG.md) - [Commits](https://github.com/PrismJS/prism/compare/v1.23.0...v1.24.0) --- updated-dependencies: - dependency-name: prismjs dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- website/yarn.lock | 39 +++------------------------------------ 1 file changed, 3 insertions(+), 36 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index bb962ebe..8a70992e 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -2966,15 +2966,6 @@ cli-boxes@^2.2.1: resolved "https://registry.yarnpkg.com/cli-boxes/-/cli-boxes-2.2.1.tgz#ddd5035d25094fce220e9cab40a45840a440318f" integrity sha512-y4coMcylgSCdVinjiDBuR8PCC2bLjyGTwEmPb9NHR/QaNU6EUOXcTY/s6VjGMD6ENSEaeQYHCY0GNGS5jfMwPw== -clipboard@^2.0.0: - version "2.0.8" - resolved "https://registry.yarnpkg.com/clipboard/-/clipboard-2.0.8.tgz#ffc6c103dd2967a83005f3f61976aa4655a4cdba" - integrity sha512-Y6WO0unAIQp5bLmk1zdThRhgJt/x3ks6f30s3oE3H1mgIEU33XyQjEf8gsf6DxC7NPX8Y1SsNWjUjL/ywLnnbQ== - dependencies: - good-listener "^1.2.2" - select "^1.1.2" - tiny-emitter "^2.0.0" - cliui@^5.0.0: version "5.0.0" resolved "https://registry.yarnpkg.com/cliui/-/cliui-5.0.0.tgz#deefcfdb2e800784aa34f46fa08e06851c7bbbc5" @@ -3707,11 +3698,6 @@ del@^6.0.0: rimraf "^3.0.2" slash "^3.0.0" -delegate@^3.1.2: - version "3.2.0" - resolved "https://registry.yarnpkg.com/delegate/-/delegate-3.2.0.tgz#b66b71c3158522e8ab5744f720d8ca0c2af59166" - integrity sha512-IofjkYBZaZivn0V8nnsMJGBr4jVLxHDheKSW88PyxS5QC4Vo9ZbZVvhzlSxY87fVq3STR6r+4cGepyHkcWOQSw== - depd@~1.1.2: version "1.1.2" resolved "https://registry.yarnpkg.com/depd/-/depd-1.1.2.tgz#9bcd52e14c097763e749b274c4346ed2e560b5a9" @@ -4674,13 +4660,6 @@ globby@^6.1.0: pify "^2.0.0" pinkie-promise "^2.0.0" -good-listener@^1.2.2: - version "1.2.2" - resolved "https://registry.yarnpkg.com/good-listener/-/good-listener-1.2.2.tgz#d53b30cdf9313dffb7dc9a0d477096aa6d145c50" - integrity sha1-1TswzfkxPf+33JoNR3CWqm0UXFA= - dependencies: - delegate "^3.1.2" - got@^9.6.0: version "9.6.0" resolved "https://registry.yarnpkg.com/got/-/got-9.6.0.tgz#edf45e7d67f99545705de1f7bbeeeb121765ed85" @@ -7758,11 +7737,9 @@ prism-react-renderer@^1.1.1: integrity sha512-GHqzxLYImx1iKN1jJURcuRoA/0ygCcNhfGw1IT8nPIMzarmKQ3Nc+JcG0gi8JXQzuh0C5ShE4npMIoqNin40hg== prismjs@^1.23.0: - version "1.23.0" - resolved "https://registry.yarnpkg.com/prismjs/-/prismjs-1.23.0.tgz#d3b3967f7d72440690497652a9d40ff046067f33" - integrity sha512-c29LVsqOaLbBHuIbsTxaKENh1N2EQBOHaWv7gkHN4dgRbxSREqDnDbtFJYdpPauS4YCplMSNCABQ6Eeor69bAA== - optionalDependencies: - clipboard "^2.0.0" + version "1.24.0" + resolved "https://registry.yarnpkg.com/prismjs/-/prismjs-1.24.0.tgz#0409c30068a6c52c89ef7f1089b3ca4de56be2ac" + integrity sha512-SqV5GRsNqnzCL8k5dfAjCNhUrF3pR0A9lTDSCUZeh/LIshheXJEaP0hwLz2t4XHivd2J/v2HR+gRnigzeKe3cQ== process-nextick-args@~2.0.0: version "2.0.1" @@ -8613,11 +8590,6 @@ select-hose@^2.0.0: resolved "https://registry.yarnpkg.com/select-hose/-/select-hose-2.0.0.tgz#625d8658f865af43ec962bfc376a37359a4994ca" integrity sha1-Yl2GWPhlr0Psliv8N2o3NZpJlMo= -select@^1.1.2: - version "1.1.2" - resolved "https://registry.yarnpkg.com/select/-/select-1.1.2.tgz#0e7350acdec80b1108528786ec1d4418d11b396d" - integrity sha1-DnNQrN7ICxEIUoeG7B1EGNEbOW0= - selfsigned@^1.10.8: version "1.10.8" resolved "https://registry.yarnpkg.com/selfsigned/-/selfsigned-1.10.8.tgz#0d17208b7d12c33f8eac85c41835f27fc3d81a30" @@ -9323,11 +9295,6 @@ timsort@^0.3.0: resolved "https://registry.yarnpkg.com/timsort/-/timsort-0.3.0.tgz#405411a8e7e6339fe64db9a234de11dc31e02bd4" integrity sha1-QFQRqOfmM5/mTbmiNN4R3DHgK9Q= -tiny-emitter@^2.0.0: - version "2.1.0" - resolved 
"https://registry.yarnpkg.com/tiny-emitter/-/tiny-emitter-2.1.0.tgz#1d1a56edfc51c43e863cbb5382a72330e3555423" - integrity sha512-NB6Dk1A9xgQPMoGqC5CVXn123gWyte215ONT5Pp5a0yt4nlEoO1ZWeCwpncaekPHXO60i47ihFnZPiRPjRMq4Q== - tiny-invariant@^1.0.2: version "1.1.0" resolved "https://registry.yarnpkg.com/tiny-invariant/-/tiny-invariant-1.1.0.tgz#634c5f8efdc27714b7f386c35e6760991d230875" From 2c7ff09e01256e6b92ed33320201d84b9d9b92e8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Jun 2021 21:19:41 +0000 Subject: [PATCH 013/126] Bump ssri from 6.0.1 to 6.0.2 in /website Bumps [ssri](https://github.com/npm/ssri) from 6.0.1 to 6.0.2. - [Release notes](https://github.com/npm/ssri/releases) - [Changelog](https://github.com/npm/ssri/blob/v6.0.2/CHANGELOG.md) - [Commits](https://github.com/npm/ssri/compare/v6.0.1...v6.0.2) --- updated-dependencies: - dependency-name: ssri dependency-type: indirect ... Signed-off-by: dependabot[bot] --- website/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index c1e794b1..d1cf6005 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -8989,9 +8989,9 @@ sprintf-js@~1.0.2: integrity sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw= ssri@^6.0.1: - version "6.0.1" - resolved "https://registry.yarnpkg.com/ssri/-/ssri-6.0.1.tgz#2a3c41b28dd45b62b63676ecb74001265ae9edd8" - integrity sha512-3Wge10hNcT1Kur4PDFwEieXSCMCJs/7WvSACcrMYrNp+b8kDL1/0wJch5Ni2WrtwEa2IO8OsVfeKIciKCDx/QA== + version "6.0.2" + resolved "https://registry.yarnpkg.com/ssri/-/ssri-6.0.2.tgz#157939134f20464e7301ddba3e90ffa8f7728ac5" + integrity sha512-cepbSq/neFK7xB6A50KHN0xHDotYzq58wWCa5LeWqnPrHG8GzfEjO/4O8kpmcGW+oaxkvhEJCWgbgNk4/ZV93Q== dependencies: figgy-pudding "^3.5.1" From e829f91afcb19296a0151c76080b8b65d8459bbf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 4 Aug 2021 00:40:56 +0000 Subject: [PATCH 014/126] Bump tar from 6.1.0 to 6.1.4 in /website Bumps [tar](https://github.com/npm/node-tar) from 6.1.0 to 6.1.4. - [Release notes](https://github.com/npm/node-tar/releases) - [Changelog](https://github.com/npm/node-tar/blob/main/CHANGELOG.md) - [Commits](https://github.com/npm/node-tar/compare/v6.1.0...v6.1.4) --- updated-dependencies: - dependency-name: tar dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- website/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index 730a53a5..e81fff20 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -9206,9 +9206,9 @@ tapable@^1.0.0, tapable@^1.1.3: integrity sha512-4WK/bYZmj8xLr+HUCODHGF1ZFzsYffasLUgEiMBY4fgtltdO6B4WJtlSbPaDTLpYTcGVwM2qLnFTICEcNxs3kA== tar@^6.0.2: - version "6.1.0" - resolved "https://registry.yarnpkg.com/tar/-/tar-6.1.0.tgz#d1724e9bcc04b977b18d5c573b333a2207229a83" - integrity sha512-DUCttfhsnLCjwoDoFcI+B2iJgYa93vBnDUATYEeRx6sntCTdN01VnqsIuTlALXla/LWooNg0yEGeB+Y8WdFxGA== + version "6.1.4" + resolved "https://registry.yarnpkg.com/tar/-/tar-6.1.4.tgz#9f0722b772a5e00dba7d52e1923b37a7ec3799b3" + integrity sha512-kcPWrO8S5ABjuZ/v1xQHP8xCEvj1dQ1d9iAb6Qs4jLYzaAIYWwST2IQpz7Ud8VNYRI+fGhFjrnzRKmRggKWg3g== dependencies: chownr "^2.0.0" fs-minipass "^2.0.0" From e922f71b634014e09719bdd0e1ec7f4f53c31c38 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Aug 2021 22:06:29 +0000 Subject: [PATCH 015/126] Bump path-parse from 1.0.6 to 1.0.7 in /website Bumps [path-parse](https://github.com/jbgutierrez/path-parse) from 1.0.6 to 1.0.7. - [Release notes](https://github.com/jbgutierrez/path-parse/releases) - [Commits](https://github.com/jbgutierrez/path-parse/commits/v1.0.7) --- updated-dependencies: - dependency-name: path-parse dependency-type: indirect ... Signed-off-by: dependabot[bot] --- website/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index 730a53a5..61a83b00 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -6931,9 +6931,9 @@ path-key@^3.0.0, path-key@^3.1.0: integrity sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q== path-parse@^1.0.6: - version "1.0.6" - resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.6.tgz#d62dbb5679405d72c4737ec58600e9ddcf06d24c" - integrity sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw== + version "1.0.7" + resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735" + integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw== path-to-regexp@0.1.7: version "0.1.7" From fafa938f69b20b38700c50247c5196834a403b81 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 20 Aug 2021 18:22:41 +0000 Subject: [PATCH 016/126] Bump url-parse from 1.5.1 to 1.5.3 in /website Bumps [url-parse](https://github.com/unshiftio/url-parse) from 1.5.1 to 1.5.3. - [Release notes](https://github.com/unshiftio/url-parse/releases) - [Commits](https://github.com/unshiftio/url-parse/compare/1.5.1...1.5.3) --- updated-dependencies: - dependency-name: url-parse dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- website/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index e81fff20..238af2d5 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -9686,9 +9686,9 @@ url-parse-lax@^3.0.0: prepend-http "^2.0.0" url-parse@^1.4.3, url-parse@^1.5.1: - version "1.5.1" - resolved "https://registry.yarnpkg.com/url-parse/-/url-parse-1.5.1.tgz#d5fa9890af8a5e1f274a2c98376510f6425f6e3b" - integrity sha512-HOfCOUJt7iSYzEx/UqgtwKRMC6EU91NFhsCHMv9oM03VJcVo2Qrp8T8kI9D7amFf1cu+/3CEhgb3rF9zL7k85Q== + version "1.5.3" + resolved "https://registry.yarnpkg.com/url-parse/-/url-parse-1.5.3.tgz#71c1303d38fb6639ade183c2992c8cc0686df862" + integrity sha512-IIORyIQD9rvj0A4CLWsHkBBJuNqWpFQe224b6j9t/ABmquIS0qDU2pY6kl6AuOrL5OkCXHMCFNe1jBcuAggjvQ== dependencies: querystringify "^2.1.1" requires-port "^1.0.0" From 3dfa05ca2aaffa4450bc8c8e001eea08288efd48 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Tue, 24 Aug 2021 18:06:53 +0000 Subject: [PATCH 017/126] Made ifindex filtering sensor-specific --- conf-logstash/15-sensor-specific-changes.conf | 46 +++++++++++++++---- env.example | 6 +-- 2 files changed, 40 insertions(+), 12 deletions(-) diff --git a/conf-logstash/15-sensor-specific-changes.conf b/conf-logstash/15-sensor-specific-changes.conf index d9341587..02a2500a 100644 --- a/conf-logstash/15-sensor-specific-changes.conf +++ b/conf-logstash/15-sensor-specific-changes.conf @@ -1,11 +1,18 @@ # Make changes required for specific sensors -# Parameters are obtained from an environment file (default: /etc/logstash/logstash-env-vars - see the logstash systemd file). -# If values are not provided, the defaults following the :'s are used (flags will be False, so nothing will happen). +# ${variable-name:default-value} are obtained from an environment file (the .env file for Docker installations; for bare-metal installations, +# the default is /etc/logstash/logstash-env-vars - see the logstash systemd file) +# If values are not provided (eg, there is no env file), the defaults following the :'s are used. (Flags will be False, so nothing will happen). # With a bare-metal installation, you may also just edit this file and fill in the values you want. +# Using env vars in conditionals has been an open issue for logstash since 2016! Workaround is to add a field. + filter { + # IFINDEX FILTERING #---- Drop flows that do not have src or dst ifindex in a specified list of ifindexes + # Specifying a sensor name is optional. If not given, the ifindex list will apply to all sensors. + # Example settings in env file: ifindex_filter_keep="500; Sensor 1: 123,456; Sensor 2 : 789, 123" + # (If specified, the sensor name must be exact, otherwise spaces don't matter. Separate lists with semicolons.) mutate { add_field => { "[@metadata][ifindex_filter_flag]" => "${ifindex_filter_flag:False}" } } @@ -15,19 +22,39 @@ filter { id => "15-1" } mutate { - # Split the field into an array (in a separate mutate, since in mutate, split happens before all add_fields) - # Add a dummy array element to force it to be an array, in case there is just 1 value in the env file, - # otherwise 'in' will search for a substring in a string, which may not do what we want. - split => { "[@metadata][ifindex_filter_keep]" => "," } + # Split the string on ';' into an array of the same name (in a separate mutate, since in mutate, split happens before all add_fields) + # Add a dummy array element to force it to be an array, in case there is just 1 value in the env file. 
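      # Editor's illustration (not part of the original patch): given the example env
      # setting above, ifindex_filter_keep="500; Sensor 1: 123,456; Sensor 2 : 789, 123",
      # the split on ";" plus the dummy element yields an array roughly like
      #   ["500", " Sensor 1: 123,456", " Sensor 2 : 789, 123", "dummy"]
      # which the ruby block below then matches against the event's sensor name and src/dst ifindex.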
+ split => { "[@metadata][ifindex_filter_keep]" => ";" } add_field => { "[@metadata][ifindex_filter_keep]" => "dummy" } id => "15-2" } - if [meta][src_ifindex] not in [@metadata][ifindex_filter_keep] and [meta][dst_ifindex] not in [@metadata][ifindex_filter_keep] { - drop { } - } + # Each (non-dummy) array element should have 'sensor-name: list-of-approved-ifindexes' ('sensor-name:' optional) + # Use Ruby to loop + ruby { + code => ' + action = "drop" + filters = event.get("[@metadata][ifindex_filter_keep]") + filters.each do |f| + next if f == "dummy" + # if f specifies a sensor that is not the current sensor, we can skip it. + # Otherwise, parse out the ifindex array and see if it includes the current ifindex. + if (! f.include? ":") or (f =~ /#{event.get("[meta][sensor_id]")}\s*:/) + f.sub!(/.*:/, "") + f.gsub!(/\s/, "") + indexes = f.split(",") + if indexes.include? event.get("[meta][src_ifindex]").to_s or indexes.include? event.get("[meta][dst_ifindex]").to_s + action = "keep" + break + end + end + end + event.cancel if action == "drop" + ' + } } + # SENSOR NAME CHANGE BY IFINDEX #---- Change the sensor name for flows from a certain interface (ifindex) mutate { add_field => { "[@metadata][ifindex_sensor_rename_flag]" => "${ifindex_sensor_rename_flag:False}" } @@ -50,6 +77,7 @@ filter { } + # SAMPLING RATE CORRECTIONS #---- Manually apply a sampling correction to listed sensors. Use only in special cases when the flow exporter or collector is providing corrections. # For netflow, a sampling rate correction can be done here or in the nfsen config or nfcapd command using the -s option. # For sflow, there is no such option, so it must be done here. diff --git a/env.example b/env.example index b4645404..c9a9afa4 100644 --- a/env.example +++ b/env.example @@ -5,12 +5,12 @@ RABBITMQ_DEFAULT_USER=guest RABBITMQ_DEFAULT_PASS=guest discovery.type=single-node -# For importer output queue / logstash input queue +# Logstash input (same as importer output) rabbitmq_input_host=rabbit rabbitmq_input_username=guest rabbitmq_input_pw=guest -# For logstash output queue +# Logstash output rabbitmq_output_host=rabbit rabbitmq_output_username=guest rabbitmq_output_pw=guest @@ -19,7 +19,7 @@ rabbitmq_output_key=netsage_archive_input # To drop all flows except those using the specfied interfaces # (see the Docker Advanced documentation) ifindex_filter_flag=False -# ifindex_filter_keep=123,456 +# ifindex_filter_keep=111; Sensor 1: 456; Sensor 2: 789,123 # To change the sensor name for flows using a certain interface # (see the Docker Advanced documentation) From 48963b043594ef89c99542b6fb71633cd799a9c0 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 25 Aug 2021 16:45:11 +0000 Subject: [PATCH 018/126] Fixing es_doc_id calculation by not renaming flow_fingerprint to meta.id until the very end --- conf-logstash/40-aggregation.conf | 16 +++++++--------- conf-logstash/90-additional-fields.conf | 4 ++-- conf-logstash/95-cleanup.conf | 2 +- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/conf-logstash/40-aggregation.conf b/conf-logstash/40-aggregation.conf index e5cff902..c5588c84 100644 --- a/conf-logstash/40-aggregation.conf +++ b/conf-logstash/40-aggregation.conf @@ -1,6 +1,6 @@ ##### COPY ANY CHANGES TO YOUR EXISTING VERSION AFTER AN UPGRADE ##### -## Fields most likely to be specific to a pipeline: +## Fields most likely to be specific to a logstash pipeline: ## These may be set via environment variables. ## aggregate_maps_path - must be unique for each pipeline. 
Aggregation info is written here if logstash exits. Default is /tmp/logstash-aggregation-maps. ## inactivity_timeout - value depends on timespan of nfcapd files. Default is 630 sec. @@ -9,7 +9,7 @@ # This filter stitches together flows from different nfcapd files, each (usually) spanning a 5 min. period. # Note: netflow keeps the start time the same for all flows with the same fingerprint, even across different nfcapd files; -# duration is cumulative but counts are not. Sflow ends each flow as it is written out, as one would expect. +# duration is cumulative but counts are not. Sflow just sends samples. # If only 1 packet is seen, end time will = start time and duration will be 0. # NOTE: tags added to events before this point in the pipeline aren't kept. @@ -17,13 +17,11 @@ filter { # TSTAT - tstat only reports complete flows, so no stitching is needed! - # Just add stitched_flows=0 (means no stitching attempted) and the fingerprint as meta.id + # Just add stitched_flows=0 (means no stitching attempted) if [meta][flow_type] == 'tstat' { - # on tstat flows, just add the fields we would have had during aggregation mutate { id => "40-1" add_field => { 'stitched_flows' => 0 } - rename => { 'flow_fingerprint' => '[meta][id]' } } } @@ -42,8 +40,8 @@ filter { # unique ID used to aggregate events task_id => '%{[flow_fingerprint]}' - # save the fingerprint value as [meta][id] on timeout - timeout_task_id_field => "[meta][id]" + # save the fingerprint value on timeout + timeout_task_id_field => "[flow_fingerprint]" # use event's start time rather than system time to determine whether a timeout has occured (must be type 'date') timeout_timestamp_field => '[start_date]' @@ -62,7 +60,7 @@ filter { # send the aggregation map as a new event upon timeout push_map_as_event_on_timeout => true - # save the aggregation maps here in case logstash dies + # save the aggregation maps here when logstash shuts down ## (use a different file for each logstash pipeline!) aggregate_maps_path => '${aggregation_maps_path:/tmp/logstash-aggregation-maps}' @@ -78,7 +76,7 @@ filter { map['start'] ||= event.get('start') map['end'] ||= event.get('end') - # save info from the first subflow + # save meta and values info from the first event # values will be updated as we stitch on other flows map['meta'] ||= event.get('meta') map['values'] ||= event.get('values') diff --git a/conf-logstash/90-additional-fields.conf b/conf-logstash/90-additional-fields.conf index 4dabfa00..2a3703f9 100644 --- a/conf-logstash/90-additional-fields.conf +++ b/conf-logstash/90-additional-fields.conf @@ -51,8 +51,8 @@ filter { } } - # Unique id based on the meta.id (five-tuple-plus-sensor) + start time. - # Can be used as the document id in elasticsearch to avoid duplicate records (see ES output filter) + # Unique id based on five-tuple-plus-sensor + start time. + # Can possibly be used as the document id in elasticsearch to avoid duplicate records (see ES output filter) # (use for sflow only). 
fingerprint { id => '90-6' diff --git a/conf-logstash/95-cleanup.conf b/conf-logstash/95-cleanup.conf index 7e11903b..1138b47c 100644 --- a/conf-logstash/95-cleanup.conf +++ b/conf-logstash/95-cleanup.conf @@ -1,6 +1,6 @@ filter { - # make sure this has been renamed (in case aggregation conf has not been used) + # rename the 5-tuple+sensor hash to meta.id if [flow_fingerprint] { mutate { id => "95-1" From 058d60f14fd7dcf7c3c123a3422c39de71d2ba19 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 25 Aug 2021 16:57:59 +0000 Subject: [PATCH 019/126] Tags will be added to 0.0.0.x flows. --- conf-logstash/95-cleanup.conf | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/conf-logstash/95-cleanup.conf b/conf-logstash/95-cleanup.conf index 1138b47c..e0a35168 100644 --- a/conf-logstash/95-cleanup.conf +++ b/conf-logstash/95-cleanup.conf @@ -1,5 +1,14 @@ filter { + # Tag flows with 2 missing IPs (0.0.0.0s). + # Check or edit the 99-outputs file for any action to be taken based on these tags. + if [meta][src_ip] == "0.0.0.x" and [meta][dst_ip] == "0.0.0.x" { + mutate { + add_tag => ["Missing IPs"] + add_tag => ["DROP"] + } + } + # rename the 5-tuple+sensor hash to meta.id if [flow_fingerprint] { mutate { From 3209fcd482c84c0c9dd82e714180c7045457fde4 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 26 Aug 2021 17:37:16 +0000 Subject: [PATCH 020/126] If duration is <= 0.002, reset it, bps, and pps to 0 --- conf-logstash/10-preliminaries.conf | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/conf-logstash/10-preliminaries.conf b/conf-logstash/10-preliminaries.conf index bcb7b5b0..cda1bdd2 100644 --- a/conf-logstash/10-preliminaries.conf +++ b/conf-logstash/10-preliminaries.conf @@ -48,15 +48,25 @@ filter { } } - # 4. Convert any ms timestamps to s - # 5. Drop any events with start or end times in the future or too far in the past + # 4. If duration (eg from aggregation by nfdump in importer) is <= .002, set it to 0. + # When duration is too small, bps calculation is highly inaccurate. + if [values][duration] <= 0.002 { + mutate { + id => "10-6" + replace => {"[values][duration]" => 0} + replace => {"[values][bits_per_second]" => 0} + replace => {"[values][packets_per_second]" => 0} + } + } + + ruby { - id => "10-6" + id => "10-7" code => " flow_ts = event.get('start').to_f flow_te = event.get('end').to_f - # Convert any timestamps in ms to s + # 5. Convert any timestamps in ms to s if flow_ts > 9999999999.0 flow_ts = flow_ts / 1000.0 event.set('start', flow_ts) @@ -66,8 +76,8 @@ filter { event.set('end', flow_te) end - # DROP any event with a strange start or end time - # > 10 sec in the future or > 1 year in the past, or end < start + # 6. 
DROP any event with a strange start or end time + # > 10 sec in the future or > 1 year in the past, or end < start current_t = Time.now.to_f age_s = current_t - flow_ts age_e = current_t - flow_te From fda219415158e39dfd86835179b3da532aa25a77 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 26 Aug 2021 17:46:08 +0000 Subject: [PATCH 021/126] Added NORDUnet* and tacc_netflows to sensor_groups and _types regexes --- conf-logstash/support/sensor_groups.json | 1 + conf-logstash/support/sensor_types.json | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/conf-logstash/support/sensor_groups.json b/conf-logstash/support/sensor_groups.json index 1ac85667..8d81e64b 100644 --- a/conf-logstash/support/sensor_groups.json +++ b/conf-logstash/support/sensor_groups.json @@ -10,6 +10,7 @@ "^GigaPOP.*": "I-Light", "^NEAAR.*": "NEAAR", "^NEA3R.*": "NEAAR", + "^NORDUnet.*": "NORDUnet", "^.*nersc.*": "NERSC", "^.*pacificwave.*": "PacWave", "^.*pnw-gigapop\\.net$": "PacWave", diff --git a/conf-logstash/support/sensor_types.json b/conf-logstash/support/sensor_types.json index 026ef616..3b8c25b2 100644 --- a/conf-logstash/support/sensor_types.json +++ b/conf-logstash/support/sensor_types.json @@ -1,11 +1,12 @@ { "^.*Tstat$": "Data Archive", "^.*nersc\\.gov$": "Data Archive", - "^GEANT.*$": "Circuit", "^Hawaii.*netflow$": "Circuit", "^NEAAR.*": "Circuit", "^NEA3R.*": "Circuit", "^TransPAC.*": "Circuit", + "^GEANT.*$": "Circuit", + "^NORDUnet.*$": "Circuit", "^SingAREN.*$": "Exchange Point", "^.*pacificwave\\.net$": "Exchange Point", "^.*pnw-gigapop\\.net$": "Exchange Point", @@ -21,5 +22,5 @@ "^.*sox.*$": "Regional Network", "^.*SoX.*$": "Regional Network", "^Sun Corridor.*$": "Regional Network", - "^tacc_sflows$": "Regional Network" + "^tacc_netflows$": "Regional Network" } From 804f84cc53eaea5ff7ed2aed1d015fcd864c1d8e Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 26 Aug 2021 18:43:13 +0000 Subject: [PATCH 022/126] Added onenet-members-list.rb to the lists of files that are downloaded --- compose/importer/docker_init.sh | 2 +- cron.d/netsage-memberlists-update.cron | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/compose/importer/docker_init.sh b/compose/importer/docker_init.sh index 599938ce..691166b6 100755 --- a/compose/importer/docker_init.sh +++ b/compose/importer/docker_init.sh @@ -7,7 +7,7 @@ mkdir -p $DATA_DIR && echo "Cache directory ${DATA_DIR} created" || echo "cache FILES="GeoLite2-ASN scireg GeoLite2-City" CAIDA_FILES="CAIDA-org-lookup" -RUBY_DATA="FRGP-members-list ilight-members-list" +RUBY_DATA="FRGP-members-list ilight-members-list onenet-members-list" function downloadFiles() { ext=$1 diff --git a/cron.d/netsage-memberlists-update.cron b/cron.d/netsage-memberlists-update.cron index 0d834cde..824f78fc 100644 --- a/cron.d/netsage-memberlists-update.cron +++ b/cron.d/netsage-memberlists-update.cron @@ -14,3 +14,6 @@ MAILTO=root # on Wednesdays at 23:50 UTC ##50 23 * * 3 root /usr/bin/wget --user xxx --password xxx https://scienceregistry.grnoc.iu.edu/exported/ilight-members-list.rb -q -O /etc/logstash/conf.d/support/newilight.rb && mv /etc/logstash/conf.d/support/newilight.rb /etc/logstash/conf.d/support/ilight-members-list.rb && touch /etc/logstash/conf.d/support/ilight-members-list.rb + +# on Wednesdays at 23:55 UTC +##55 23 * * 3 root /usr/bin/wget --user xxx --password xxx https://scienceregistry.grnoc.iu.edu/exported/onenet-members-list.rb -q -O /etc/logstash/conf.d/support/newonenet.rb && mv /etc/logstash/conf.d/support/newonenet.rb 
/etc/logstash/conf.d/support/onenet-members-list.rb && touch /etc/logstash/conf.d/support/onenet-members-list.rb From 39f8b5708fa84eb71fc841fc59f02ee45ae99a47 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 26 Aug 2021 19:14:00 +0000 Subject: [PATCH 023/126] Improved comments in env and override example files --- docker-compose.override_example.yml | 16 +++++++++------- env.example | 15 +++++++++------ 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/docker-compose.override_example.yml b/docker-compose.override_example.yml index 75c276df..58d5799f 100644 --- a/docker-compose.override_example.yml +++ b/docker-compose.override_example.yml @@ -3,17 +3,19 @@ services: logstash: image: netsage/pipeline_logstash:latest - ## If you need to allocate more than 1GB (default) override the JMV options - # volumes: - # - ./userConfig/jvm.options:/usr/share/logstash/config/jvm.options + ## If you need to override JVM options, uncomment these lines + # volumes: + # - ./userConfig/jvm.options:/usr/share/logstash/config/jvm.options importer: image: netsage/pipeline_importer:latest - ## If you add additional collectors, you need to uncomment the following line and modify the file to add additional collections - # volumes: - # - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml + ## If you add additional collectors or need to make other changes in the importer "shared" config, + ## use the netsage_override.xml file and uncomment the following lines + # volumes: + # - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml - ## Add any additional collectors here. You may remove any collectors that are not needed. + ## Modify port numbers as needed, and add any additional collectors here (see Docker Advanced documentation). + ## Remove any collectors that do not need to be running. sflow-collector: image: netsage/nfdump-collector:alpine-1.6.23 restart: always diff --git a/env.example b/env.example index c9a9afa4..9baf8a2a 100644 --- a/env.example +++ b/env.example @@ -5,12 +5,15 @@ RABBITMQ_DEFAULT_USER=guest RABBITMQ_DEFAULT_PASS=guest discovery.type=single-node -# Logstash input (same as importer output) +# rabbitmq server for the importer output queue = the Logstash input queue +# default is to use the local rabbitmq server rabbitmq_input_host=rabbit rabbitmq_input_username=guest rabbitmq_input_pw=guest -# Logstash output +# rabbitmq server for the Logstash output queue +# default is to use the local rabbitmq server +# === FOR NETSAGE, ASK FOR THE PROPER SETTINGS === rabbitmq_output_host=rabbit rabbitmq_output_username=guest rabbitmq_output_pw=guest @@ -22,14 +25,14 @@ rabbitmq_output_key=netsage_archive_input # ifindex_filter_keep=111; Sensor 1: 456; Sensor 2: 789,123 # To change the sensor name for flows using a certain interface -# (see the Docker Advanced documentation) +# (See the Docker Advanced documentation) ifindex_sensor_rename_flag=False # ifindex_sensor_rename_old_name=oldname # ifindex_sensor_rename_new_name=newname # ifindex_sensor_rename_ifindex=0 -# To correct flow sizes and rates for sampling for certain sensors -# (see the Docker Advanced documentation) +# To "manually" correct flow sizes and rates for sampling for specified sensors +# (See the Docker Advanced documentation. This is uncommon.) 
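# Editor's illustration (hypothetical values, not part of the original patch):
# for sensors that sample 1 out of 1000 flows with no correction applied upstream,
# this section might look like
#   sampling_correction_flag=True
#   sampling_correction_sensors=My Sflow Sensor Name
#   sampling_correction_factor=1000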
sampling_correction_flag=False # sampling_correction_sensors=sensor1,sensor2 # sampling_correction_factor=1 @@ -39,7 +42,7 @@ rabbitmq_output_key=netsage_archive_input XPACK_MONITORING_ENABLED=false # java heap size for logstash LS_JAVA_OPTS=-Xmx2g -Xms2g -# for the logstash aggregation filter, ensure only one logstash worker is running +# the logstash aggregation filter requires that only one logstash worker is running PIPELINE_WORKERS=1 # for debugging ## LOG_LEVEL=debug From 6ba92ca308869cae5413f0e3a216b2d285fb47fe Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 26 Aug 2021 20:00:36 +0000 Subject: [PATCH 024/126] Updated pipeline version number and CHANGES file for v1.2.11 --- CHANGES.md | 13 +++++++++++++ conf-logstash/98-post-process.conf | 4 ++-- grnoc-netsage-deidentifier.spec | 6 +++--- lib/GRNOC/NetSage/Deidentifier.pm | 2 +- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 1fa71fd7..49a50b19 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,16 @@ +------------------------------------------------------ +## GRNOC NetSage Deidentfier 1.2.11 -- Aug 2021 +------------------------------------------------------ +Features: + * Made filtering by ifindex (optionally) sensor-specific + * Added tags to flows with src and dst IPs = 0.0.0.x + * When duration <= 0.002 sec, set duration, bits/s, and packets/s to 0 as rates are inaccurate for small durations + * Added NORDUnet* and tacc_netflows to sensor group and type regexes + * Added onenet-members-list.rb to the members-list files to download + +Bugs: + * Fixed the es_doc_id hash to always include meta.id and the start time. It was missing meta.id previously. + ------------------------------------------------------ ## GRNOC NetSage Deidentfier 1.2.10 -- May 10 2021 ------------------------------------------------------ diff --git a/conf-logstash/98-post-process.conf b/conf-logstash/98-post-process.conf index 9efb1da0..9e9d9d03 100644 --- a/conf-logstash/98-post-process.conf +++ b/conf-logstash/98-post-process.conf @@ -5,8 +5,8 @@ filter { code => ' event.set( "@exit_time", Time.now ); event.set( "@processing_time", event.get("@exit_time") - event.get("@ingest_time") ); - event.set( "@pipeline_ver", "1.2.10" ); + event.set( "@pipeline_ver", "1.2.11" ); ' - tag_on_exception => '_rubyexception in 98-outputs, failed to set @processing_time' + tag_on_exception => '_rubyexception in 98-post-process.conf' } } diff --git a/grnoc-netsage-deidentifier.spec b/grnoc-netsage-deidentifier.spec index ab855932..1c53b7f2 100644 --- a/grnoc-netsage-deidentifier.spec +++ b/grnoc-netsage-deidentifier.spec @@ -1,7 +1,7 @@ Summary: GRNOC NetSage Flow-Processing Pipeline Name: grnoc-netsage-deidentifier # update Version here, in conf-logstash/98-post-process.conf, lib/GRNOC/NetSage/Deidentifier.pm -Version: 1.2.10 +Version: 1.2.11 Release: 1%{?dist} License: GRNOC Group: Measurement @@ -137,12 +137,12 @@ rm -rf $RPM_BUILD_ROOT %config(noreplace) /etc/logstash/conf.d/01-input-rabbit.conf %config(noreplace) /etc/logstash/conf.d/01-input-multiline-json-file.conf.disabled %config(noreplace) /etc/logstash/conf.d/01-input-jsonfile.conf.disabled +%config(noreplace) /etc/logstash/conf.d/15-sensor-specific-changes.conf +%config(noreplace) /etc/logstash/conf.d/40-aggregation.conf %config(noreplace) /etc/logstash/conf.d/99-output-rabbit.conf %config(noreplace) /etc/logstash/conf.d/99-output-jsonlog.conf.disabled %config(noreplace) /etc/logstash/conf.d/99-output-multiline-json.conf.disabled %config(noreplace) 
/etc/logstash/conf.d/99-output-elastic.conf.disabled -%config(noreplace) /etc/logstash/conf.d/15-sensor-specific-changes.conf -%config(noreplace) /etc/logstash/conf.d/40-aggregation.conf # logstash files that can be updated automatically (if there are updates, the old ver will be in .rpmsave) %config /etc/logstash/conf.d/10-preliminaries.conf %config /etc/logstash/conf.d/20-add-id.conf diff --git a/lib/GRNOC/NetSage/Deidentifier.pm b/lib/GRNOC/NetSage/Deidentifier.pm index 48112e4b..66d9de3e 100644 --- a/lib/GRNOC/NetSage/Deidentifier.pm +++ b/lib/GRNOC/NetSage/Deidentifier.pm @@ -3,7 +3,7 @@ package GRNOC::NetSage::Deidentifier; use strict; use warnings; -our $VERSION = "1.2.10"; +our $VERSION = "1.2.11"; 1; From 7f0fc45a96da5f359ba65245313cdf3277e59472 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Fri, 27 Aug 2021 16:56:48 +0000 Subject: [PATCH 025/126] Changed order in env.example and updated some documentation --- env.example | 54 +++++++-------- website/docs/components/docker_env.md | 42 +++++------- website/docs/components/docker_pipeline.md | 13 ++-- website/docs/deploy/docker_install_simple.md | 70 ++++++++++++-------- 4 files changed, 98 insertions(+), 81 deletions(-) diff --git a/env.example b/env.example index 9baf8a2a..bd4c8b30 100644 --- a/env.example +++ b/env.example @@ -1,19 +1,11 @@ -# Local RabbitMQ Server config -RABBITMQ_ERLANG_COOKIE='secret cookie' -RABBIT_HOST=rabbit -RABBITMQ_DEFAULT_USER=guest -RABBITMQ_DEFAULT_PASS=guest -discovery.type=single-node - -# rabbitmq server for the importer output queue = the Logstash input queue -# default is to use the local rabbitmq server -rabbitmq_input_host=rabbit -rabbitmq_input_username=guest -rabbitmq_input_pw=guest +# Importer settings +# == EXAMPLE VALUES MUST BE REPLACED == +sflowSensorName=The Sflow Sensor Name +netflowSensorName=The Netflow Sensor Name -# rabbitmq server for the Logstash output queue +# Logstash output rabbit queue # default is to use the local rabbitmq server -# === FOR NETSAGE, ASK FOR THE PROPER SETTINGS === +# === FOR SENDING TO GlobalNOC, ASK FOR THE PROPER SETTINGS === rabbitmq_output_host=rabbit rabbitmq_output_username=guest rabbitmq_output_pw=guest @@ -21,22 +13,29 @@ rabbitmq_output_key=netsage_archive_input # To drop all flows except those using the specfied interfaces # (see the Docker Advanced documentation) - ifindex_filter_flag=False +ifindex_filter_flag=False # ifindex_filter_keep=111; Sensor 1: 456; Sensor 2: 789,123 # To change the sensor name for flows using a certain interface # (See the Docker Advanced documentation) - ifindex_sensor_rename_flag=False +ifindex_sensor_rename_flag=False # ifindex_sensor_rename_old_name=oldname # ifindex_sensor_rename_new_name=newname # ifindex_sensor_rename_ifindex=0 # To "manually" correct flow sizes and rates for sampling for specified sensors # (See the Docker Advanced documentation. This is uncommon.) - sampling_correction_flag=False +sampling_correction_flag=False # sampling_correction_sensors=sensor1,sensor2 # sampling_correction_factor=1 +# Logstash Aggregation Filter settings +# default inactivity_timeout is 630 sec for 5-minute nfcapd files; for 15-minute files, use 960 sec. +# max_flow_timeout is the maximum flow duration; longer flows will be broken up. 
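# Editor's illustration (not part of the original patch): a deployment writing 15-minute
# nfcapd files would change only the first of these, e.g. inactivity_timeout=960,
# and leave max_flow_timeout and aggregation_maps_path at their defaults.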
+inactivity_timeout=630 +max_flow_timeout=86400 +aggregation_maps_path=/data/logstash-aggregation-maps + # Logstash settings # set this to false so we don't install elasticsearch locally XPACK_MONITORING_ENABLED=false @@ -47,17 +46,18 @@ PIPELINE_WORKERS=1 # for debugging ## LOG_LEVEL=debug -# Importer settings -# == EXAMPLE VALUES MUST BE REPLACED == -sflowSensorName=The Sflow Sensor Name -netflowSensorName=The Netflow Sensor Name +# Local RabbitMQ Server config +RABBITMQ_ERLANG_COOKIE='secret cookie' +RABBIT_HOST=rabbit +RABBITMQ_DEFAULT_USER=guest +RABBITMQ_DEFAULT_PASS=guest +discovery.type=single-node -# Logstash Aggregation Filter settings -# default inactivity_timeout is 630 sec for 5-minute nfcapd files; for 15-minute files, use 960 sec. -# max_flow_timeout is the maximum flow duration; longer flows will be broken up. -inactivity_timeout=630 -max_flow_timeout=86400 -aggregation_maps_path=/data/logstash-aggregation-maps +# Importer output rabbit host = Logstash input rabbit host +# default is to use the local rabbitmq server +rabbitmq_input_host=rabbit +rabbitmq_input_username=guest +rabbitmq_input_pw=guest # In case you run elasticsearch and kibana ELASTIC_HOSTNAME='elastic' diff --git a/website/docs/components/docker_env.md b/website/docs/components/docker_env.md index 845e433b..942daece 100644 --- a/website/docs/components/docker_env.md +++ b/website/docs/components/docker_env.md @@ -1,41 +1,35 @@ -Please copy `env.example` to `.env` +Next, copy `env.example` to `.env` ```sh cp env.example .env ``` -then edit the .env file to set the sensor names +then edit the .env file to set the sensor names to unique identifiers (with spaces or not, no quotes) ```sh -sflowSensorName=my sflow sensor name -netflowSensorName=my netflow sensor name +# Importer settings +sflowSensorName=My sflow sensor name +netflowSensorName=My netflow sensor name ``` -Simply change the names to unique identifiers (with spaces or not, no quotes) and you're good to go. -:::note -These names uniquely identify the source of the data and will be shown in the Grafana dashboards. In elasticsearch, they are saved in the `meta.sensor_id` field. Choose names that are meaningful and unique. -For example, your sensor names might be "RNDNet New York Sflow" and "RNDNet Boston Netflow" or "RNDNet New York - London 1" and "RNDNet New York - London 2". Whatever makes sense in your situation. -::: - - - If you don't set a sensor name, the default docker hostname, which changes each time you run the pipeline, will be used. - If you have only one collector, remove or comment out the line for the one you are not using. - If you have more than one of the same type of collector, see the "Docker Advanced" documentation. +:::note +These names uniquely identify the source of the data and will be shown in the Grafana dashboards. In elasticsearch, they are saved in the `meta.sensor_id` field. Choose names that are meaningful and unique. +For example, your sensor names might be "MyNet New York Sflow" and "MyNet Boston Netflow" or "MyNet New York - London" and "MyNet New York - Paris". Whatever makes sense in your situation. +::: -Other settings of note in this file include the following. You will not necessarily need to change these, but be aware. - -**rabbit_output_host**: this defines where the final data will land after going through the pipeline. By default, the last rabbit queue will be on `rabbit`, ie, the local rabbitMQ server running in its docker container. 
Enter a hostname to send to a remote rabbitMQ server (also the correct username, password, and queue key/name). (For NetSage, another logstash pipeline on a remote server moves flows from this final rabbit queue into Elasticsearch.) +Other things you may need to edit in this file... -The following Logstash Aggregation Filter settings are exposed in case you wish to use different values. -(See comments in the \*-aggregation.conf file.) The aggregation filter stitches together long-lasting flows that are seen in multiple nfcapd files, matching by the 5-tuple (source and destination IPs, ports, and protocol) plus sensor name. +**Logstash output rabbit queue**: This section defines where the final data will land after going through the pipeline. By default, it will end in a rabbitmq queue on `rabbit`, ie, the local rabbitMQ server running in its docker container. Enter a hostname to send to a remote rabbitMQ server (also the correct username, password, and queue key/name). -**Aggregation_maps_path**: the name of the file to which logstash will write in-progress aggregation data when logstash shuts down. When logstash starts up again, it will read this file in and resume aggregating. The filename is configurable for complex situations, but /data/ is required. +::: NOTE +To send processed flow data to GlobalNOC at Indiana University, you will need to obtain settings for this section from your contact. At IU, data from the this final rabbit queue will be moved into an Elasticsearch instance for storage. +::: -**Inactivity_timeout**: If more than inactivity_timeout seconds have passed between the 'start' of a flow and the 'start' -of the LAST matching flow, OR if no matching flow has coming in for inactivity_timeout seconds -on the clock, assume the flow has ended. +**To drop all flows except those using the specfied interfaces**: If only some flows from a router are of interest and those can be identified by interface, set the flag variable to "True" and uncomment and set the other fields. If a flow's src OR dst ifindex is in the list specified, keep it. A list of ifindexes may be scoped to a specific sensor name (which traces back to a specific port). -:::note -Nfcapd files are typically written every 5 minutes. Netsage uses an inactivity_timeout = 630 sec = 10.5 min for 5-min files; 960 sec = 16 min for 15-min files. (For 5-min files, this allows one 5 min gap or period during which the no. of bits transferred don't meet the cutoff) -::: +**To change the sensor name for flows using a certain interface**: If you want to break out some flows coming into a port and give them a different sensor name, set the flag variable to "True" and uncomment and set the other fields. -**max_flow_timeout**: If a long-lasting flow is still aggregating when this timeout is reached, arbitrarily cut it off and start a new flow. The default is 24 hours. +**To "manually" correct flow sizes and rates for sampling for specified sensors**: Once in a while, sampling corrections need to be applied by the logstash pipeline. Normally this is done automatically by nfdump in the importer. If required, set the flag variable to "True", specify which sensors need the correction, and enter N where the sampling rate is 1 out of N. +See Docker Advanced for more information about the last options. 
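As a rough sketch (hypothetical values; the exact variable names and formats are shown in env.example and on the Docker Advanced page), enabling two of these options might look like:

```sh
# keep only flows that use ifindex 123 or 456
ifindex_filter_flag=True
ifindex_filter_keep=123,456

# correct for 1-in-512 sampling on one sensor
sampling_correction_flag=True
sampling_correction_sensors=My Netflow Sensor
sampling_correction_factor=512
```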
diff --git a/website/docs/components/docker_pipeline.md b/website/docs/components/docker_pipeline.md index c2628348..a0709f08 100644 --- a/website/docs/components/docker_pipeline.md +++ b/website/docs/components/docker_pipeline.md @@ -1,20 +1,23 @@ Start up the pipeline (all containers) using: ```sh -docker-compose up -d +# docker-compose up -d ``` This will also restart any containers/processes that have died. "-d" runs containers in the background. You can see the status of the containers and whether any have died (exited) using ```sh -docker-compose ps +# docker-compose ps ``` To check the logs for each of the containers, run ```sh -docker-compose logs +# docker-compose logs +# docker-compose logs logstash +# docker-compose logs importer +etc. ``` Add `-f` or, e.g., `-f logstash` to see new log messages as they arrive. `--timestamps`, `--tail`, and `--since` are also useful -- look up details in Docker documentation. @@ -22,5 +25,7 @@ Add `-f` or, e.g., `-f logstash` to see new log messages as they arrive. `--tim To shut down the pipeline (all containers) use ```sh -docker-compose down +# docker-compose down ``` + +Run all commands from the netsage-pipeline/ directory. diff --git a/website/docs/deploy/docker_install_simple.md b/website/docs/deploy/docker_install_simple.md index bf3d6856..705896f7 100644 --- a/website/docs/deploy/docker_install_simple.md +++ b/website/docs/deploy/docker_install_simple.md @@ -23,29 +23,39 @@ The data processing pipeline needs data to ingest in order to do anything, of co - netflow - tstat -At least one of these must be set up on a sensor (flow exporter/router), to provide the incoming flow data. +At least one of these must be set up on a *sensor* (i.e., flow *exporter* / router), to provide the incoming flow data. You can do this step later, but it will helpful to have it working first. -Sflow and netflow data should be exported to the pipeline host where there are collectors (nfcapd and/or sfcapd processes) ready to receive it (see below). To use the default settings, send sflow to port 9998 and netflow to port 9999. On the pipeline host, allow incoming traffic from the flow exporters, of course. +Sflow and netflow data should be exported to the pipeline host where there will be *collectors* (nfcapd and/or sfcapd processes) ready to receive it (see below). To use the default settings, send sflow to port 9998 and netflow/IPFIX to port 9999. On the pipeline host, allow incoming traffic from the flow exporters, of course. Tstat data should be sent directly to the logstash input rabbit queue "netsage_deidentifier_raw" on the pipeline host. No collector is needed for tstat data. See the netsage-project/tstat-transport repo. (From there, logstash will grab the data and process it the same way as it processes sflow/netflow data. (See the Docker Advanced guide.) -### 2. Clone the Netsage Pipeline Project +### 2. Set up a Pipeline Host +Decide where to run the Docker Pipeline and get it set up. Adjust iptables to allow the flow exporters (routers) to send flow data to the host. -If you haven't already, install [Docker](https://www.docker.com) and [Docker Compose](https://docs.docker.com/compose/install/) and clone this project +Install Docker Engine (docker-ce, docker-ce-cli, containerd.io) - see instructions at [https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/). + +Install Docker Compose from Docker's GitHub repository - see [https://docs.docker.com/compose/install/](https://docs.docker.com/compose/install/). 
You need to specify version 1.29.2 (or newer) in the curl command. + +Check default file permissions. If the *logstash* user is not able to access the logstash config files in the git checkout, you'll get an error from logstash saying there are no .conf files found even though they are there. Various components also need to be able to read and write to the data/ directory in the checkout. Defaults of 775 (u=rwx, g=rwx, o=rx) should work. + +### 3. Clone the Netsage Pipeline Project + +Clone the netsage-pipeline project from github. ```sh git clone https://github.com/netsage-project/netsage-pipeline.git ``` -(If you are upgrading to a new release, see the Upgrade section below!) -Then checkout the right version of the code. +When the pipeline runs, it uses the logstash conf files that are in the git checkout (in conf-logstash/), as well as a couple other files like docker-compose.yml, so it is important to checkout the correct version. + +Move into the netsage-pipeline/ directory (**all git and docker commands must be run from inside this directory!**), then checkout the most recent version of the code. It will say you are in 'detached HEAD' state. ```sh git checkout {tag} ``` -Replace "{tag}" with the release version you intend to use, e.g., "v1.2.8". ("Master" is the development version and is not intended for general use!) -`git status` will confirm which branch you are on, e.g., master or v1.2.8. +Replace "{tag}" with the release version you intend to use, e.g., "v1.2.11". ("Master" is the development version and is not intended for general use!) +`git status` will confirm which branch you are on, e.g., master or v1.2.11. -### 3. Create Docker-compose.override.yml +### 4. Create Docker-compose.override.yml Information in the `docker-compose.yml` file tells docker which containers (processes) to run and sets various parameters for them. Settings in the `docker-compose.override.yml` file will overrule and add to those. Note that docker-compose.yml should not be edited since upgrades will replace it. Put all customizations in the override file, since override files will not be overwritten. @@ -56,46 +66,54 @@ Collector settings may need to be edited by the user, so the information that do cp docker-compose.override_example.yml docker-compose.override.yml ``` -By default docker will bring up a single netflow collector and a single sflow collector. If this matches your case, you don't need to make any changes to the docker-compose.override_example.yml. If you have only one collector, remove or comment out the section for the one not needed so the collector doesn't run and simply create empty nfcapd files. -:::note -If you only have one collector, you should remove or comment out the section for the collector that is not used, so it doesn't run and just create empty files. -::: +By default docker will bring up a single slow collector and a single netflow collector that listen to udp traffic on ports localhost:9998 and 9999. If this matches your case, you don't need to make any changes to the docker-compose.override_example.yml. -This file also specifies port numbers, and directories for nfcapd files. By default, the sflow collector will listen to udp traffic on localhost:9998, while the netflow collector will listen on port 9999, and data will be written to `/data/input_data/`. Each collector is namespaced by its type so the sflow collector will write data to `/data/input_data/sflow/` and the netflow collector will write data to `/data/input_data/netflow/`. Change these only if required. 
+- If you have only one collector, remove or comment out the section for the one not needed so the collector doesn't run and simply create empty nfcapd files. +- If the collectors need to listen to different ports, make the appropriate changes here in both the "command:" and "ports:" lines. +- By default, the collectors will save flows to nfcapd files in sflow/ and netflow/ subdirectories in `./data/input_data/` (i.e., the data/ directory in the git checkout). If you need to save the data files to a different location, see the Docker Advanced section. Other lines in this file you can ignore for now. :::note -If you run into issues, try removing all the comments in the override file as they may conflict with the parsing done by docker-compose +If you run into issues, try removing all the comments in the override file as they may conflict with the parsing done by docker-compose, though we have not found this to be a problem. ::: - -### 4. Create Environment File - -{@import ../components/docker_env.md} - ### 5. Choose Pipeline Version -Once you've created the docker-compose.override.xml file and finished adjusting it for any customizations, you're ready to select which version Docker should run. +Once you've created the docker-compose.override.xml file and finished adjusting it for any customizations, you're ready to select which image versions Docker should run. ```sh ./scripts/docker_select_version.sh ``` When prompted, select the **same version** you checked out earlier. -This script will replace the version numbers of docker images in the docker-compose files with the correct values. -## Running the Collectors +This script will replace the version numbers of docker images in docker-compose.override.yml with the correct values. + +### 6. Create Environment File + +{@import ../components/docker_env.md} -After selecting the version to run, you could start the two flow collectors by themselves by running the following line. If you only need one of the collectors, remove the other from this command. +## Testing the Collectors -(Or see the next section for how to start all the containers, including the collectors.) +At this point, you can start the two flow collectors by themselves by running the following line. If you only need one of the collectors, remove the other from this command. + +(See the next section for how to start all the containers, including the collectors.) ```sh docker-compose up -d sflow-collector netflow-collector ``` -If the collector(s) are running properly, you should see nfcapd files in subdirectories of data/input_data/, and they should have sizes of more than a few hundred bytes. (See Troubleshooting if you have problems.) +Subdirectories for sflow/netflow, year, month, and day are created automatically under `data/input_data/`. File names contain dates and times. +These are not text files; to view the contents, use an [nfdump command](http://www.linuxcertif.com/man/1/nfdump/) (you will need to install nfdump). +Files will be deleted automatically by the importer as they age out (the default is to keep 3 days). + +If the collector(s) are running properly, you should see nfcapd files being written every 5 minutes and they should have sizes of more than a few hundred bytes. (Empty files still have header and footer lines.) +See Troubleshooting if you have problems. 
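For example (the file name here is hypothetical; adjust the path and timestamp to match your files), once nfdump is installed you can print the first few records of a collected file with:

```sh
nfdump -r data/input_data/sflow/2021/08/26/nfcapd.202108261200 -c 10
```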
+To stop the collectors +```sh +docker-compose down +``` ## Running the Collectors and Pipeline From c76847d3acd059b73a2cc6397774066342e2ea02 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Mon, 30 Aug 2021 16:46:25 +0000 Subject: [PATCH 026/126] Documentation improvements --- website/docs/components/docker_env.md | 13 ++- .../docs/deploy/docker_install_advanced.md | 103 +++++++++--------- website/docs/deploy/docker_install_simple.md | 6 +- website/docs/deploy/docker_troubleshooting.md | 34 +----- website/docs/deploy/docker_upgrade.md | 29 +++-- website/docs/devel/docker.md | 2 +- 6 files changed, 84 insertions(+), 103 deletions(-) diff --git a/website/docs/components/docker_env.md b/website/docs/components/docker_env.md index 942daece..7977381f 100644 --- a/website/docs/components/docker_env.md +++ b/website/docs/components/docker_env.md @@ -18,18 +18,19 @@ These names uniquely identify the source of the data and will be shown in the Gr For example, your sensor names might be "MyNet New York Sflow" and "MyNet Boston Netflow" or "MyNet New York - London" and "MyNet New York - Paris". Whatever makes sense in your situation. ::: -Other things you may need to edit in this file... +You will also likely want to change where the data is sent at the end of the logstash pipeline. **Logstash output rabbit queue**: This section defines where the final data will land after going through the pipeline. By default, it will end in a rabbitmq queue on `rabbit`, ie, the local rabbitMQ server running in its docker container. Enter a hostname to send to a remote rabbitMQ server (also the correct username, password, and queue key/name). -::: NOTE +:::note To send processed flow data to GlobalNOC at Indiana University, you will need to obtain settings for this section from your contact. At IU, data from the this final rabbit queue will be moved into an Elasticsearch instance for storage. ::: -**To drop all flows except those using the specfied interfaces**: If only some flows from a router are of interest and those can be identified by interface, set the flag variable to "True" and uncomment and set the other fields. If a flow's src OR dst ifindex is in the list specified, keep it. A list of ifindexes may be scoped to a specific sensor name (which traces back to a specific port). +The following options are described in the Docker Advanced section: + +**To drop all flows except those using the specfied interfaces**: Use if only some flows from a router are of interest and those can be identified by interface. -**To change the sensor name for flows using a certain interface**: If you want to break out some flows coming into a port and give them a different sensor name, set the flag variable to "True" and uncomment and set the other fields. +**To change the sensor name for flows using a certain interface**: Use if you want to break out some flows coming into a port and give them a different sensor name. -**To "manually" correct flow sizes and rates for sampling for specified sensors**: Once in a while, sampling corrections need to be applied by the logstash pipeline. Normally this is done automatically by nfdump in the importer. If required, set the flag variable to "True", specify which sensors need the correction, and enter N where the sampling rate is 1 out of N. +**To "manually" correct flow sizes and rates for sampling for specified sensors**: Use if sampling corrections are not being done automatically (which is normally the case). -See Docker Advanced for more information about the last options. 
diff --git a/website/docs/deploy/docker_install_advanced.md b/website/docs/deploy/docker_install_advanced.md index 259571d2..ba8dfe2d 100644 --- a/website/docs/deploy/docker_install_advanced.md +++ b/website/docs/deploy/docker_install_advanced.md @@ -18,11 +18,11 @@ Any number of sensors can be accomodated, although if there are more than a few ### a. Edit docker-compose.override.yml -The pattern to add a flow collector is always the same. To add an sflow collector called example-collector, edit the docker-compose.override.yml file and add +The pattern to add a flow collector is always the same. To add an sflow collector called example-collector, edit the docker-compose.override.yml file and add something like ```yaml example-collector: - image: netsage/nfdump-collector:1.6.18 + image: netsage/nfdump-collector:alpine-1.6.23 restart: always command: sfcapd -T all -l /data -S 1 -w -z -p 9997 volumes: @@ -31,12 +31,11 @@ The pattern to add a flow collector is always the same. To add an sflow collecto - "9997:9997/udp" ``` -- collector-name: should be updated to something that has some meaning, in our example "example-collector". -- command: choose between sfcapd for sflow and nfcapd for netflow, and at the end of the command, specify the port to watch for incoming flow data. (Unless your flow exporter is already set up to use a different port, you can use the default ports and configure the exporters on the routers to match.) -- ports: make sure the port here matches the port you've set in the command. Naturally all ports have to be unique for this host and the -router should be configured to export data to the same port. (If the port on your docker container is different than the port on your host/local machine, use container_port:host_port.) -- volumes: specify where to write the nfcapd files. Make sure the path is unique and in ./data/. In this case, we're writing to ./data/input_data/example. Change the last part of the path to something meaningful. - +- collector name: should be updated to something that has some meaning, in our example "example-collector". +- image: copy from the default collector sections already in the file. +- command: choose between "sfcapd" for sflow and "nfcapd" for netflow, and at the end of the command, specify the port to watch for incoming flow data. +- volumes: specify where to write the nfcapd files. Make sure the path is unique and in ./data/. In this case, we're writing to ./data/input_data/example. Change "example" to something meaningful. +- ports: make sure the port here matches the port you've set in the command. Naturally all ports have to be unique for this host and the router should be configured to export data to the same port. (?? If the port on your docker container is different than the port on your host/local machine, use container_port:host_port.) You will also need to uncomment these lines: ```yaml @@ -53,7 +52,7 @@ To make the Pipeline Importer aware of the new data to process, you will need to cp compose/importer/netsage_shared.xml userConfig/netsage_override.xml ``` -Edit netsage_override.xml and add a "collection" section for the new sensor as in the following example. The flow-path should match the path set above in docker-compose.override.yml. $exampleSensorName is a new "variable"; it will be replaced with a value set in the .env file. For the flow-type, enter "sflow" or "netflow" as appropriate. +Edit netsage_override.xml and add a new "collection" section for the new sensor as in the following example. 
The flow-path should match the path set above in docker-compose.override.yml. $exampleSensorName is a new "variable"; don't replace it here, it will be replaced with a value that you set in the .env file. For the flow-type, enter "sflow" or "netflow" as appropriate. (Enter "netflow" if you're running IPFIX.) ```xml @@ -65,10 +64,10 @@ Edit netsage_override.xml and add a "collection" section for the new sensor as i ### c. Edit environment file -Then, in the .env file, add a line that sets a value for the "variable" you referenced above, $exampleSensorName. The value is the name of the sensor which will be saved to elasticsearch and which appears in Netsage Dashboards. Set it to something meaningful and unique. +Then, in the .env file, add a line that sets a value for the "variable" you referenced above, $exampleSensorName. The value is the name of the sensor which will be saved to elasticsearch and which appears in Netsage Dashboards. Set it to something meaningful and unique. E.g., ```ini -exampleSensorName=Example New York sFlow +exampleSensorName=MyNet Los Angeles sFlow ``` @@ -80,26 +79,28 @@ After doing the setup above and selecting the docker version to run, you can sta docker-compose up -d example-collector ``` -:::note -The default version of the collector is 1.6.18. There are other versions released and :latest should be point to the latest one, but there is no particular effort made to make sure we released the latest version. You can get a listing of all the current tags listed [here](https://hub.docker.com/r/netsage/nfdump-collector/tags) and the source to generate the docker image can be found [here](https://github.com/netsage-project/docker-nfdump-collector) the code for the You may use a different version though there is no particular effort to have an image for every nfdump release. -::: - - ## To Keep Only Flows From Certain Interfaces -If your sensors are exporting all flows, but you only want to keep some of them (eg, only send some of them to NetSage), use this option. The collectors and importer will process all flows, but in the logstash pipeline, those that do not have src_ifindex or dst_inindex equal to one of the listed interfaces will be dropped. +If your sensors are exporting all flows, but only those using a particular interface are relevant, use this option in the .env file. The collectors and importer will save/read all incoming flows, but the logstash pipeline will drop those that do not have src_ifindex OR dst_inindex equal to one of those listed. -In the .env file, uncomment the apprpriate section and enter the information required. Be sure "True" is capitalized as shown and list all the ifindex values of flows that should be kept and passed on to NetSage. You may enter one or more ifindex values. For example, +In the .env file, uncomment lines in the appropriate section and enter the information required. Be sure `ifindex_filter_flag=True` with "True" capitalized as shown, any sensor names are spelled exactly right, and list all the ifindex values of flows that should be kept and processed. Some examples: ```sh -ifindex_filter_flag=True +ifindex_filter_keep=123 ifindex_filter_keep=123,456 +ifindex_filter_keep=Sensor 1: 789 +ifindex_filter_keep=123; Sensor 1: 789; Sensor 2: 800, 900 ``` -In this case, only flows that have src_ifindex = 123 or src_ifindex = 456 or dst_ifindex = 123 or dst_ifindex = 456 will be kept. All others will be dropped. 
+In the first case, all flows that have src_ifindex = 123 or dst_ifindex = 123 will be kept, regardless of sensor name. (Note that this may be a problem if you have more than 1 sensor with the same ifindex values!) +In the 2nd case, if src or dst ifindex is 123 or 456, the flow will be processed. +In the 3rd case, only flows from Sensor 1 will be filtered, with flows using ifindex 789 kept. +In the last example, any flow with ifindex 123 will be kept. Sensor 1 flows with ifindex 789 (or 123) will be kept, and those from Sensor 2 having ifindex 800 or 900 (or 123) will be kept. + +Spaces don't matter except within the sensor names. Punctuation is required as shown. ## To Change a Sensor Name Depending on the Interface Used -In some cases, users want to differentiate between flows that enter or exit through specific sensor interfaces. This can be done by editing the env file. +In some cases, users want to keep all flows from a certain sensor but differentiate between those that enter or exit through specific sensor interfaces. This can be done by using this option in the .env file. In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 4 fields are set properly! For example, @@ -110,16 +111,16 @@ ifindex_sensor_rename_new_name=IU Bloomington Sflow ifindex_sensor_rename_ifindex=10032 ``` -In this case, any flows from the "IU Sflow" sensor that come through interface 10032 (src_ifindex = 10032 OR dst_ifindex = 10032) will have the sensor name (sensor_id in ElasticSearch) changed from "IU Sflow" to "IU Bloomington Sflow". Currently, only one such rename can be configured in Docker. +In this case, any flows from the "IU Sflow" sensor that use interface 10032 (src_ifindex = 10032 OR dst_ifindex = 10032) will have the sensor name changed from "IU Sflow" to "IU Bloomington Sflow". Currently, only one such rename can be configured in Docker and only 1 ifindex is allowed. :::note Please notify the devs at IU in advance, if you need to modify a sensor name, because the regexes used for determining sensor_group and sensor_type may have to be updated. ::: ## To Do Sampling Rate Corrections in Logstash -When flow sampling is done, the number of bits needs to be corrected for the sampling rate. For example, if you are sampling 1 out of 100 flows and a sample has 55 MB, it is assumed that in reality there would be 100 flows of that size (with that src and dst), so the number of bits is multiplied by 100. Usually the collector (nfcapd or sfcapd process) gets the sampling rate from the incoming data and applies the correction, but in some cases, the sensor may not send the sampling rate, or there may be a complex set-up that requires a manual correction. With netflow, a manual correction can be applied using the '-s' option in the nfsen config or the nfcapd command. For sflow, there is no such option. In either case, the correction can be made in logstash as follows. +When flow sampling is done, corrections have to be applied. For example, if you are sampling 1 out of 100 flows, for each flow measured, it is assumed that in reality there would be 100 flows of that size with that src and dst, so the number of bits (and the number of packets, bits/s and packets/s) is multiplied by 100. 
 Usually the collector (nfcapd or sfcapd process) gets the sampling rate from the incoming data and applies the correction, but in some cases, the sensor may not send the sampling rate, or there may be a complex set-up that requires a manual correction. With netflow, a manual correction can be applied using the '-s' option in the nfsen config, if nfsen is being used, or the nfcapd command, but this is not convenient when using Docker. For sflow, there is no such option. In either case, the correction can be made in logstash as follows. -In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 3 fields are set properly! The same correction can be applied to multiple sensors by using a comma-separed list. For example, +In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 3 fields are set properly! The same correction can be applied to multiple sensors by using a comma-separated list. The same correction applies to all listed sensors. For example, ```sh sampling_correction_flag=True @@ -128,17 +129,17 @@ sampling_correction_factor=512 ``` ## To Change How Long Nfcapd Files Are Kept -The importer will automatically delete older nfcapd files for you, so that your disk don't fill up. By default, 3 days worth of files will be kept. This can be adjusted by making a netsage_override.xml file: +The importer will automatically delete older nfcapd files for you, so that your disk doesn't fill up. By default, 3 days worth of files will be kept. This can be adjusted by making a netsage_override.xml file: ```sh cp compose/importer/netsage_shared.xml userConfig/netsage_override.xml ``` -At the bottom of the file, edit this section to set the number of days worth of files to keep. Set cull-enable to 0 for no culling. Eg, to save 7 days worth of data: +At the bottom of the file, edit this section to set the number of days worth of files to keep. Set cull-enable to 0 for no culling. Eg, to save 1 day's worth of data: ````xml 1 - 7 + 1 ```` @@ -150,16 +151,38 @@ You will also need to uncomment these lines in docker-compose.override.yml: ``` +## To Save Flow Data to a Different Location + +By default, data is saved to subdirectories in the ./data/ directory (ie, the data/ directory in the git checkout). If you would like to use a different location, there are two options. + +1. The best solution is to create a sym link between ./data/ and the preferred location, or, for an NFS volume, export it as ${PROJECT_DIR}/data +2. Alternatively, update the path to the in all locations in docker-compose.yml and docker-compose.override.yml Eg, to save nfcapd files to subdirs in /var, set the collector volumes to "- /var/input_data/netflow:/data" (similarly for sflow) and set the importer volumes to "- /var:/data". + +:::warning +If you choose to update the docker-compose file, keep in mind that those changes will cause a merge conflict or be wiped out on upgrade. +You'll have to manage the volumes exported and ensure all the paths are updated correctly for the next release manually. +::: + ## To Customize Java Settings / Increase Memory Available for Logstash -If you need to modify the amount of memory logstash can use or any other java settings, -rename the provided example for JVM Options and tweak the settings as desired. + +If cpu or memory seems to be a problem, try increasing the JVM heap size for logstash from 2GB to 3 or 4, no more than 8. 
+ +To do this, edit LS_JAVA_OPTS in the .env file. [is this working??] + +Here are some tips for adjusting the JVM heap size (https://www.elastic.co/guide/en/logstash/current/jvm-settings.html): + +- Set the minimum (Xms) and maximum (Xmx) heap allocation size to the same value to prevent the heap from resizing at runtime, which is a very costly process. +- CPU utilization can increase unnecessarily if the heap size is too low, resulting in the JVM constantly garbage collecting. You can check for this issue by doubling the heap size to see if performance improves. +- Do not increase the heap size past the amount of physical memory. Some memory must be left to run the OS and other processes. As a general guideline for most installations, don’t exceed 50-75% of physical memory. The more memory you have, the higher percentage you can use. + +To modify other logstash settings, rename the provided example file for JVM Options and tweak the settings as desired: ```sh cp userConfig/jvm.options_example userConfig/jvm.options ``` -Also update the docker-compose.override.xml file to uncomment lines in the logstash section. It should look something like this: +Also update the docker-compose.override.xml file to uncomment lines in the logstash section. It should look something like this: ```yaml logstash: @@ -168,29 +191,9 @@ logstash: - ./userConfig/jvm.options:/usr/share/logstash/config/jvm.options ``` -Here are some tips for adjusting the JVM heap size (https://www.elastic.co/guide/en/logstash/current/jvm-settings.html): - -- The recommended heap size for typical ingestion scenarios should be no less than 4GB and no more than 8GB. -- CPU utilization can increase unnecessarily if the heap size is too low, resulting in the JVM constantly garbage collecting. You can check for this issue by doubling the heap size to see if performance improves. -- Do not increase the heap size past the amount of physical memory. Some memory must be left to run the OS and other processes. As a general guideline for most installations, don’t exceed 50-75% of physical memory. The more memory you have, the higher percentage you can use. -- Set the minimum (Xms) and maximum (Xmx) heap allocation size to the same value to prevent the heap from resizing at runtime, which is a very costly process. - ## To Bring up Kibana and Elasticsearch Containers The file docker-compose.develop.yaml can be used in conjunction with docker-compose.yaml to bring up the optional Kibana and Elastic Search components. This isn't a production pattern but the tools can be useful at times. Please refer to the [Docker Dev Guide](../devel/docker_dev_guide#optional-elasticsearch-and-kibana) -## For Data Saved to an NFS Volume - -By default, data is saved to subdirectories in the ./data directory. If you would like to use an NFS mount instead you will need to either - -1. export the NFS volume as ${PROJECT_DIR}/data (which is the idea scenario and least intrusive) -2. update the path to the NFS export path in all locations in docker-compose.yml and docker-compose.override.yml - -Note: modifying all the paths in the two files should work, but may not. In one case, it worked to modify only the paths for the collector volumes (eg, - /mnt/nfs/netsagedata/netflow:/data), leaving all others with their default values. - -:::warning -If you choose to update the docker-compose file, keep in mind that those changes will cause a merge conflict on upgrade. 
-You'll have to manage the volumes exported and ensure all the paths are updated correctly for the next release manually. -::: diff --git a/website/docs/deploy/docker_install_simple.md b/website/docs/deploy/docker_install_simple.md index 705896f7..1c538ecf 100644 --- a/website/docs/deploy/docker_install_simple.md +++ b/website/docs/deploy/docker_install_simple.md @@ -35,7 +35,7 @@ Decide where to run the Docker Pipeline and get it set up. Adjust iptables to al Install Docker Engine (docker-ce, docker-ce-cli, containerd.io) - see instructions at [https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/). -Install Docker Compose from Docker's GitHub repository - see [https://docs.docker.com/compose/install/](https://docs.docker.com/compose/install/). You need to specify version 1.29.2 (or newer) in the curl command. +Install Docker Compose from Docker's GitHub repository - see [https://docs.docker.com/compose/install/](https://docs.docker.com/compose/install/). You need to **specify version 1.29.2** (or newer) in the curl command. Check default file permissions. If the *logstash* user is not able to access the logstash config files in the git checkout, you'll get an error from logstash saying there are no .conf files found even though they are there. Various components also need to be able to read and write to the data/ directory in the checkout. Defaults of 775 (u=rwx, g=rwx, o=rx) should work. @@ -66,7 +66,7 @@ Collector settings may need to be edited by the user, so the information that do cp docker-compose.override_example.yml docker-compose.override.yml ``` -By default docker will bring up a single slow collector and a single netflow collector that listen to udp traffic on ports localhost:9998 and 9999. If this matches your case, you don't need to make any changes to the docker-compose.override_example.yml. +By default docker will bring up a single sflow collector and a single netflow collector that listen to udp traffic on ports localhost:9998 and 9999. If this matches your case, you don't need to make any changes to the docker-compose.override_example.yml. - If you have only one collector, remove or comment out the section for the one not needed so the collector doesn't run and simply create empty nfcapd files. - If the collectors need to listen to different ports, make the appropriate changes here in both the "command:" and "ports:" lines. @@ -87,7 +87,7 @@ Once you've created the docker-compose.override.xml file and finished adjusting ``` When prompted, select the **same version** you checked out earlier. -This script will replace the version numbers of docker images in docker-compose.override.yml with the correct values. +This script will replace the version numbers of docker images in docker-compose.override.yml and docker-compose.yml with the correct values. ### 6. Create Environment File diff --git a/website/docs/deploy/docker_troubleshooting.md b/website/docs/deploy/docker_troubleshooting.md index 1ad608ab..8a048fa0 100644 --- a/website/docs/deploy/docker_troubleshooting.md +++ b/website/docs/deploy/docker_troubleshooting.md @@ -23,39 +23,17 @@ To see if flows are getting into and being read from the rabbit queue on the pip ### If flow collection stops -**Logstash or Importer errors:** -- Make sure all containers are running. `docker ps` -- Check the logs of the various containers to see if anything jumps out as being invalid.  `docker-compose logs -f $service_label` -- Check the logs to see if logstash is starting successfully. 
+**Errors:** +- See if any of the containers has died. `docker ps` +- Check the logs of the various containers to see if anything jumps out as being invalid. Eg, `docker-compose logs logstash`. **Disk space:** - If the pipeline suddenly fails, check to see if the disk is full. If it is, first try getting rid of old docker images and containers to free up space: `docker image prune -a` and `docker container prune`. -- Also check to see how much space the nfcapd files are comsuming. You need to add more disk space. You could try saving fewer days of nfcapd files (see Docker Advanced). +- Also check to see how much space the nfcapd files are consuming. You may need to add more disk space. You could also try deleting nfcapd files after fewer days (see Docker Advanced). **Memory:** - If you are running a lot of data, sometimes docker may need to be allocated more memory. The most -likely culprit is logstash which is usually only allocated 2GB of RAM. You'll need to update the jvm.options file to grant it more memory. - -Please see the [Docker Advanced guide](docker_install_advanced.md#customize-logstash-settings) for details on how to customize logstash. - -Applying this snippet to logstash may help. For example, to give logstash (java) 3GB, - -```yaml -environment: + LS_JAVA_OPTS=-Xmx3g -``` - -Alternatively you may also try doing this: - -```yaml -deploy: - resources: - limits: - cpus: "0.50" - memory: 50M - reservations: - cpus: "0.25" - memory: 20M -``` +likely culprit is logstash which is only allocated 2GB of RAM by default. -Reference: https://docs.docker.com/compose/compose-file/#resources +Please see the Docker Advanced guide. diff --git a/website/docs/deploy/docker_upgrade.md b/website/docs/deploy/docker_upgrade.md index a598caac..ba9ab726 100644 --- a/website/docs/deploy/docker_upgrade.md +++ b/website/docs/deploy/docker_upgrade.md @@ -1,7 +1,7 @@ --- id: docker_upgrade title: Upgrading -sidebar_label: Docker Upgrading +sidebar_label: Docker - Upgrading --- To upgrade a previous installment of the Dockerized pipeline, perform the following steps. @@ -16,8 +16,9 @@ This will stop all the docker containers, including the importer, logstash, and ### Update Source Code -To upgrade to a new release, just reset and pull changes including the new release from github. Your customized .env and override files will not be overwritten. +To upgrade to a new release, pull new code from github and new images from dockerhub. Your customized .env and override files will not be overwritten. +Update the git repo, mainly to be able to see the latest tags. ```sh git reset --hard git pull origin master @@ -30,27 +31,25 @@ Example: ```git commit -a -m "Saving local state"; git checkout -b feature/backup; git checkout master``` ::: +Run these three commands to select the new release you want to run. In the first, replace "{tag}" by the version to run (eg, v1.2.10). When asked by the third, select the same version as the tag you checked out. +```sh +git checkout -b {tag} +git pull +./scripts/docker_select_version.sh +``` +Check to be sure docker-compose.yml and docker-compose.override.yml both now have the version number you selected for pipeline_importer and pipeline_logstash. + ### Check/Update Override Files -Occasionally, the required version of Docker or nfdump may change, which will necessitate editing your override and/or env files. +Occasionally, something may change which will necessitate editing your override and/or env file. 
-- Compare the new `docker-compose.override_example.yml` file to your `docker-compose.override.yml` to see if a new version of Docker is required. Look for, eg, `version: "3.7"` at the top. If the version number is different, change it in your docker-compose.override.yml file and upgrade Docker manually. +- Compare the new `docker-compose.override_example.yml` file to your `docker-compose.override.yml`. Look for `version: "x.x"` at the top. If the version number is different, change it in your docker-compose.override.yml file. (This is the Compose file format version.) -- Also check to see if the version of nfdump has changed. Look for lines like `image: netsage/nfdump-collector:1.6.18`. Make sure the version in your override file matches what is the example file. (You do not need to actually perform any upgrade yourself. This will ensure the correct version is pulled from Docker Hub.) -Note that you do not need to update the versions of the importer or logstash images. That will be done for you in the "select release version" stop coming up. +- Check to see if the version of nfdump has changed. Look for lines like `image: netsage/nfdump-collector:`. Make sure the version in your override file matches what is in the example file. (You do not need to actually perform any upgrade yourself. This will ensure the correct version is pulled from Docker Hub.) - Also compare your `.env` file with the new `env.example` file to see if any new lines or sections have been added. If there have been any changes relevant to your deployment, eg, new options you want to use, copy the changes into your .env file. - If you used the Docker Advanced guide to make a `netsage_override.xml` file, compare it to `netsage_shared.xml` to see if there are any changes. This is unlikely. -### Select Release Version - -Run these two commands to select the new release you want to run. In the first, replace "{tag}" by the version to run (eg, v1.2.10). When asked by the second, select the same version as the tag you checked out. -```sh -git checkout -b {tag} -git pull -./scripts/docker_select_version.sh -``` -Check to be sure docker-compose.yml and docker-compose.override.yml both now have the version number you selected. ### Update Docker Containers diff --git a/website/docs/devel/docker.md b/website/docs/devel/docker.md index 76735113..21cb7d5c 100644 --- a/website/docs/devel/docker.md +++ b/website/docs/devel/docker.md @@ -8,7 +8,7 @@ sidebar_label: Docker Dev Guide You can use the "master" version or a tagged version. To select a released version use the docker_select_version.sh script (see the Deployment Guide). -If you wish to use the development version (master branch) simply scip the docker_select_version.sh step. +If you wish to use the development version (master branch) simply skip the docker_select_version.sh step. 
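For illustration, selecting a released version before running the pipeline might look like the following sketch. It simply combines the `git checkout` and `docker_select_version.sh` commands shown elsewhere in this guide; "v1.2.11" is only a stand-in for whichever tagged release you actually want.

```sh
# Fetch tags and check out the desired release (example tag only)
git fetch
git checkout v1.2.11 -b v1.2.11

# Rewrite the image version numbers in the docker-compose files to match
./scripts/docker_select_version.sh
```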
## Installing From 52868ee3f1b0ce222f290dadf9b4a30ed16d5491 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Mon, 30 Aug 2021 17:35:51 +0000 Subject: [PATCH 027/126] More documentation updates --- website/docs/components/docker_env.md | 8 +++----- website/docs/deploy/docker_install_advanced.md | 3 +++ website/docs/deploy/docker_troubleshooting.md | 1 + website/docs/pipeline/elastic_search.md | 11 ++++++----- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/website/docs/components/docker_env.md b/website/docs/components/docker_env.md index 7977381f..75c3480a 100644 --- a/website/docs/components/docker_env.md +++ b/website/docs/components/docker_env.md @@ -18,12 +18,10 @@ These names uniquely identify the source of the data and will be shown in the Gr For example, your sensor names might be "MyNet New York Sflow" and "MyNet Boston Netflow" or "MyNet New York - London" and "MyNet New York - Paris". Whatever makes sense in your situation. ::: -You will also likely want to change where the data is sent at the end of the logstash pipeline. +You will also want to edit the **Logstash output rabbit queue** section. This section defines where the final data will land after going through the pipeline. By default, it will be written to a rabbitmq queue on `rabbit`, ie, the local rabbitMQ server running in the docker container. Enter a hostname to send to a remote rabbitMQ server (also the correct username, password, and queue key/name). -**Logstash output rabbit queue**: This section defines where the final data will land after going through the pipeline. By default, it will end in a rabbitmq queue on `rabbit`, ie, the local rabbitMQ server running in its docker container. Enter a hostname to send to a remote rabbitMQ server (also the correct username, password, and queue key/name). - -:::note -To send processed flow data to GlobalNOC at Indiana University, you will need to obtain settings for this section from your contact. At IU, data from the this final rabbit queue will be moved into an Elasticsearch instance for storage. +:::for pipelines sending to GlobalNOC +To send processed flow data to GlobalNOC at Indiana University, you will need to obtain settings for this section from your contact. A new queue may need to be set up at IU, as well as allowing traffic from your pipeline host. (At IU, data from the this final rabbit queue will be moved into an Elasticsearch instance for storage and viewing.) ::: The following options are described in the Docker Advanced section: diff --git a/website/docs/deploy/docker_install_advanced.md b/website/docs/deploy/docker_install_advanced.md index ba8dfe2d..f56bb5cd 100644 --- a/website/docs/deploy/docker_install_advanced.md +++ b/website/docs/deploy/docker_install_advanced.md @@ -36,6 +36,9 @@ The pattern to add a flow collector is always the same. To add an sflow collecto - command: choose between "sfcapd" for sflow and "nfcapd" for netflow, and at the end of the command, specify the port to watch for incoming flow data. - volumes: specify where to write the nfcapd files. Make sure the path is unique and in ./data/. In this case, we're writing to ./data/input_data/example. Change "example" to something meaningful. - ports: make sure the port here matches the port you've set in the command. Naturally all ports have to be unique for this host and the router should be configured to export data to the same port. (?? If the port on your docker container is different than the port on your host/local machine, use container_port:host_port.) 
+ +Make sure the indentation is right or you'll get an error about yaml parsing. + You will also need to uncomment these lines: ```yaml diff --git a/website/docs/deploy/docker_troubleshooting.md b/website/docs/deploy/docker_troubleshooting.md index 8a048fa0..8b10ed1d 100644 --- a/website/docs/deploy/docker_troubleshooting.md +++ b/website/docs/deploy/docker_troubleshooting.md @@ -26,6 +26,7 @@ To see if flows are getting into and being read from the rabbit queue on the pip *Errors:** - See if any of the containers has died. `docker ps` - Check the logs of the various containers to see if anything jumps out as being invalid. Eg, `docker-compose logs logstash`. +- If logstash dies with an error about not finding \*.conf files, make sure conf-logstash/ and directories and files within are readable by everyone (and directories are executable by everyone). The data/ directory and subdirectories need to be readable and writable by everyone, as well. **Disk space:** - If the pipeline suddenly fails, check to see if the disk is full. If it is, first try getting rid of old docker images and containers to free up space: `docker image prune -a` and `docker container prune`. diff --git a/website/docs/pipeline/elastic_search.md b/website/docs/pipeline/elastic_search.md index 047643da..8114b21b 100644 --- a/website/docs/pipeline/elastic_search.md +++ b/website/docs/pipeline/elastic_search.md @@ -13,6 +13,7 @@ Flow data is ultimately saved to Elasticsearch. Following are the fields that ar |start |Jun 9, 2020 @ 17:39:53.808 | Start time of the flow (first packet seen)| |end |Jun 9, 2020 @ 17:39:57.699 |End time of the flow (last packet seen)| |meta.id | a17c4f05420d7ded9eb151ccd293a633 ff226d1752b24e0f4139a87a8b26d779 |Id of the flow (hash of 5-tuple + Sensor name)| +|es_doc_id |4f46bef884... |Hash of meta.id and start time. May be used as doc id in ES to prevent duplicates, but see Notes elsewhere.| |meta.flow_type |sflow |'sflow', 'netflow', or 'tstat'| |meta.protocol |tcp |Protocol used| |meta.sensor_id | snvl2-pw-sw-1-mgmt-2.cenic.net|Sensor name (set in importer config, may not always be a hostname) | @@ -102,12 +103,12 @@ The [Science Registry](https://scienceregistry.netsage.global/rdb/) stores human |name |example |description | |-----------------------|-----------------------|-----------------------------| -|@ingest_time |Jun 9, 2020 @ 10:03:20.700 | Essentially time the flow went into the logstash pipeline or the time stitching of the flow commenced| -|@timestamp |Jun 9, 2020 @ 18:03:21.703 |The time the flow went into the logstash pipeline for tstat flows, or the time stitching finished and the event was pushed for other flows.| +|@pipeline_ver |1.2.11 | Version number of the pipeline used to process this flow | +|@ingest_time |Jun 9, 2020 @ 10:03:20.700 | The time the flow entered the logstash pipeline | +|@timestamp |Jun 9, 2020 @ 18:03:21.703 |The time the flow entered the logstash pipeline for tstat flows, or the time stitching finished and the event exited the aggregation filter for other flows.| |@exit_time |Jun 9, 2020 @ 18:03:25.369 |The time the flow exited the pipeline | |@processing_time |688.31 |@exit_time minus @ingest_time. Useful for seeing how long stitching took. | -|stitched_flows |13 |Number of flows that came into logstash that were stitched together to make this final one. 1 if no flows were stitched together. 0 for tstat flows, which are never stitched. | -|es_doc_id |4f46bef884... |Hash of meta.id and start time. 
May be used as doc id in ES to prevent duplicates, but see Notes elsewhere.| +|stitched_flows |13 |Number of flows that came into logstash that were stitched together to make this final one. 1 if no flows were stitched together. 0 for tstat flows, which are never stitched. | |tags |maxmind src asn |Various info and error messages| |trial | 5 |Can be set in 40-aggregation.conf if desired| @@ -117,7 +118,7 @@ The [Science Registry](https://scienceregistry.netsage.global/rdb/) stores human |-----------------------|-----------------------|-----------------------------| |_index | om-ns-netsage-2020.06.14 | name of the index ("database table") | |_type |_doc | set by ES | -|_id |HRkcm3IByJ9fEnbnCpaY | elasticsearch document id. If es_doc_id is provided, that is used. | +|_id |HRkcm3IByJ9fEnbnCpaY | elasticsearch document id. | |_score |1 |set by ES query | |@version |1 | set by ES | From 34284c341fd44111a888052c54660b8883de7ce6 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Mon, 30 Aug 2021 18:23:06 +0000 Subject: [PATCH 028/126] yet more doc updates --- website/docs/components/docker_env.md | 10 ++++++++-- website/docs/deploy/docker_install_advanced.md | 3 +++ website/docs/pipeline/elastic_search.md | 4 ++-- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/website/docs/components/docker_env.md b/website/docs/components/docker_env.md index 75c3480a..0bfe77ac 100644 --- a/website/docs/components/docker_env.md +++ b/website/docs/components/docker_env.md @@ -20,7 +20,13 @@ For example, your sensor names might be "MyNet New York Sflow" and "MyNet Boston You will also want to edit the **Logstash output rabbit queue** section. This section defines where the final data will land after going through the pipeline. By default, it will be written to a rabbitmq queue on `rabbit`, ie, the local rabbitMQ server running in the docker container. Enter a hostname to send to a remote rabbitMQ server (also the correct username, password, and queue key/name). -:::for pipelines sending to GlobalNOC +```sh +rabbitmq_output_host=rabbit@mynet.edu +rabbitmq_output_username=guest +rabbitmq_output_pw=guest +rabbitmq_output_key=netsage_archive_input +``` +:::note To send processed flow data to GlobalNOC at Indiana University, you will need to obtain settings for this section from your contact. A new queue may need to be set up at IU, as well as allowing traffic from your pipeline host. (At IU, data from the this final rabbit queue will be moved into an Elasticsearch instance for storage and viewing.) ::: @@ -30,5 +36,5 @@ The following options are described in the Docker Advanced section: **To change the sensor name for flows using a certain interface**: Use if you want to break out some flows coming into a port and give them a different sensor name. -**To "manually" correct flow sizes and rates for sampling for specified sensors**: Use if sampling corrections are not being done automatically (which is normally the case). +**To "manually" correct flow sizes and rates for sampling for specified sensors**: Use if sampling corrections are not being done automatically. Normally you do not need to use this, but check flows to be sure results are reasonable. 
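Pulling the Docker Advanced options together, a hedged sketch of the corresponding .env lines might look like the following. The variable names and sample values are all taken from examples elsewhere in this documentation; adjust them to your own sensors and interfaces, and leave any section you don't need commented out.

```sh
# Keep only flows that use these interfaces (ifindex filter)
ifindex_filter_flag=True
ifindex_filter_keep=123,456

# Give flows using one specific interface a different sensor name
ifindex_sensor_rename_flag=True
ifindex_sensor_rename_old_name=IU Sflow
ifindex_sensor_rename_new_name=IU Bloomington Sflow
ifindex_sensor_rename_ifindex=10032

# Manually correct flow sizes and rates for a 1-in-512 sampling rate
sampling_correction_flag=True
sampling_correction_factor=512
```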
diff --git a/website/docs/deploy/docker_install_advanced.md b/website/docs/deploy/docker_install_advanced.md index f56bb5cd..e2750705 100644 --- a/website/docs/deploy/docker_install_advanced.md +++ b/website/docs/deploy/docker_install_advanced.md @@ -172,6 +172,9 @@ You'll have to manage the volumes exported and ensure all the paths are updated If cpu or memory seems to be a problem, try increasing the JVM heap size for logstash from 2GB to 3 or 4, no more than 8. To do this, edit LS_JAVA_OPTS in the .env file. [is this working??] +```yaml +LS_JAVA_OPTS=-Xmx4g -Xms4g +``` Here are some tips for adjusting the JVM heap size (https://www.elastic.co/guide/en/logstash/current/jvm-settings.html): diff --git a/website/docs/pipeline/elastic_search.md b/website/docs/pipeline/elastic_search.md index 8114b21b..c82a8dbd 100644 --- a/website/docs/pipeline/elastic_search.md +++ b/website/docs/pipeline/elastic_search.md @@ -12,8 +12,8 @@ Flow data is ultimately saved to Elasticsearch. Following are the fields that ar |-----------------------|-----------------------|-----------------------------| |start |Jun 9, 2020 @ 17:39:53.808 | Start time of the flow (first packet seen)| |end |Jun 9, 2020 @ 17:39:57.699 |End time of the flow (last packet seen)| -|meta.id | a17c4f05420d7ded9eb151ccd293a633 ff226d1752b24e0f4139a87a8b26d779 |Id of the flow (hash of 5-tuple + Sensor name)| -|es_doc_id |4f46bef884... |Hash of meta.id and start time. May be used as doc id in ES to prevent duplicates, but see Notes elsewhere.| +|meta.id |a17c4f0542... |Id of the flow (hash of 5-tuple + Sensor name)| +|es_doc_id |4f46bef884... |Hash of meta.id and start time. May be used as doc id in ES to prevent duplicates, but see Notes elsewhere.| |meta.flow_type |sflow |'sflow', 'netflow', or 'tstat'| |meta.protocol |tcp |Protocol used| |meta.sensor_id | snvl2-pw-sw-1-mgmt-2.cenic.net|Sensor name (set in importer config, may not always be a hostname) | From 13094889d523fc9b158b7042dfe07cf3b304fcd2 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Mon, 30 Aug 2021 18:49:02 +0000 Subject: [PATCH 029/126] set missing ip's to 0.0.0.0, ifindexes to -10, duration to 0 --- CHANGES.md | 3 +- conf-logstash/10-preliminaries.conf | 50 +++++++++++++++++++++++++---- 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 49a50b19..445673d2 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -7,9 +7,10 @@ Features: * When duration <= 0.002 sec, set duration, bits/s, and packets/s to 0 as rates are inaccurate for small durations * Added NORDUnet* and tacc_netflows to sensor group and type regexes * Added onenet-members-list.rb to the members-list files to download + * At the beginning, set missing IPs to 0.0.0.0, missing ifindexes to -10, missing durations to 0. Bugs: - * Fixed the es_doc_id hash to always include meta.id and the start time. It was missing meta.id previously. + * Fixed es_doc_id. The hash was missing meta.id previously due to a bug. ------------------------------------------------------ ## GRNOC NetSage Deidentfier 1.2.10 -- May 10 2021 diff --git a/conf-logstash/10-preliminaries.conf b/conf-logstash/10-preliminaries.conf index cda1bdd2..296ec2f9 100644 --- a/conf-logstash/10-preliminaries.conf +++ b/conf-logstash/10-preliminaries.conf @@ -2,7 +2,44 @@ filter { - # 1. Drop flows to or from private addresses (or other ranges we want to drop) + # 1. Check for missing fields that can cause logstash to crash + if ![meta][src_ip] { + mutate{ + id => "10-01" + add_tag => [ "src_ip was missing in orig data!?" 
] + add_field => { "[meta][src_ip]" => "0.0.0.0" } + } + } + if ![meta][dst_ip] { + mutate{ + id => "10-02" + add_tag => [ "dst_ip was missing in orig data!?" ] + add_field => { "[meta][dst_ip]" => "0.0.0.0" } + } + } + if ![meta][src_ifindex] { + mutate{ + id => "10-03" + add_tag => [ "src_ifindex was missing!?" ] + add_field => { "[meta][src_ifindex]" => -10 } + } + } + if ![meta][dst_ifindex] { + mutate{ + id => "10-04" + add_tag => [ "dst_ifindex was missing in orig data!?" ] + add_field => { "[meta][dst_ifindex]" => -10 } + } + } + if ![values][duration] { + mutate{ + id => "10-05" + add_tag => [ "duration was missing in orig data!?" ] + add_field => { "[values][duration]" => 0 } + } + } + + # 2. Drop flows to or from private addresses (or other ranges we want to drop) cidr { id => "10-1" address => [ "%{[meta][src_ip]}" ] @@ -18,11 +55,12 @@ filter { add_field => { "@private_dst" => "yes" } } } + # drop if [@private_src] == "yes" or [@private_dst] == "yes" { drop { } } - # 2. Add @ingest_time field (useful for debugging) + # 3. Add @ingest_time field (useful for debugging) mutate { id => "10-3" add_field => { '@ingest_time' => "%{@timestamp}" } @@ -33,7 +71,7 @@ filter { target => "@ingest_time" } - # 3. Convert strings to numeric types where appropriate. We need to use these in calculations later. + # 4. Convert strings to numeric types where appropriate. We need to use these in calculations later. # Start and end are timestamps at this point. Make sure they are floats. mutate { id => "10-5" @@ -48,7 +86,7 @@ filter { } } - # 4. If duration (eg from aggregation by nfdump in importer) is <= .002, set it to 0. + # 5. If duration (eg from aggregation by nfdump in importer) is <= .002, set it to 0. # When duration is too small, bps calculation is highly inaccurate. if [values][duration] <= 0.002 { mutate { @@ -66,7 +104,7 @@ filter { flow_ts = event.get('start').to_f flow_te = event.get('end').to_f - # 5. Convert any timestamps in ms to s + # 6. Convert any timestamps in ms to s if flow_ts > 9999999999.0 flow_ts = flow_ts / 1000.0 event.set('start', flow_ts) @@ -76,7 +114,7 @@ filter { event.set('end', flow_te) end - # 6. DROP any event with a strange start or end time + # 7. DROP any event with a strange start or end time # > 10 sec in the future or > 1 year in the past, or end < start current_t = Time.now.to_f age_s = current_t - flow_ts From bfd67b810ed7a6b8bd968faef613587de58e21b9 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Mon, 30 Aug 2021 21:48:47 +0000 Subject: [PATCH 030/126] Couple more doc changes --- CHANGES.md | 1 + website/docs/deploy/docker_install_advanced.md | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 445673d2..d31b8e12 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -8,6 +8,7 @@ Features: * Added NORDUnet* and tacc_netflows to sensor group and type regexes * Added onenet-members-list.rb to the members-list files to download * At the beginning, set missing IPs to 0.0.0.0, missing ifindexes to -10, missing durations to 0. + * Increased version numbers for some website-related packages at Dependabot's request. Bugs: * Fixed es_doc_id. The hash was missing meta.id previously due to a bug. 
diff --git a/website/docs/deploy/docker_install_advanced.md index e2750705..410e5387 100644 --- a/website/docs/deploy/docker_install_advanced.md +++ b/website/docs/deploy/docker_install_advanced.md @@ -158,8 +158,19 @@ You will also need to uncomment these lines in docker-compose.override.yml: By default, data is saved to subdirectories in the ./data/ directory (ie, the data/ directory in the git checkout). If you would like to use a different location, there are two options. -1. The best solution is to create a sym link between ./data/ and the preferred location, or, for an NFS volume, export it as ${PROJECT_DIR}/data -2. Alternatively, update the path to the in all locations in docker-compose.yml and docker-compose.override.yml Eg, to save nfcapd files to subdirs in /var, set the collector volumes to "- /var/input_data/netflow:/data" (similarly for sflow) and set the importer volumes to "- /var:/data". +1. The best solution is to create a symlink between ./data/ and the preferred location, or, for an NFS volume, export it as ${PROJECT_DIR}/data. + +During installation, delete the data/ directory (it should only contain .placeholder), then create your symlink. Eg, to use /var/netsage/ instead of data/, +```sh +cd {netsage-pipeline dir} +mkdir /var/netsage +rm data/.placeholder +rmdir data +ln -s /var/netsage {netsage-pipeline dir}/data +``` +(Check the permissions of the directory.) + +2. Alternatively, update volumes in docker-compose.yml and docker-compose.override.yml. Eg, to save nfcapd files to subdirs in /mydir, set the collector volumes to `- /mydir/input_data/netflow:/data` (similarly for sflow) and set the importer and logstash volumes to `- /mydir:/data`. :::warning If you choose to update the docker-compose file, keep in mind that those changes will cause a merge conflict or be wiped out on upgrade. From 15cc461a4f3906e3e1958afb79afd9ace86a51a7 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Tue, 31 Aug 2021 16:06:26 +0000 Subject: [PATCH 031/126] Added release date --- CHANGES.md | 11 ++++++----- grnoc-netsage-deidentifier.spec | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index d31b8e12..07229f77 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,17 +1,18 @@ ------------------------------------------------------ -## GRNOC NetSage Deidentfier 1.2.11 -- Aug 2021 +## GRNOC NetSage Deidentfier 1.2.11 -- Sept 3, 2021 ------------------------------------------------------ Features: * Made filtering by ifindex (optionally) sensor-specific - * Added tags to flows with src and dst IPs = 0.0.0.x + * Added tags to flows with src and dst IPs = 0.0.0.x (user can set outputs filter to do something based on tags) * When duration <= 0.002 sec, set duration, bits/s, and packets/s to 0 as rates are inaccurate for small durations * Added NORDUnet* and tacc_netflows to sensor group and type regexes * Added onenet-members-list.rb to the members-list files to download - * At the beginning, set missing IPs to 0.0.0.0, missing ifindexes to -10, missing durations to 0. - * Increased version numbers for some website-related packages at Dependabot's request. + * Increased version numbers for some website-related packages in response to Dependabot + * Documentation improvements Bugs: - * Fixed the es_doc_id hash to always include meta.id and the start time. It was missing meta.id previously. + * Fixed es_doc_id. The hash had been missing meta.id due to a bug. 
+ * At the beginning of the pipeline, set missing IPs to 0.0.0.0, missing ifindexes to -10, missing durations to 0. ------------------------------------------------------ ## GRNOC NetSage Deidentfier 1.2.10 -- May 10 2021 diff --git a/grnoc-netsage-deidentifier.spec b/grnoc-netsage-deidentifier.spec index 1c53b7f2..eedfe0ea 100644 --- a/grnoc-netsage-deidentifier.spec +++ b/grnoc-netsage-deidentifier.spec @@ -1,7 +1,7 @@ Summary: GRNOC NetSage Flow-Processing Pipeline Name: grnoc-netsage-deidentifier - # update Version here, in conf-logstash/98-post-process.conf, lib/GRNOC/NetSage/Deidentifier.pm Version: 1.2.11 + # update Version here, in conf-logstash/98-post-process.conf, lib/GRNOC/NetSage/Deidentifier.pm Release: 1%{?dist} License: GRNOC Group: Measurement From 045078a6453f13ed91fc5d00445af809f164ea29 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 31 Aug 2021 20:48:36 +0000 Subject: [PATCH 032/126] Bump tar from 6.1.4 to 6.1.11 in /website Bumps [tar](https://github.com/npm/node-tar) from 6.1.4 to 6.1.11. - [Release notes](https://github.com/npm/node-tar/releases) - [Changelog](https://github.com/npm/node-tar/blob/main/CHANGELOG.md) - [Commits](https://github.com/npm/node-tar/compare/v6.1.4...v6.1.11) --- updated-dependencies: - dependency-name: tar dependency-type: indirect ... Signed-off-by: dependabot[bot] --- website/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index a64dced7..7b43286a 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -9206,9 +9206,9 @@ tapable@^1.0.0, tapable@^1.1.3: integrity sha512-4WK/bYZmj8xLr+HUCODHGF1ZFzsYffasLUgEiMBY4fgtltdO6B4WJtlSbPaDTLpYTcGVwM2qLnFTICEcNxs3kA== tar@^6.0.2: - version "6.1.4" - resolved "https://registry.yarnpkg.com/tar/-/tar-6.1.4.tgz#9f0722b772a5e00dba7d52e1923b37a7ec3799b3" - integrity sha512-kcPWrO8S5ABjuZ/v1xQHP8xCEvj1dQ1d9iAb6Qs4jLYzaAIYWwST2IQpz7Ud8VNYRI+fGhFjrnzRKmRggKWg3g== + version "6.1.11" + resolved "https://registry.yarnpkg.com/tar/-/tar-6.1.11.tgz#6760a38f003afa1b2ffd0ffe9e9abbd0eab3d621" + integrity sha512-an/KZQzQUkZCkuoAA64hM92X0Urb6VpRhAFllDzz44U2mcD5scmT3zBc4VgVpkugF580+DQn8eAFSyoQt0tznA== dependencies: chownr "^2.0.0" fs-minipass "^2.0.0" From aa5903ecf7471d5dccdd0293fb6391e61b264052 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 1 Sep 2021 18:22:39 +0000 Subject: [PATCH 033/126] Doc changes --- website/docs/deploy/docker_install_advanced.md | 10 +++++----- website/docs/pipeline/logstash.md | 9 +++++---- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/website/docs/deploy/docker_install_advanced.md b/website/docs/deploy/docker_install_advanced.md index 410e5387..a8cc604c 100644 --- a/website/docs/deploy/docker_install_advanced.md +++ b/website/docs/deploy/docker_install_advanced.md @@ -16,7 +16,7 @@ If you have more than 1 sflow and/or 1 netflow sensor, you will need to create m Any number of sensors can be accomodated, although if there are more than a few being processed by the same Importer, you may run into issues where long-lasting flows from sensosr A time out in the aggregation step while waiting for flows from sensors B to D to be processed. (Another option might be be to run more than one Docker deployment.) -### a. Edit docker-compose.override.yml +#### a. Edit docker-compose.override.yml The pattern to add a flow collector is always the same. 
To add an sflow collector called example-collector, edit the docker-compose.override.yml file and add something like @@ -47,7 +47,7 @@ You will also need to uncomment these lines: ``` -### b. Edit netsage_override.xml +#### b. Edit netsage_override.xml To make the Pipeline Importer aware of the new data to process, you will need to create a custom Importer configuration: netsage_override.xml. This will replace the usual config file netsage_shared.xml. @@ -65,7 +65,7 @@ Edit netsage_override.xml and add a new "collection" section for the new sensor ``` -### c. Edit environment file +#### c. Edit environment file Then, in the .env file, add a line that sets a value for the "variable" you referenced above, $exampleSensorName. The value is the name of the sensor which will be saved to elasticsearch and which appears in Netsage Dashboards. Set it to something meaningful and unique. E.g., @@ -74,7 +74,7 @@ exampleSensorName=MyNet Los Angeles sFlow ``` -### d. Running the new collector +#### d. Running the new collector After doing the setup above and selecting the docker version to run, you can start the new collector by running the following line, using the collector name (or by running `docker-compose up -d` to start up all containers): @@ -85,7 +85,7 @@ docker-compose up -d example-collector ## To Keep Only Flows From Certain Interfaces If your sensors are exporting all flows, but only those using a particular interface are relevant, use this option in the .env file. The collectors and importer will save/read all incoming flows, but the logstash pipeline will drop those that do not have src_ifindex OR dst_inindex equal to one of those listed. -In the .env file, uncomment lines in the appropriate section and enter the information required. Be sure `ifindex_filter_flag=True` with "True" capitalized as shown, any sensor names are spelled exactly right, and list all the ifindex values of flows that should be kept and processed. Some examples: +In the .env file, uncomment lines in the appropriate section and enter the information required. Be sure `ifindex_filter_flag=True` with "True" capitalized as shown, any sensor names are spelled exactly right, and list all the ifindex values of flows that should be kept and processed. Some examples (use just one!): ```sh ifindex_filter_keep=123 diff --git a/website/docs/pipeline/logstash.md b/website/docs/pipeline/logstash.md index b27e2ee7..658b240a 100644 --- a/website/docs/pipeline/logstash.md +++ b/website/docs/pipeline/logstash.md @@ -16,6 +16,8 @@ Notes: ## Logstash Sequence +The main things done in each conf file are as follows. + ### 01-input-rabbit.conf Reads flows from a rabbitmq queue. (The ".disabled" extenstion can be removed from other 01-input configs available in conf.d/ to get flows from other sources.) @@ -25,12 +27,11 @@ Reads flows from a rabbitmq queue. (The ".disabled" extenstion can be removed fr Drops flows to or from private IP addresses; converts any timestamps in milliseconds to seconds; drops events with timestamps more than a year in the past or (10 sec) in the future; -does some data type conversions; -adds @ingest_time (this is mainly for developers). +sets duration and rates to 0 if duration is <= 0.002 sec (because tiny durations/few samples lead to inaccurate rates) ### 15-sensor-specific-changes.conf -Makes any changes to fields needed for specific sensors. 
This config currently provides 1) the ability to drop all flows that do not use interfaces (ifindexes) in a specfied list, 2) the ability to change the sensor name for flows from a specified sensor which go through a certain interface, and 3) the ability to apply a sampling rate correction manually for named sensors. You may edit the file in a bare-metal installation and specify everything explicitly (upgrades will not overwrite this config) or you may use the environment file specified in the systemd unit file. For Docker installations, use the .env file to specifiy the parameters. By default, this config will do nothing since the flags will be set to False. +Makes any changes to fields needed for specific sensors. This config currently provides 1) the ability to drop all flows that do not use interfaces (ifindexes) in a specified list; lists can be sensor-specific, 2) the ability to change the sensor name for flows from a specified sensor which use a certain interface, and 3) the ability to apply a sampling rate correction manually for named sensors. You may edit the file in a bare-metal installation and specify everything explicitly (upgrades will not overwrite this config) or you may use the environment file specified in the systemd unit file. For Docker installations, use the .env file to specify the parameters. By default, this config will do nothing since the flags will be set to False. ### 20-add_id.conf @@ -53,7 +54,7 @@ Notes: Queries the MaxMind GeoLite2-City database by IP to get src and dst Countries, Continents, Latitudes, and Longitudes; if the destination IP is in the multicast range, sets the destination Organization, Country, and Continent to "Multicast". -*This product includes GeoLite2 data created by MaxMind, available from [www.maxmind.com](http://www.maxmind.com).* +*This product uses GeoLite2 data created by MaxMind, available from [www.maxmind.com](http://www.maxmind.com).* ### 50-asn.conf From adbaf940552e4eb881bb4f2b31904b19742ebb8a Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 1 Sep 2021 18:30:48 +0000 Subject: [PATCH 034/126] doc change --- website/docs/devel/documentation_guide.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/website/docs/devel/documentation_guide.md b/website/docs/devel/documentation_guide.md index 06c1c4a8..076628b2 100644 --- a/website/docs/devel/documentation_guide.md +++ b/website/docs/devel/documentation_guide.md @@ -32,9 +32,9 @@ $ cd netsage-pipeline/website $ yarn install ``` -### Local Development +### If Local Development -If you are working on your local machine, you can view changes to the docs in a browser as you work. Use the following commands to generate the static website content (gets written into the build directory), then start a local development server and open up a browser window in which to view the docs. Most changes you make will be reflected live without having to restart the server. +If you are working on your local machine, rather than sshing into a host, you can view changes to the docs in a browser as you work. Use the following commands to generate the static website content (gets written into the build directory), then start a local development server and open up a browser window in which to view the docs. Most changes you make will be reflected live without having to restart the server. ``` $ yarn build $ yarn start ``` @@ -46,6 +46,7 @@ Whether on a local machine or a linux host, to make changes, edit the files in w When finished, git add, git commit, git push, as usual. 
Repeat as needed. +To view the changes you've made with some formatting, just go to the file on github in a browser. To see all of the formatting, read the "Deploying Docs to github.io" section below. ### Tagging a New release @@ -84,7 +85,7 @@ NOTE: You need to have created ssh keys on the host you are running this on and ### Removing a version -To remove version 1.2.6 for example. +To remove version 1.2.6 of the docs, for example, we need to: From cb62058f0c853ac013e6afd6c649ad4d7cb14153 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 1 Sep 2021 18:36:43 +0000 Subject: [PATCH 035/126] Adding version-1.2.11 docs --- website/docusaurus.config.js | 2 +- .../version-1.2.11/components/docker_env.md | 40 +++ .../components/docker_first_steps.md | 26 ++ .../components/docker_pipeline.md | 31 ++ .../deploy/bare_metal_install.md | 299 ++++++++++++++++++ .../version-1.2.11/deploy/choosing.md | 25 ++ .../deploy/docker_install_advanced.md | 216 +++++++++++++ .../deploy/docker_install_simple.md | 121 +++++++ .../deploy/docker_troubleshooting.md | 40 +++ .../version-1.2.11/deploy/docker_upgrade.md | 78 +++++ .../version-1.2.11/devel/docker.md | 83 +++++ .../devel/documentation_guide.md | 143 +++++++++ .../version-1.2.11/devel/pipeline_dataset.md | 34 ++ .../version-1.2.11/devel/tag.md | 46 +++ .../version-1.2.11/pipeline/elastic_search.md | 124 ++++++++ .../version-1.2.11/pipeline/importer.md | 14 + .../version-1.2.11/pipeline/intro.md | 37 +++ .../version-1.2.11/pipeline/logstash.md | 128 ++++++++ .../version-1.2.11/pipeline/nfdump.md | 17 + .../version-1.2.11/pipeline/tstat.md | 16 + .../version-1.2.11-sidebars.json | 89 ++++++ website/versions.json | 1 + 22 files changed, 1609 insertions(+), 1 deletion(-) create mode 100644 website/versioned_docs/version-1.2.11/components/docker_env.md create mode 100644 website/versioned_docs/version-1.2.11/components/docker_first_steps.md create mode 100644 website/versioned_docs/version-1.2.11/components/docker_pipeline.md create mode 100644 website/versioned_docs/version-1.2.11/deploy/bare_metal_install.md create mode 100644 website/versioned_docs/version-1.2.11/deploy/choosing.md create mode 100644 website/versioned_docs/version-1.2.11/deploy/docker_install_advanced.md create mode 100644 website/versioned_docs/version-1.2.11/deploy/docker_install_simple.md create mode 100644 website/versioned_docs/version-1.2.11/deploy/docker_troubleshooting.md create mode 100644 website/versioned_docs/version-1.2.11/deploy/docker_upgrade.md create mode 100644 website/versioned_docs/version-1.2.11/devel/docker.md create mode 100644 website/versioned_docs/version-1.2.11/devel/documentation_guide.md create mode 100644 website/versioned_docs/version-1.2.11/devel/pipeline_dataset.md create mode 100644 website/versioned_docs/version-1.2.11/devel/tag.md create mode 100644 website/versioned_docs/version-1.2.11/pipeline/elastic_search.md create mode 100644 website/versioned_docs/version-1.2.11/pipeline/importer.md create mode 100644 website/versioned_docs/version-1.2.11/pipeline/intro.md create mode 100644 website/versioned_docs/version-1.2.11/pipeline/logstash.md create mode 100644 website/versioned_docs/version-1.2.11/pipeline/nfdump.md create mode 100644 website/versioned_docs/version-1.2.11/pipeline/tstat.md create mode 100644 website/versioned_sidebars/version-1.2.11-sidebars.json diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js index ffc9afd6..591bebfb 100644 --- a/website/docusaurus.config.js +++ b/website/docusaurus.config.js @@ -55,7 +55,7 @@ 
module.exports = { remarkPlugins: [require("remark-import-partial")], sidebarPath: require.resolve("./sidebars.js"), // Please change this to your repo. - lastVersion: "1.2.10", + lastVersion: "1.2.11", versions: { current: { label: `master (unreleased)`, diff --git a/website/versioned_docs/version-1.2.11/components/docker_env.md b/website/versioned_docs/version-1.2.11/components/docker_env.md new file mode 100644 index 00000000..0bfe77ac --- /dev/null +++ b/website/versioned_docs/version-1.2.11/components/docker_env.md @@ -0,0 +1,40 @@ +Next, copy `env.example` to `.env` +```sh +cp env.example .env +``` + +then edit the .env file to set the sensor names to unique identifiers (with spaces or not, no quotes) +```sh +# Importer settings +sflowSensorName=My sflow sensor name +netflowSensorName=My netflow sensor name +``` + + - If you have only one collector, remove or comment out the line for the one you are not using. + - If you have more than one of the same type of collector, see the "Docker Advanced" documentation. + +:::note +These names uniquely identify the source of the data and will be shown in the Grafana dashboards. In elasticsearch, they are saved in the `meta.sensor_id` field. Choose names that are meaningful and unique. +For example, your sensor names might be "MyNet New York Sflow" and "MyNet Boston Netflow" or "MyNet New York - London" and "MyNet New York - Paris". Whatever makes sense in your situation. +::: + +You will also want to edit the **Logstash output rabbit queue** section. This section defines where the final data will land after going through the pipeline. By default, it will be written to a rabbitmq queue on `rabbit`, ie, the local rabbitMQ server running in the docker container. Enter a hostname to send to a remote rabbitMQ server (also the correct username, password, and queue key/name). + +```sh +rabbitmq_output_host=rabbit@mynet.edu +rabbitmq_output_username=guest +rabbitmq_output_pw=guest +rabbitmq_output_key=netsage_archive_input +``` +:::note +To send processed flow data to GlobalNOC at Indiana University, you will need to obtain settings for this section from your contact. A new queue may need to be set up at IU, as well as allowing traffic from your pipeline host. (At IU, data from the this final rabbit queue will be moved into an Elasticsearch instance for storage and viewing.) +::: + +The following options are described in the Docker Advanced section: + +**To drop all flows except those using the specfied interfaces**: Use if only some flows from a router are of interest and those can be identified by interface. + +**To change the sensor name for flows using a certain interface**: Use if you want to break out some flows coming into a port and give them a different sensor name. + +**To "manually" correct flow sizes and rates for sampling for specified sensors**: Use if sampling corrections are not being done automatically. Normally you do not need to use this, but check flows to be sure results are reasonable. + diff --git a/website/versioned_docs/version-1.2.11/components/docker_first_steps.md b/website/versioned_docs/version-1.2.11/components/docker_first_steps.md new file mode 100644 index 00000000..9a75fb05 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/components/docker_first_steps.md @@ -0,0 +1,26 @@ +#### saving this for now in case I need to put it back ####### + +Then checkout the latest version of the code. 
If you are a developer you'll want the latest version from master, otherwise please use make sure +you've checked out the latest tagged version. + +For example, +```sh +## Normal Deployment, eg, checkout version 1.2.8 +$ git fetch +$ git checkout v1.2.8 -b v1.2.8 + +## Developers +$ git fetch +$ git reset --hard origin/master +``` + +:::warning +git reset --hard will obliterate any changes. On initial installation, you should not have any, but if you do wish to save any state, please make sure you commit and backup to a feature branch before continuing + +Example: +```git commit -a -m "Saving local state"; git checkout -b feature/backup; git checkout master``` +::: + + +All instructions that follow assume these first steps were performed succesfully. If not, you'll likely run into errors down the line if the code doesn't match up with the instructions provided. + diff --git a/website/versioned_docs/version-1.2.11/components/docker_pipeline.md b/website/versioned_docs/version-1.2.11/components/docker_pipeline.md new file mode 100644 index 00000000..a0709f08 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/components/docker_pipeline.md @@ -0,0 +1,31 @@ +Start up the pipeline (all containers) using: + +```sh +# docker-compose up -d +``` + +This will also restart any containers/processes that have died. "-d" runs containers in the background. + +You can see the status of the containers and whether any have died (exited) using +```sh +# docker-compose ps +``` + +To check the logs for each of the containers, run + +```sh +# docker-compose logs +# docker-compose logs logstash +# docker-compose logs importer +etc. +``` + +Add `-f` or, e.g., `-f logstash` to see new log messages as they arrive. `--timestamps`, `--tail`, and `--since` are also useful -- look up details in Docker documentation. + +To shut down the pipeline (all containers) use + +```sh +# docker-compose down +``` + +Run all commands from the netsage-pipeline/ directory. diff --git a/website/versioned_docs/version-1.2.11/deploy/bare_metal_install.md b/website/versioned_docs/version-1.2.11/deploy/bare_metal_install.md new file mode 100644 index 00000000..c0c21510 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/deploy/bare_metal_install.md @@ -0,0 +1,299 @@ +--- +id: bare_metal_install +title: Manual Installation Guide +sidebar_label: Manual Installation +--- + +This document covers installing the NetSage Flow Processing Pipeline manually on a new machine (without using Docker). Steps should be followed below in order unless you know for sure what you are doing. This document assumes a RedHat Linux environment or one of its derivatives. + +## Data sources + +The Processing pipeline needs data to ingest in order to do anything. There are two types of data that can be consumed. + +1. sflow or netflow +2. tstat + +At least one of these must be set up on a sensor to provide the incoming flow data. + +Sflow and netflow data should be sent to ports on the pipeline host where nfcapd and/or sfcapd are ready to receive it. + +Tstat data should be sent directly to the logstash input RabbitMQ queue (the same one that the Importer writes to, if it is used). From there, the data will be processed the same as sflow/netflow data. + +## Installing the Prerequisites + +### Installing nfdump + +The nfdump package provides nfcapd and sfcapd processes which recieve flow data and write nfcapd files. +The Importer also uses nfdump. If you are only collecting tstat data, you do not need nfdump. 
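+
+Once nfdump is installed (installation is covered just below), the collector daemons can be started by hand. A minimal sketch follows, using the same sfcapd/nfcapd options the Docker collectors use plus `-D` to daemonize; the ports and output directories here are only placeholders -- use the ports your routers export to and directories that match the flow-path values you will configure for the Importer.
+
+```sh
+# sflow collector, listening on UDP 9998 (example port), writing compressed 5-minute nfcapd files
+sfcapd -D -T all -S 1 -w -z -p 9998 -l /path/to/sflow-files
+
+# netflow/IPFIX collector, listening on UDP 9999 (example port)
+nfcapd -D -T all -S 1 -w -z -p 9999 -l /path/to/netflow-files
+```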
+ + +Nfdump is _not_ listed as a dependency of the Pipeline RPM package, as in a lot cases people are running special builds of nfdump -- but make sure you install it before you try running the Netflow Importer. If in doubt, `yum install nfdump` should work. +Flow data exported by some routers require a newer version of nfdump than the one in the CentOS repos; in these cases, it may be necessary to manually compile and install the lastest nfdump. + +:::note +It is recommended to check the version of nfdump used in the Docker installation and use the same or newer in order to be sure that any fixes for impactful issues are included. +::: + + +If desired, you can also install nfsen, which has a UI for viewing flow data and can manage starting and stopping all the nfcapd/sfcapd processes for you.The nfsen.conf file has a section in which to configure all the sources. + +### Installing RabbitMQ + +The pipeline requires a RabbitMQ server. Typically, this runs on the same server as the pipeline itself, but if need be, you can separate them (for this reason, the Rabbit server is not automatically installed with the pipeline package). + +```sh +[root@host ~]# yum install rabbitmq-server + +``` + +Typically, the default configuration will work. Perform any desired Rabbit configuration, then, start RabbitMQ: + +```sh +[root@host ~]# /sbin/service rabbitmq-server start + or # systemctl start rabbitmq-server.service +``` + +### Installing Logstash + +See the logstash documentation. We are currently using Version 7.10. + +### Installing the EPEL repo + +Some of our dependencies come from the EPEL repo. To install this: + +``` +[root@host ~]# yum install epel-release +``` + +### Installing the GlobalNOC Open Source repo + +The Pipeline package (and its dependencies that are not in EPEL) are in the GlobalNOC Open Source Repo. + +For Red Hat/CentOS 6, create `/etc/yum.repos.d/grnoc6.repo` with the following content. + +``` +[grnoc6] +name=GlobalNOC Public el6 Packages - $basearch +baseurl=https://repo-public.grnoc.iu.edu/repo/6/$basearch +enabled=1 +gpgcheck=1 +gpgkey=https://repo-public.grnoc.iu.edu/repo/RPM-GPG-KEY-GRNOC6 +``` + +For Red Hat/CentOS 7, create `/etc/yum.repos.d/grnoc7.repo` with the following content. + +``` +[grnoc7] +name=GlobalNOC Public el7 Packages - $basearch +baseurl=https://repo-public.grnoc.iu.edu/repo/7/$basearch +enabled=1 +gpgcheck=1 +gpgkey=https://repo-public.grnoc.iu.edu/repo/RPM-GPG-KEY-GRNOC7 +``` + +The first time you install packages from the repo, you will have to accept the GlobalNOC repo key. + +## Installing the Pipeline (Importer and Logstash configs) + +Install it like this: + +``` +[root@host ~]# yum install grnoc-netsage-deidentifier +``` + +Pipeline components: + +1. Flow Filter - GlobalNOC uses this for Cenic data to filter out some flows. Not needed otherwise. +2. Netsage Netflow Importer - required to read nfcapd files from sflow and netflow importers. (If using tstat flow sensors only, this is not needed.) +3. Logstash - be sure the number of logstash pipeline workers in /etc/logstash/logstash.yml is set to 1 or flow stitching/aggregation will not work right! +4. Logstash configs - these are executed in alphabetical order. See the Logstash doc. At a minimum, the input, output, and aggregation configs have parameters that you will need to update or confirm. + +Nothing will automatically start after installation as we need to move on to configuration. 
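+
+Because flow stitching breaks with more than one logstash worker (see item 3 above and the Logstash notes below), it is worth confirming that setting before starting anything. A quick check might look like this (paths are the standard logstash package locations):
+
+```sh
+grep -n "pipeline.workers" /etc/logstash/logstash.yml /etc/logstash/pipelines.yml
+# expect to see: pipeline.workers: 1   -- add or change the line if needed
+```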
+ +## Importer Configuration + +Configuration files of interest are + - /etc/grnoc/netsage/deidentifier/netsage_shared.xml - Shared config file allowing configuration of collections, and Rabbit connection information + - /etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml - other settings + - /etc/grnoc/netsage/deidentifier/logging.conf - logging config + - /etc/grnoc/netsage/deidentifier/logging-debug.conf - logging config with debug enabled + +### Setting up the shared config file + +`/etc/grnoc/netsage/deidentifier/netsage_shared.xml` + +There used to be many perl-based pipeline components and daemons. At this point, only the importer is left, the rest having been replaced by logstash. The shared config file, which was formerly used by all the perl components, is read before reading the individual importer config file. + +The most important part of the shared configuration file is the definition of collections. Each sflow or netflow sensor will have its own collection stanza. Here is one such stanza, a netflow example. Instance and router-address can be left commented out. + +``` + + + /path/to/netflow-files/ + + + Netflow Sensor 1 + + + sflow + + + + + + + + +``` + +Having multiple collections in one importer can sometimes cause issues for aggregation, as looping through the collections one at a time adds to the time between the flows, affecting timeouts. You can also set up multiple Importers with differently named shared and importer config files and separate init.d files. + +There is also RabbitMQ connection information in the shared config, though queue names are set in the Importer config. (The Importer does not read from a rabbit queue, but other old components did, so both input and output are set.) + +Ideally, flows should be deidentified before they leave the host on which the data is stored. If flows that have not be deidentified need to be pushed to another node for some reason, the Rabbit connection must be encrypted with SSL. + +If you're running a default RabbitMQ config, which is open only to 'localhost' as guest/guest, you won't need to change anything here. + +``` + + + 127.0.0.1 + 5672 + guest + guest + 0 + 100 + / + 1 + + + + 127.0.0.1 + 5672 + guest + guest + 0 + 100 + / + 1 + +``` + +### Setting up the Importer config file + +`/etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml` + +This file has a few more setting specific to the Importer component which you may like to adjust. + + - Rabbit_output has the name of the output queue. This should be the same as that of the logstash input queue. + - (The Importer does not actually use an input rabbit queue, so we add a "fake" one here.) + - Min-bytes is a threshold applied to flows aggregated within one nfcapd file. Flows smaller than this will be discarded. + - Min-file-age is used to be sure files are complete before being read. + - Cull-enable and cull-ttl can be used to have nfcapd files older than some number of days automatically deleted. + - Pid-file is where the pid file should be written. Be sure this matches what is used in the init.d file. + - Keep num-processes set to 1. + +```xml + + + + + + netsage_deidentifier_netflow_fake + 2 + + + + 3 + netsage_deidentifier_raw + + + + + 100 + + + 1 + + + + + + /var/cache/netsage/netflow_importer.cache + + + + 100000000 + + + 10m + + + + + + + + + + + + + /var/run/netsage-netflow-importer-daemon.pid + + + +``` + +## Logstash Setup Notes + +Standard logstash filter config files are provided with this package. 
Most should be used as-is, but the input and output configs may be modified for your use. + +The aggregation filter also has settings that may be changed as well - check the two timeouts and the aggregation maps path. + +When upgrading, these logstash configs will not be overwritten. Be sure any changes get copied into the production configs. + +FOR FLOW STITCHING/AGGREGATION - IMPORTANT! +Flow stitching (ie, aggregation) will NOT work properly with more than ONE logstash pipeline worker! +Be sure to set "pipeline.workers: 1" in /etc/logstash/logstash.yml and/or /etc/logstash/pipelines.yml. When running logstash on the command line, use "-w 1". + +## Start Logstash + +```sh +[root@host ~]# /sbin/service logstash start + or # systemctl start logstash.service +``` +It will take couple minutes to start. Log files are normally /var/log/messages and /var/log/logstash/logstash-plain.log. + +When logstash is stopped, any flows currently "in the aggregator" will be written out to /tmp/logstash-aggregation-maps (or the path/file set in 40-aggregation.conf). These will be read in and deleted when logstash is started again. + +## Start the Importer + +Typically, the daemons are started and stopped via init script (CentOS 6) or systemd (CentOS 7). They can also be run manually. The daemons all support these flags: + +`--config [file]` - specify which config file to read + +`--sharedconfig [file]` - specify which shared config file to read + +`--logging [file]` - the logging config + +`--nofork` - run in foreground (do not daemonize) + +```sh +[root@host ~]# /sbin/service netsage-netflow-importer start + or # systemctl start netsage-netflow-importer.service +``` +The Importer will create a deamon process and a worker process. When stopping the service, the worker process might take a few minutes to quit. If it does not quit, kill it by hand. + + +## Cron jobs + +Sample cron files are provided. Please review and uncomment their contents. These periodically download MaxMind, CAIDA, and Science Registry files, and also restart logstash. Logstash needs to be restarted in order for any updated files to be read in. + + + diff --git a/website/versioned_docs/version-1.2.11/deploy/choosing.md b/website/versioned_docs/version-1.2.11/deploy/choosing.md new file mode 100644 index 00000000..43ae4429 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/deploy/choosing.md @@ -0,0 +1,25 @@ +--- +id: choose_install +title: Choosing an Installation Procedure +sidebar_label: Choose Install +--- + +## Manual or BareMetal Installation + +The Manual (baremetal) Installation Guide will walk you through installing the pipeline using your own server infrastructure and requires you to maintain all the components involved. + +It will likely be a bit better when it comes to performance, and have greater flexibility, but there is also more complexity involved in configuring and setting up. + +If you are the ultimate consumer of the data then setting up a baremetal version might be worth doing. Or at least the final rabbitMQ that will be holding the data since it'll like need to handle a large dataset. + +## Dockerized Version + +The Docker version makes it trivial to bring up the pipeline for both a developer and consumer. The work is mostly already done for you. It should be a simple matter of configuring a few env settings and everything should 'just' work. + +If you are simply using the pipeline to deliver the anonymized network stats for someone else's consumption, then using the docker pipeline would be preferred. 
+ +## Choose your adventure + +- [Manual/Server Installation](bare_metal_install) +- [Simple Docker](docker_install_simple.md) - 1 netflow sensor and/or 1 sflow sensor +- [Advanced Docker](docker_install_advanced.md) - options that allow for more complex configurations diff --git a/website/versioned_docs/version-1.2.11/deploy/docker_install_advanced.md b/website/versioned_docs/version-1.2.11/deploy/docker_install_advanced.md new file mode 100644 index 00000000..a8cc604c --- /dev/null +++ b/website/versioned_docs/version-1.2.11/deploy/docker_install_advanced.md @@ -0,0 +1,216 @@ +--- +id: docker_install_advanced +title: Docker Advanced Options Guide +sidebar_label: Docker Advanced Options +--- + +If the basic Docker Installation does not meet your needs, the following customizations will allow for more complex situations. Find the section(s) which apply to you. + +*Please first read the Docker Installation guide in detail. This guide will build on top of that.* + + +## To Add an Additional Sflow or Netflow Collector + +If you have more than 1 sflow and/or 1 netflow sensor, you will need to create more collectors and modify the importer config file. The following instructions describe the steps needed to add one additional sensor. + +Any number of sensors can be accomodated, although if there are more than a few being processed by the same Importer, you may run into issues where long-lasting flows from sensosr A time out in the aggregation step while waiting for flows from sensors B to D to be processed. (Another option might be be to run more than one Docker deployment.) + + +#### a. Edit docker-compose.override.yml + +The pattern to add a flow collector is always the same. To add an sflow collector called example-collector, edit the docker-compose.override.yml file and add something like + +```yaml + example-collector: + image: netsage/nfdump-collector:alpine-1.6.23 + restart: always + command: sfcapd -T all -l /data -S 1 -w -z -p 9997 + volumes: + - ./data/input_data/example:/data + ports: + - "9997:9997/udp" +``` + +- collector name: should be updated to something that has some meaning, in our example "example-collector". +- image: copy from the default collector sections already in the file. +- command: choose between "sfcapd" for sflow and "nfcapd" for netflow, and at the end of the command, specify the port to watch for incoming flow data. +- volumes: specify where to write the nfcapd files. Make sure the path is unique and in ./data/. In this case, we're writing to ./data/input_data/example. Change "example" to something meaningful. +- ports: make sure the port here matches the port you've set in the command. Naturally all ports have to be unique for this host and the router should be configured to export data to the same port. (?? If the port on your docker container is different than the port on your host/local machine, use container_port:host_port.) + +Make sure the indentation is right or you'll get an error about yaml parsing. + +You will also need to uncomment these lines: + +```yaml + volumes: + - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml +``` + + +#### b. Edit netsage_override.xml + +To make the Pipeline Importer aware of the new data to process, you will need to create a custom Importer configuration: netsage_override.xml. This will replace the usual config file netsage_shared.xml. 
+ +```sh +cp compose/importer/netsage_shared.xml userConfig/netsage_override.xml +``` + +Edit netsage_override.xml and add a new "collection" section for the new sensor as in the following example. The flow-path should match the path set above in docker-compose.override.yml. $exampleSensorName is a new "variable"; don't replace it here, it will be replaced with a value that you set in the .env file. For the flow-type, enter "sflow" or "netflow" as appropriate. (Enter "netflow" if you're running IPFIX.) + +```xml + + /data/input_data/example/ + $exampleSensorName + sflow + +``` + +#### c. Edit environment file + +Then, in the .env file, add a line that sets a value for the "variable" you referenced above, $exampleSensorName. The value is the name of the sensor which will be saved to elasticsearch and which appears in Netsage Dashboards. Set it to something meaningful and unique. E.g., + +```ini +exampleSensorName=MyNet Los Angeles sFlow +``` + + +#### d. Running the new collector + +After doing the setup above and selecting the docker version to run, you can start the new collector by running the following line, using the collector name (or by running `docker-compose up -d` to start up all containers): + +```sh +docker-compose up -d example-collector +``` + +## To Keep Only Flows From Certain Interfaces +If your sensors are exporting all flows, but only those using a particular interface are relevant, use this option in the .env file. The collectors and importer will save/read all incoming flows, but the logstash pipeline will drop those that do not have src_ifindex OR dst_inindex equal to one of those listed. + +In the .env file, uncomment lines in the appropriate section and enter the information required. Be sure `ifindex_filter_flag=True` with "True" capitalized as shown, any sensor names are spelled exactly right, and list all the ifindex values of flows that should be kept and processed. Some examples (use just one!): + +```sh +ifindex_filter_keep=123 +ifindex_filter_keep=123,456 +ifindex_filter_keep=Sensor 1: 789 +ifindex_filter_keep=123; Sensor 1: 789; Sensor 2: 800, 900 +``` + +In the first case, all flows that have src_ifindex = 123 or dst_ifindex = 123 will be kept, regardless of sensor name. (Note that this may be a problem if you have more than 1 sensor with the same ifindex values!) +In the 2nd case, if src or dst ifindex is 123 or 456, the flow will be processed. +In the 3rd case, only flows from Sensor 1 will be filtered, with flows using ifindex 789 kept. +In the last example, any flow with ifindex 123 will be kept. Sensor 1 flows with ifindex 789 (or 123) will be kept, and those from Sensor 2 having ifindex 800 or 900 (or 123) will be kept. + +Spaces don't matter except within the sensor names. Punctuation is required as shown. + + +## To Change a Sensor Name Depending on the Interface Used +In some cases, users want to keep all flows from a certain sensor but differentiate between those that enter or exit through specific sensor interfaces. This can be done by using this option in the .env file. + +In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 4 fields are set properly! 
For example, + +```sh +ifindex_sensor_rename_flag=True +ifindex_sensor_rename_old_name=IU Sflow +ifindex_sensor_rename_new_name=IU Bloomington Sflow +ifindex_sensor_rename_ifindex=10032 +``` + +In this case, any flows from the "IU Sflow" sensor that use interface 10032 (src_ifindex = 10032 OR dst_ifindex = 10032) will have the sensor name changed from "IU Sflow" to "IU Bloomington Sflow". Currently, only one such rename can be configured in Docker and only 1 ifindex is allowed. + +:::note +Please notify the devs at IU in advance, if you need to modify a sensor name, because the regexes used for determining sensor_group and sensor_type may have to be updated. +::: + +## To Do Sampling Rate Corrections in Logstash +When flow sampling is done, corrections have to be applied. For example, if you are sampling 1 out of 100 flows, for each flow measured, it is assumed that in reality there would be 100 flows of that size with that src and dst, so the number of bits (and the number of packets, bits/s and packets/s) is multiplied by 100. Usually the collector (nfcapd or sfcapd process) gets the sampling rate from the incoming data and applies the correction, but in some cases, the sensor may not send the sampling rate, or there may be a complex set-up that requires a manual correction. With netflow, a manual correction can be applied using the '-s' option in the nfsen config, if nfsen is being used, or the nfcapd command, but this is not convenient when using Docker. For sflow, there is no such option. In either case, the correction can be made in logstash as follows. + +In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 3 fields are set properly! The same correction can be applied to multiple sensors by using a comma-separed list. The same correction applies to all listed sensors. For example, + +```sh +sampling_correction_flag=True +sampling_correction_sensors=IU Bloomington Sflow, IU Sflow +sampling_correction_factor=512 +``` + +## To Change How Long Nfcapd Files Are Kept +The importer will automatically delete older nfcapd files for you, so that your disk doesn't fill up. By default, 3 days worth of files will be kept. This can be adjusted by making a netsage_override.xml file: + +```sh +cp compose/importer/netsage_shared.xml userConfig/netsage_override.xml +``` + +At the bottom of the file, edit this section to set the number of days worth of files to keep. Set cull-enable to 0 for no culling. Eg, to save 1 days worth of data: +````xml + + 1 + 1 + +```` + +You will also need to uncomment these lines in docker-compose.override.yml: + +```yaml + volumes: + - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml +``` + + +## To Save Flow Data to a Different Location + +By default, data is saved to subdirectories in the ./data/ directory (ie, the data/ directory in the git checkout). If you would like to use a different location, there are two options. + +1. The best solution is to create a symlink between ./data/ and the preferred location, or, for an NFS volume, export it as ${PROJECT_DIR}/data. + +During installation, delete the data/ directory (it should only contain .placeholder), then create your symlink. Eg, to use /var/netsage/ instead of data/, +```sh +cd {netsage-pipeline dir} +mkdir /var/netsage +rm data/.placeholder +rmdir data +ln -s /var/netsage {netsage-pipeline dir}/data +``` +(Check the permissions of the directory.) + +2. 
Alternatively, update volumes in docker-compose.yml and docker-compose.override.yml Eg, to save nfcapd files to subdirs in /mydir, set the collector volumes to `- /mydir/input_data/netflow:/data` (similarly for sflow) and set the importer and logstash volumes to `- /mydir:/data`. + +:::warning +If you choose to update the docker-compose file, keep in mind that those changes will cause a merge conflict or be wiped out on upgrade. +You'll have to manage the volumes exported and ensure all the paths are updated correctly for the next release manually. +::: + +## To Customize Java Settings / Increase Memory Available for Lostash + + +If cpu or memory seems to be a problem, try increasing the JVM heap size for logstash from 2GB to 3 or 4, no more than 8. + +To do this, edit LS_JAVA_OPTS in the .env file. [is this working??] +```yaml +LS_JAVA_OPTS=-Xmx4g -Xms4g +``` + +Here are some tips for adjusting the JVM heap size (https://www.elastic.co/guide/en/logstash/current/jvm-settings.html): + +- Set the minimum (Xms) and maximum (Xmx) heap allocation size to the same value to prevent the heap from resizing at runtime, which is a very costly process. +- CPU utilization can increase unnecessarily if the heap size is too low, resulting in the JVM constantly garbage collecting. You can check for this issue by doubling the heap size to see if performance improves. +- Do not increase the heap size past the amount of physical memory. Some memory must be left to run the OS and other processes. As a general guideline for most installations, don’t exceed 50-75% of physical memory. The more memory you have, the higher percentage you can use. + +To modify other logstash settings, rename the provided example file for JVM Options and tweak the settings as desired: + +```sh +cp userConfig/jvm.options_example userConfig/jvm.options +``` + +Also update the docker-compose.override.xml file to uncomment lines in the logstash section. It should look something like this: + +```yaml +logstash: + image: netsage/pipeline_logstash:latest + volumes: + - ./userConfig/jvm.options:/usr/share/logstash/config/jvm.options +``` + +## To Bring up Kibana and Elasticsearch Containers + +The file docker-compose.develop.yaml can be used in conjunction with docker-compose.yaml to bring up the optional Kibana and Elastic Search components. + +This isn't a production pattern but the tools can be useful at times. Please refer to the [Docker Dev Guide](../devel/docker_dev_guide#optional-elasticsearch-and-kibana) + diff --git a/website/versioned_docs/version-1.2.11/deploy/docker_install_simple.md b/website/versioned_docs/version-1.2.11/deploy/docker_install_simple.md new file mode 100644 index 00000000..1c538ecf --- /dev/null +++ b/website/versioned_docs/version-1.2.11/deploy/docker_install_simple.md @@ -0,0 +1,121 @@ +--- +id: docker_install_simple +title: Docker Installation Guide +sidebar_label: Docker Installation +--- +In this deployment guide, you will learn how to deploy a basic Netsage setup that includes one sflow and/or one netflow collector. If you have more than one collector of either type, or other special situations, see the Docker Advanced guide. 
+ +The Docker containers included in the installation are + - rabbit (the local RabbitMQ server) + - sflow-collector (receives sflow data and writes nfcapd files) + - netflow-collector (receives netflow data and writes nfcapd files) + - importer (reads nfcapd files and puts flows into a local rabbit queue) + - logstash (logstash pipeline that processes flows and sends them to their final destination, by default a local rabbit queue) + - ofelia (cron-like downloading of files used by the logstash pipeline) + +The code and configs for the importer and logstash pipeline can be viewed in the netsage-project/netsage-pipeline github repo. See netsage-project/docker-nfdump-collector for code related to the collectors. + + +### 1. Set up Data Sources +The data processing pipeline needs data to ingest in order to do anything, of course. There are three types of data that can be consumed. + + - sflow + - netflow + - tstat + +At least one of these must be set up on a *sensor* (i.e., flow *exporter* / router), to provide the incoming flow data. +You can do this step later, but it will helpful to have it working first. + +Sflow and netflow data should be exported to the pipeline host where there will be *collectors* (nfcapd and/or sfcapd processes) ready to receive it (see below). To use the default settings, send sflow to port 9998 and netflow/IPFIX to port 9999. On the pipeline host, allow incoming traffic from the flow exporters, of course. + +Tstat data should be sent directly to the logstash input rabbit queue "netsage_deidentifier_raw" on the pipeline host. No collector is needed for tstat data. See the netsage-project/tstat-transport repo. (From there, logstash will grab the data and process it the same way as it processes sflow/netflow data. (See the Docker Advanced guide.) + +### 2. Set up a Pipeline Host +Decide where to run the Docker Pipeline and get it set up. Adjust iptables to allow the flow exporters (routers) to send flow data to the host. + +Install Docker Engine (docker-ce, docker-ce-cli, containerd.io) - see instructions at [https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/). + +Install Docker Compose from Docker's GitHub repository - see [https://docs.docker.com/compose/install/](https://docs.docker.com/compose/install/). You need to **specify version 1.29.2** (or newer) in the curl command. + +Check default file permissions. If the *logstash* user is not able to access the logstash config files in the git checkout, you'll get an error from logstash saying there are no .conf files found even though they are there. Various components also need to be able to read and write to the data/ directory in the checkout. Defaults of 775 (u=rwx, g=rwx, o=rx) should work. + +### 3. Clone the Netsage Pipeline Project + +Clone the netsage-pipeline project from github. +```sh +git clone https://github.com/netsage-project/netsage-pipeline.git +``` + +When the pipeline runs, it uses the logstash conf files that are in the git checkout (in conf-logstash/), as well as a couple other files like docker-compose.yml, so it is important to checkout the correct version. + +Move into the netsage-pipeline/ directory (**all git and docker commands must be run from inside this directory!**), then checkout the most recent version of the code. It will say you are in 'detached HEAD' state. +```sh +git checkout {tag} +``` +Replace "{tag}" with the release version you intend to use, e.g., "v1.2.11". ("Master" is the development version and is not intended for general use!) 
+`git status` will confirm which branch you are on, e.g., master or v1.2.11. + +### 4. Create Docker-compose.override.yml + +Information in the `docker-compose.yml` file tells docker which containers (processes) to run and sets various parameters for them. +Settings in the `docker-compose.override.yml` file will overrule and add to those. Note that docker-compose.yml should not be edited since upgrades will replace it. Put all customizations in the override file, since override files will not be overwritten. + +Collector settings may need to be edited by the user, so the information that docker uses to run the collectors is specified (only) in the override file. Therefore, docker-compose_override.example.yml must always be copied to docker-compose_override.yml. + +```sh +cp docker-compose.override_example.yml docker-compose.override.yml +``` + +By default docker will bring up a single sflow collector and a single netflow collector that listen to udp traffic on ports localhost:9998 and 9999. If this matches your case, you don't need to make any changes to the docker-compose.override_example.yml. + +- If you have only one collector, remove or comment out the section for the one not needed so the collector doesn't run and simply create empty nfcapd files. +- If the collectors need to listen to different ports, make the appropriate changes here in both the "command:" and "ports:" lines. +- By default, the collectors will save flows to nfcapd files in sflow/ and netflow/ subdirectories in `./data/input_data/` (i.e., the data/ directory in the git checkout). If you need to save the data files to a different location, see the Docker Advanced section. + +Other lines in this file you can ignore for now. + +:::note +If you run into issues, try removing all the comments in the override file as they may conflict with the parsing done by docker-compose, though we have not found this to be a problem. +::: + +### 5. Choose Pipeline Version + +Once you've created the docker-compose.override.xml file and finished adjusting it for any customizations, you're ready to select which image versions Docker should run. + +```sh +./scripts/docker_select_version.sh +``` +When prompted, select the **same version** you checked out earlier. + +This script will replace the version numbers of docker images in docker-compose.override.yml and docker-compose.yml with the correct values. + +### 6. Create Environment File + +{@import ../components/docker_env.md} + +## Testing the Collectors + +At this point, you can start the two flow collectors by themselves by running the following line. If you only need one of the collectors, remove the other from this command. + +(See the next section for how to start all the containers, including the collectors.) + +```sh +docker-compose up -d sflow-collector netflow-collector +``` + +Subdirectories for sflow/netflow, year, month, and day are created automatically under `data/input_data/`. File names contain dates and times. +These are not text files; to view the contents, use an [nfdump command](http://www.linuxcertif.com/man/1/nfdump/) (you will need to install nfdump). +Files will be deleted automatically by the importer as they age out (the default is to keep 3 days). + +If the collector(s) are running properly, you should see nfcapd files being written every 5 minutes and they should have sizes of more than a few hundred bytes. (Empty files still have header and footer lines.) +See Troubleshooting if you have problems. 
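+
+As a quick sanity check, something like the following can be used to confirm that files are accumulating and to look at a few of the flows in one of them (the file name below is only an example -- substitute one that actually exists under your data directory; nfdump must be installed on the host):
+
+```sh
+ls -lR data/input_data/netflow/ | tail
+# print the first 5 flow records from one nfcapd file
+nfdump -r data/input_data/netflow/2021/09/01/nfcapd.202109011200 -c 5
+```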
+ +To stop the collectors +```sh +docker-compose down +``` + +## Running the Collectors and Pipeline + +{@import ../components/docker_pipeline.md} + diff --git a/website/versioned_docs/version-1.2.11/deploy/docker_troubleshooting.md b/website/versioned_docs/version-1.2.11/deploy/docker_troubleshooting.md new file mode 100644 index 00000000..8b10ed1d --- /dev/null +++ b/website/versioned_docs/version-1.2.11/deploy/docker_troubleshooting.md @@ -0,0 +1,40 @@ +--- +id: docker_troubleshoot +title: Docker Troubleshooting +sidebar_label: Troubleshooting +--- + +## Troubleshooting + +### If you are not seeing flows after installation + +**Troubleshooting checklist:** + +- Make sure you configured your routers to point to the correct address/port where the collector is running.  +- Check iptables on your pipeline host to be sure incoming traffic from the routers is allowed. +- Use `docker-compose ps` to be sure the collectors (and other containers) are running. +- Check to see if nfcapd files are being written. There should be a directory for the year, month, day and files should be larger than a few hundred bytes. If the files exist but are too small, the collector is running but there are no incoming flows. "nfdump -r filename" will show the flows in a file. +- Make sure you created .env and docker-compose.override.yml files and updated the settings accordingly, sensorName especially since that identifies the source of the data. +- Check the logs of the various containers to see if anything jumps out as being invalid.  `docker-compose logs -f $service_label` +- Check the logs to see if logstash is starting successfully. +- If the final rabbit queue is on an external host, check iptables on that host to be sure incoming traffic from your pipeline host is allowed. + +To see if flows are getting into and being read from the rabbit queue on the pipeline host, you can go to `http://localhost:15672` in your favorite web browser. Login as guest with password guest. Look for accumulating messages and/or messages being acknowledged and published. + +### If flow collection stops + +*Errors:** +- See if any of the containers has died. `docker ps` +- Check the logs of the various containers to see if anything jumps out as being invalid. Eg, `docker-compose logs logstash`. +- If logstash dies with an error about not finding \*.conf files, make sure conf-logstash/ and directories and files within are readable by everyone (and directories are executable by everyone). The data/ directory and subdirectories need to be readable and writable by everyone, as well. + +**Disk space:** +- If the pipeline suddenly fails, check to see if the disk is full. If it is, first try getting rid of old docker images and containers to free up space: `docker image prune -a` and `docker container prune`. +- Also check to see how much space the nfcapd files are consuming. You may need to add more disk space. You could also try deleting nfcapd files after a fewer number of days (see Docker Advanced). + +**Memory:** +- If you are running a lot of data, sometimes docker may need to be allocated more memory. The most +likely culprit is logstash which is only allocated 2GB of RAM by default. + +Please see the Docker Advanced guide. 
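+
+If the rabbit management page mentioned in the checklist above is not convenient, the same kinds of checks can be done from the command line. A rough sketch (service names as used in this project's docker-compose files):
+
+```sh
+docker-compose ps                         # all containers should be "Up"
+docker-compose logs --tail=50 logstash    # look for startup errors
+# show queue depths on the local rabbit container
+docker-compose exec rabbit rabbitmqctl list_queues name messages
+```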
+ diff --git a/website/versioned_docs/version-1.2.11/deploy/docker_upgrade.md b/website/versioned_docs/version-1.2.11/deploy/docker_upgrade.md new file mode 100644 index 00000000..ba9ab726 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/deploy/docker_upgrade.md @@ -0,0 +1,78 @@ +--- +id: docker_upgrade +title: Upgrading +sidebar_label: Docker - Upgrading +--- + +To upgrade a previous installment of the Dockerized pipeline, perform the following steps. + +### Shut things down + +```sh +cd {netsage-pipeline directory} +docker-compose down +``` +This will stop all the docker containers, including the importer, logstash, and any collectors. Note that incoming flow data will not be saved during the time the collectors are down. + +### Update Source Code + +To upgrade to a new release, pull new code from github and ine images from dockerhub. Your customized .env and override files will not be overwritten. + +Update the git repo, mainly to be able to see the latest tags. +```sh +git reset --hard +git pull origin master +``` + +:::warning +git reset --hard will obliterate any changes you have made to non-override files. If necessary, please make sure you commit and save to a feature branch before continuing. + +Example: +```git commit -a -m "Saving local state"; git checkout -b feature/backup; git checkout master``` +::: + +Run these three commands to select the new release you want to run. In the first, replace "{tag}" by the version to run (eg, v1.2.10). When asked by the third, select the same version as the tag you checked out. +```sh +git checkout -b {tag} +git pull +./scripts/docker_select_version.sh +``` +Check to be sure docker-compose.yml and docker-compose.override.yml both now have the version number you selected for pipeline_importer and pipeline_logstash. + +### Check/Update Override Files +Occasionally, something may change which will necessitate editing your override and/or env file. + +- Compare the new `docker-compose.override_example.yml` file to your `docker-compose.override.yml`. Look for`version: "x.x"` at the top. If the version number is different, change it in your docker-compose.override.yml file. (This is the Compose file format version.) + +- Check to see if the version of nfdump has changed. Look for lines like `image: netsage/nfdump-collector:`. Make sure the version in your override file matches what is the example file. (You do not need to actually perform any upgrade yourself. This will ensure the correct version is pulled from Docker Hub.) + +- Also compare your `.env` file with the new `env.example` file to see if any new lines or sections have been added. If there have been any changes relevant to your deployment, eg, new options you want to use, copy the changes into your .env file. + +- If you used the Docker Advanced guide to make a `netsage_override.xml` file, compare it to `netsage_shared.xml` to see if there are any changes. This is unlikely. + + +### Update Docker Containers + +Do not forget this step! Pull new images from Docker Hub. This applies for both development and release versions. + +``` +docker-compose pull +``` + +### Restart all the Docker Containers + +``` +docker-compose up -d +``` + +This will start all the services/containers listed in the docker-compose.yml and docker-compose.override.yml files, including the importer, logstash pipeline, and collectors. + +### Delete old images and containers + +To save space, delete any old images and containers that are not being used. 
+ +``` +docker image prune -a +docker container prune +``` + diff --git a/website/versioned_docs/version-1.2.11/devel/docker.md b/website/versioned_docs/version-1.2.11/devel/docker.md new file mode 100644 index 00000000..21cb7d5c --- /dev/null +++ b/website/versioned_docs/version-1.2.11/devel/docker.md @@ -0,0 +1,83 @@ +--- +id: docker_dev_guide +title: Docker Dev Guide +sidebar_label: Docker Dev Guide +--- + +## Selecting a Version + +You can use the "master" version or a tagged version. +To select a released version use the docker_select_version.sh script (see the Deployment Guide). +If you wish to use the development version (master branch) simply skip the docker_select_version.sh step. + +## Installing + +See the Deployment Guide to learn how to set up collectors, your environment and override files, etc. + +## Importer + +The importer "shared" config that Docker uses is defined in compose/netsage_shared.xml. ** NOTE: If you want to make changes to this file, you will need to rebuild the container** + +## Build Images + +The images are published on Docker Hub, but if you'd like to incorporate local changes please follow the process below. + +### Build Using Source Code + +If you would like to build the *importer* container using the version of the pipeline scripts found in the GitHub repo then run the following: + +```sh +docker-compose -f docker-compose.build.yml build + +``` + +NOTE: The importer container includes the config files for the logstash pipeline. + + +## Optional: ElasticSearch and Kibana + +You can optionally store flow data locally in an ElasticSearch container and view the data with Kibana. Local storage can be enabled with the following steps: + +1. Uncomment the following lines in conf-logstash/99-outputs.conf: + +``` +elasticsearch { + hosts => ["elasticsearch"] + index => "netsage_flow-%{+YYYY.MM.dd}" +} +``` + +2. Comment out the `rabbitmq {...}` block in conf-logstash/99-outputs.conf if you do not want to also send logstash output to RabbitMQ. + +3. Run the containers using the following line: ` ` ` docker-compose -f docker-compose.yml -f docker-compose.develop.yml up -d ` ` ` + +## Handy Docker Commands + +### Start the Containers + +``` sh +docker-compose up -d +``` + +### Stop the Containers + +``` sh +docker-compose stop && docker-compose rm +``` + +### Enter a Container Shell + +``` sh +docker-compose exec logstash bash #bash shell in logstash container +docker-compose exec importer bash #bash shell in importer container +docker-compose exec rabbit bash #bash shell in rabbit container +``` + +### View Container Logs + +``` sh +docker-compose logs -f #view logs for all containers +docker-compose logs -f logstash #view logs for logstash container +docker-compose logs -f importer #view logs for importer container +docker-compose logs -f rabbit #view logs for rabbit container +``` diff --git a/website/versioned_docs/version-1.2.11/devel/documentation_guide.md b/website/versioned_docs/version-1.2.11/devel/documentation_guide.md new file mode 100644 index 00000000..076628b2 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/devel/documentation_guide.md @@ -0,0 +1,143 @@ +--- +id: docusaurus +title: Revising Documentation +sidebar_label: Docusaurus +--- + +This project's documentation uses Docusaurus. + +Docusaurus converts markdown into html and builds a static website using React UI components, which can be exported to a webserver. + +Yarn is a package manager for JavaScript and replaces the npm client. It is not strictly necessary but highly encouraged. 
+ +To extend the docs simply create a markdown file and reference the ID in the side bar config. Please see the related documentation +at the [docusaurus 2](https://v2.docusaurus.io/) project website. + +*THE FOLLOWING INSTRUCTIONS ARE NOT CONFIRMED TO WORK. PLEASE UPDATE WITH CORRECTIONS.* + +## If Not Using Docker +These are instructions for editing and releasing docs without using Docker. + +### Installation + +To get started the first time, install npm, then use that to install yarn +``` +$ sudo yum install npm +$ sudo npm install -g yarn +``` + +Git clone the netsage pipeline project, then run yarn install to get all the dependencies listed within package.json +``` +$ cd netsage-pipeline/website +$ yarn install +``` + +### If Local Development + +If you are working on your local machine, rather than sshing into a host, you can view changes to the docs in a browser as you work. Use the following commands to generate the static website content (gets written into the build directory), then start a local development server and open up a browser window in which to view the docs. Most changes you make will be reflected live without having to restart the server. +``` +$ yarn build +$ yarn start +go to http://localhost:3000 +``` + +### To Make Changes +Whether on a local machine or a linux host, to make changes, edit the files in website/docs/. +When finished, git add, git commit, git push, as usual. +Repeat as needed. + +To view the changes you've made with some formatting, just go to the file on github in a browser. To see all of the formatting, read the "Deploying Docs to github.io" section below. + +### Tagging a New release + +When it's time to release a new version of the Pipeline, you need to create a new version of the docs as well. + +Once the documentation is stable and you don't forsee any new change, please do the following: + +``` +$ yarn run docusaurus docs:version a.b.c +``` + +replacing a.b.c with the next release version number. +This will create new versioned docs in website/versioned_docs/. + +Then edit docusaurus.config.js and change `lastVersion:` to refer to the new version number. + +Finally, commit and push the following to github: + * website/versioned_docs/version-a.b.c/ + * website/versioned_sidebars/version-a.b.c.sidebars.json + * versions.json + * docusaurus.config.js + + +### Deploying Docs to github.io +Whether you have created a new set of versioned tags or just want to update the docs in "master", to make changes appear at https://netsage-project.github.io/netsage-pipeline, do the following. + +If Travis or some other CI is working, it will run yarn install and yarn deploy to do this automatically. + +If it is not, do it manually: +``` +$ USE_SSH="true" GIT_USER="your-username" yarn deploy +``` +replacing your-username. This sets a couple env vars then runs 'yarn deploy' which runs 'docusaurus deploy' (see package.json) which pushes the static website created to url: "https://netsage-project.github.io" (see docusaurus.config.js) + +NOTE: You need to have created ssh keys on the host you are running this on and added them to your github account. + +### Removing a version + +To remove version 1.2.6 of the docs, for example, + +we need to: + + * update versions.json to remove the reference + * remove the versioned_docs/version-1.2.6 + * remove versioned_sidebars/version-1.2.6-sidebars.json + +## If Using Docker + +You may also use a docs Docker container to simplify installation, making changes, and deployment. 
This method starts a local web server that allows you to see changes to the docs in a browser on your local machine, as they are made. + +### Build and Start the Container + +Git clone the netsage pipeline project then build and start the container. +The Dockerfile in website/ tells how to build an image that runs yarn. Docker-compose.yml brings up a docs container. +``` +$ cd netsage-pipeline/website +$ docker-compose build build_docs +$ docker-compose up -d docs +go to http://localhost:8000/netsage-pipeline/ +``` + +### To Make Changes +Whether on a local machine or a linux host, to make changes, edit the files in website/docs/. +When finished, git add, git commit, git push, as usual. +Repeat as needed. + +### Tagging a New release + +When it's time to release a new version of the Pipeline, you need to create a new version of the docs as well. + +Once the documentation is stable and you don't forsee any new change, please do the following: + +``` +$ docker-compose build build_docs +$ docker-compose run docs yarn run docusaurus docs:version a.b.c +``` +replacing a.b.c with the next release version number. +This will create new versioned docs in website/versioned_docs/. + +Then edit docusaurus.config.js and change `lastVersion:` to refer to the new version number. + +Finally, commit and push the following to github: + * website/versioned_docs/version-a.b.c/ + * website/versioned_sidebars/version-a.b.c.sidebars.json + * versions.json + * docusaurus.config.js + + +### Deploying Docs to github.io +How to do this when using Docker ??? Get into the container ??? + +For now, go a linux server that has yarn installed and +follow the instructions under If Not Using Docker. + diff --git a/website/versioned_docs/version-1.2.11/devel/pipeline_dataset.md b/website/versioned_docs/version-1.2.11/devel/pipeline_dataset.md new file mode 100644 index 00000000..a061957d --- /dev/null +++ b/website/versioned_docs/version-1.2.11/devel/pipeline_dataset.md @@ -0,0 +1,34 @@ +--- +id: dev_dataset +title: Pipeline Replay Dataset +sidebar_label: Replay Dataset +--- + +The Netsage Pipeline processes network data. Though there are some components and patterns we can use to test +the behavior using things like the Ruby unit [tests](https://github.com/netsage-project/netsage-pipeline/tree/master/conf-logstash/ruby/spec) in logstash, and the [generator](https://www.elastic.co/guide/en/logstash/current/plugins-inputs-generator.html) pligin, but the best +test is to replay network data and inspect the output in the grafana dashboard. + +Two sample data set are provided for the two types of collectors we have (Netflow and Sflow). The network data and ips have been anonymized and should have no identifying information. + +You can download the files from [here](https://drive.google.com/drive/folders/19fzY5EVoKwtYUaiBJq5OxAR82yDY0taG). + +Please take note of which ports the collectors are listing on. Check your docker-compose.override.yml file. If you are using default ports, they should match this [example](https://github.com/netsage-project/netsage-pipeline/blob/master/docker-compose.override_example.yml). + +Currently the default ports are: + - 9998/udp for sflow + - 9999/udp for netflow + +Naturally the collectors have to be running in order for any of this to be usable. 
You can read more on how to get them running in the [Docker Simple Deployment Guide](../deploy/docker_install_simple.md#running-the-collectors) + +In order to replay the data, use the following commands for netflow and sflow respectively: + +### Netflow + +``` +nfreplay -H 127.0.0.1 -p 9999 -r nfcapd-ilight-anon-20200114 -v 9 -d 1000 +``` + +### Sflow + +Coming soon. nfreplay will not work with sflow data type. + diff --git a/website/versioned_docs/version-1.2.11/devel/tag.md b/website/versioned_docs/version-1.2.11/devel/tag.md new file mode 100644 index 00000000..040de851 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/devel/tag.md @@ -0,0 +1,46 @@ +--- +id: docker_dev_tag +title: How to Tag a New Release +sidebar_label: Taggin a Release +--- + +To tag a new release, first updated the version number and Changes file, build the rpm, etc. and upgrade on bare-metal hosts using yum. If all works fine, do the following steps to create new Docker images. + +## In Github, Create a Release/Tag + +Be sure to copy info from the Changes file into the Release description. + +Do this first ??? + +## To Build and Push an Importer Image Manually + +Git clone the pipeline project and have the ?? branch checked out. + +``` +$ docker-compose build +$ docker login +$ docker push $image:$tag +``` + +This will build the image and push it to Docker Hub. + +The person doing this has to have a Docker Hub account and belong to the Netsage team (3 users are allowed, for the free level). + +## With Automation + + +## Versioned Docs + +A new set of versioned docs also has to be tagged. See the Docusaurus guide. + +Does this have to happen before Building the image ?? + +## New Version of Nfdump + +If a new version of dfdump has been released that we need, +???? + +## New Version of Logstash + +If a new version of logstash has been released that we want everyone to use, +??? diff --git a/website/versioned_docs/version-1.2.11/pipeline/elastic_search.md b/website/versioned_docs/version-1.2.11/pipeline/elastic_search.md new file mode 100644 index 00000000..c82a8dbd --- /dev/null +++ b/website/versioned_docs/version-1.2.11/pipeline/elastic_search.md @@ -0,0 +1,124 @@ +--- +id: elastic +title: Elasticsearch +sidebar_label: Elasticsearch +--- + +Flow data is ultimately saved to Elasticsearch. Following are the fields that are used/created in Logstash and that you may see returned by an elasticsearch query. + +### Flow fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|start |Jun 9, 2020 @ 17:39:53.808 | Start time of the flow (first packet seen)| +|end |Jun 9, 2020 @ 17:39:57.699 |End time of the flow (last packet seen)| +|meta.id |a17c4f0542... |Id of the flow (hash of 5-tuple + Sensor name)| +|es_doc_id |4f46bef884... |Hash of meta.id and start time. 
May be used as doc id in ES to prevent duplicates, but see Notes elsewhere.| +|meta.flow_type |sflow |'sflow', 'netflow', or 'tstat'| +|meta.protocol |tcp |Protocol used| +|meta.sensor_id | snvl2-pw-sw-1-mgmt-2.cenic.net|Sensor name (set in importer config, may not always be a hostname) | +|meta.sensor_group |CENIC |Sensor group, usually the network | +|meta.sensor_type |Regional Network |Sensor type ('Circuit', 'Regional Network', etc) | +|meta.country_scope |Domestic |'Domestic', 'International', or 'Mixed', depending on countries of src and dst| +|meta.is_network_testing | no | 'yes' if discipline is 'CS.Network Testing and Monitoring' or port is one used for PerfSonar: 5001, 5101, or 5201| + +### Source Fields (Destination Fields similarly with "dst") + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|meta.src_ip |171.64.68.x | deidentified IP address| +|meta.src_port |80 |port used | +|meta.src_asn |32 |Source ASN from the flow header or, in some cases, the ANS of the IP from the MaxMind GeoIP ASN database| +|meta.src_organization |Stanford University | organization that owns the AS from the CAIDA ASN-Organization database +|meta.src_location.lat | 37.423 | latitude of the IP from the MaxMind GeoIP City database| +|meta.src_location.lon |-122.164 | longitude of the IP from the MaxMind GeoIP City database| +|meta.src_country_name |United States | country of the IP from the MaxMind GeoIP City database| +|meta.src_continent |North America | continent of the IP the MaxMind GeoIP City database| +|meta.src_ifindex |166 |the index of the interface the flow came into| + +### Source Science Registry Fields (Destination Fields similarly with "dst") +The [Science Registry](https://scienceregistry.netsage.global/rdb/) stores human-curated information about various "resources". Resources are sources and destinations of flows. + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|meta.scireg.src.discipline |MPS.Physics.High Energy |The science discipline that uses the resource (ie IP). Note that not the src MAY not have the same discipline as the dst. | +|meta.scireg.src.role |Storage |Role that the host plays | +|meta.scireg.src.org_name |Boston University (BU) |The organization the manages and/or uses the resource, as listed in the Science Registry| +|meta.scireg.src.org_abbr |Boston U |A shorter name for the organization. 
May not be the official abbreviation.| +|meta.scireg.src.resource |BU - ATLAS |Descriptive resource name from SciReg | +|meta.scireg.src.resource_abbr | |Resource abbreviation (if any)| +|meta.scireg.src.project_names |ATLAS |"Projects" that the resource is part of| +|meta.scireg.src.latitude |37.4178 |Resource's latitude, as listed in the Science Registry| +|meta.scireg.src.longitude |-122.178 |Resource's longitude, as listed in the Science Registry| + +### Source "Preferred" Fields (Destination Fields similarly with "dst") + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|meta.src_preferred_org |Stanford University |If the IP was found in the Science Registry, this is the SciReg organization, otherwise it is the CAIDA organization| +|meta.src_preferred_location.lat |37.417800 | Science Registry value if available, otherwise the MaxMind City DB value| +|meta.src_preferred_location.lon |-122.172000i | Science Registry value if available, otherwise the MaxMind City DB value | + +### Value Fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|values.num_bits |939, 458, 560 |Sum of the number of bits in the (stitched) flow| +|values.num_packets |77, 824 |Sum of the number of packets in the (stitched) flows| +|values.duration |3.891 |Calculated as end minus start.| +|values.bits_per_second |241, 443, 988 |Calculated as num_bits divided by duration | +|values.packets_per_second |20, 001 |Calculated as num_packets divided by duration| + +### Tstat Value Fields + +|name |example | +|-----------------------|-----------------------| +|values.tcp_cwin_max |1549681 | +|values.tcp_cwin_min |17| +|values.tcp_initial_cwin|313| +|values.tcp_max_seg_size|64313| +|values.tcp_min_seg_size|17| +|values.tcp_mss |8960| +|values.tcp_out_seq_pkts|0| +|values.tcp_pkts_dup |0| +|values.tcp_pkts_fc |0| +|values.tcp_pkts_fs |0| +|values.tcp_pkts_reor |0| +|values.tcp_pkts_rto |0| +|values.tcp_pkts_unfs |0| +|values.tcp_pkts_unk |2| +|values.tcp_pkts_unrto |0| +|values.tcp_rexmit_bytes |1678| +|values.tcp_rexmit_pkts |2| +|values.tcp_rtt_avg |0.044| +|values.tcp_rtt_max |39.527| +|values.tcp_rtt_min |0.001| +|values.tcp_rtt_std |0.276| +|values.tcp_sack_cnt | 1| +|values.tcp_win_max |1549681| +|values.tcp_win_min |17| +|values.tcp_window_scale |13| + +### Developer Fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|@pipeline_ver |1.2.11 | Version number of the pipeline used to process this flow | +|@ingest_time |Jun 9, 2020 @ 10:03:20.700 | The time the flow entered the logstash pipeline | +|@timestamp |Jun 9, 2020 @ 18:03:21.703 |The time the flow entered the logstash pipeline for tstat flows, or the time stitching finished and the event exited the aggregation filter for other flows.| +|@exit_time |Jun 9, 2020 @ 18:03:25.369 |The time the flow exited the pipeline | +|@processing_time |688.31 |@exit_time minus @ingest_time. Useful for seeing how long stitching took. | +|stitched_flows |13 |Number of flows that came into logstash that were stitched together to make this final one. 1 if no flows were stitched together. 0 for tstat flows, which are never stitched. 
| +|tags |maxmind src asn |Various info and error messages| +|trial | 5 |Can be set in 40-aggregation.conf if desired| + +### Elasticsearch Fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|_index | om-ns-netsage-2020.06.14 | name of the index ("database table") | +|_type |_doc | set by ES | +|_id |HRkcm3IByJ9fEnbnCpaY | elasticsearch document id. | +|_score |1 |set by ES query | +|@version |1 | set by ES | + diff --git a/website/versioned_docs/version-1.2.11/pipeline/importer.md b/website/versioned_docs/version-1.2.11/pipeline/importer.md new file mode 100644 index 00000000..24b05c4b --- /dev/null +++ b/website/versioned_docs/version-1.2.11/pipeline/importer.md @@ -0,0 +1,14 @@ +--- +id: importer +title: Importer +sidebar_label: Importer +--- +A netsage-netflow-importer script reads any new nfcapd files that have come in after a configurable delay and writes the results to the "netsage_deidentifier_raw" RabbitMQ queue. +All flow data waits in the queue until it is read in and processed by the logstash pipeline. + +To read nfcapd files, the importer uses an nfdump command with the "-a" option to aggregate raw flows within the file by the "5-tuple," i.e., the source and destination IPs, ports, and protocol. The "-L" option is used to throw out any aggregated flows below a threshold number of bytes. This threshold is specified in the importer config file. + +### Configuration +Configuration files for the importer are netsage_netflow_importer.xml and netsage_shared.xml in /etc/grnoc/netsage/deidentfier/. Comments in the files briefly describe the options. See also the Deployment pages in these docs. + +To avoid re-reading nfcapd files, the importer stores the names of files that have already been read in /var/cache/netsage/netflow_importer.cache. diff --git a/website/versioned_docs/version-1.2.11/pipeline/intro.md b/website/versioned_docs/version-1.2.11/pipeline/intro.md new file mode 100644 index 00000000..f4cce287 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/pipeline/intro.md @@ -0,0 +1,37 @@ +--- +id: intro +title: Intro +sidebar_label: Intro +--- +# The NetSage Pipeline + +## Description + +The Netsage Flow Processing Pipeline is composed of several components for processing network flow data, including importing, deidentification, metadata tagging, flow stitching, etc. +There are many ways the components can be combined, configured, and run. These documents will describe the standard "simple" set up and provide information for more complex configurations. + +## Data Collection + +In Netsage, sensor(s) are network devices configured to collect flow data ([tstat](http://tstat.polito.it/), [sflow](https://www.rfc-editor.org/info/rfc3176), or [netflow](https://www.cisco.com/c/en/us/products/collateral/ios-nx-os-software/ios-netflow/prod_white_paper0900aecd80406232.html)) and send it to a "pipeline host" for processing. + +Tstat flow data can be sent directly to the pipeline ingest RabbitMQ queue on the pipeline host using the Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) tool. This can be installed as usual or via Docker. + +Sflow and netflow data from configured routers should be sent to the pipeline host where it is collected and stored into nfcapd files using [nfdump tools](https://github.com/phaag/nfdump). The Netsage project has packaged the nfdump tools into a [Docker container](https://github.com/netsage-project/docker-nfdump-collector) for ease of use. 
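For illustration, the collection step described above usually amounts to running sfcapd/nfcapd from the nfdump package. The ports, rotation interval, and output paths below are examples only, not required values (the Docker deployment starts equivalent collectors for you):

```sh
# Example only: listen for sflow on UDP 9998 and netflow on UDP 9999,
# rotating nfcapd files every 5 minutes (-t 300) into dated subdirectories (-S 1).
sfcapd -D -l /data/input_data/sflow   -p 9998 -t 300 -S 1
nfcapd -D -l /data/input_data/netflow -p 9999 -t 300 -S 1
```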
+ +## Pipeline Components + +The Netsage Flow Processing Pipeline is made of the following components + + - Importer: Perl scripts on the pipeline host that read nfcapd flow files and send the flow data to a RabbitMQ queue. ([Doc](importer.md), [in github](https://github.com/netsage-project/netsage-pipeline/blob/master/lib/GRNOC/NetSage/Deidentifier/NetflowImporter.pm)) + - [RabbitMQ](https://www.rabbitmq.com/): Used for message passing and queuing of tasks. + - [Logstash](https://www.elastic.co/logstash) pipeline: Performs a variety of operations on the flow data to transform it and add additional information. ([Doc](logstash.md)) + - [Elasticsearch](https://www.elastic.co/what-is/elasticsearch): Used for storing the final flow data. + +## Visualization + +[Grafana](https://grafana.com/oss/grafana/) or [Kibana](https://www.elastic.co/kibana) can be used to visualize the data stored in elasticsearch. Netsage Grafana Dashboards are available [in github](https://github.com/netsage-project/netsage-grafana-configs). + +## Pipeline Installation + +Originally, the pipeline was deployed by installing all of the components individually on one or more servers (the "BareMetal" or "Manual" Install). More recently, we've also added a Docker deployment option. With simple pipelines having just one sflow and/or one netflow sensor (and any number of tstat sensors), the basic "Docker Installation" should suffice. The "Docker Advanced Options" guide will help when there are more sensors and/or other customizations required. + diff --git a/website/versioned_docs/version-1.2.11/pipeline/logstash.md b/website/versioned_docs/version-1.2.11/pipeline/logstash.md new file mode 100644 index 00000000..658b240a --- /dev/null +++ b/website/versioned_docs/version-1.2.11/pipeline/logstash.md @@ -0,0 +1,128 @@ +--- +id: logstash +title: Logstash Pipeline +sidebar_label: Logstash +--- + +The Logstash portion of the Netsage Pipeline reads in flows from a RabbitMQ queue, performs various transformations and adds additional information to them, then sends them to a location specified in the output logstash config, eventually ending up in an Elasticsearch instance. + +Logstash config files invoke various logstash "filters" and actions. These conf files are located in /etc/logstash/conf.d/. See below for a brief description of what each does and check the files for comments. + +Notes: + - All \*.conf files in conf.d/ are executed in alphabetical order, as if they were one huge file. Those ending in .disabled will not be executed (assuming 'path.config: "/etc/logstash/conf.d/*.conf"' in /etc/logstash/pipelines.yml). + - If actions in a particular .conf file are not needed in your particular case, they can be removed or the file disabled, but check carefully for effects on downstream configs. + - MaxMind, CAIDA, and Science Registry database files required by the geoip and aggregate filters are downloaded from scienceregistry.netsage.global via cron jobs weekly or daily. (MaxMind data can change weekly, CAIDA quarterly, Science Registry information randomly.) **NOTE that new versions won't be used in the pipeline until logstash is restarted.** There is a cron file to do this also, though it's not running in Docker deployments. Similarly for other support files, eg, those used in 90-additional-fields.conf. + - Lookup tables for 55-member-orgs.conf that we have compiled are available from sciencregistry.grnoc.iu.edu. See the cron files provided. These will not be updated often, so you may run the cron jobs or not. 
You will need to provide lists for other networks yourself or ask us. + +## Logstash Sequence + +The main things done in each conf file are as follows. + +### 01-input-rabbit.conf + +Reads flows from a rabbitmq queue. (The ".disabled" extension can be removed from other 01-input configs available in conf.d/ to get flows from other sources.) + +### 10-preliminaries.conf + +Drops flows to or from private IP addresses; +converts any timestamps in milliseconds to seconds; +drops events with timestamps more than a year in the past or (10 sec) in the future; +sets duration and rates to 0 if duration is <= 0.002 sec (because tiny durations/few samples lead to inaccurate rates). + +### 15-sensor-specific-changes.conf + +Makes any changes to fields needed for specific sensors. This config currently provides 1) the ability to drop all flows that do not use interfaces (ifindexes) in a specified list; lists can be sensor-specific, 2) the ability to change the sensor name for flows from a specified sensor which use a certain interface, and 3) the ability to apply a sampling rate correction manually for named sensors. You may edit the file in a bare-metal installation and specify everything explicitly (upgrades will not overwrite this config) or you may use the environment file specified in the systemd unit file. For Docker installations, use the .env file to specify the parameters. By default, this config will do nothing since the flags will be set to False. + +### 20-add_id.conf + +Adds a unique id (eventually called meta.id) which is a hash of the 5-tuple of the flow (src and dst ips and ports, and protocol) plus the sensor name. This id is used for aggregating (stitching) in the next step. + +### 40-aggregation.conf + +Stitches together flows from different nfcapd files into longer flows, matching them up by meta.id and using a specified inactivity_timeout to decide when to start a new flow. + +Notes: + - By default, 5-minute nfcapd files are assumed and the inactivity_timeout is set to 10.5 minutes. If more than 10.5 min have passed between the start of the current flow and the start of the last matching one, do not stitch them together. + - If your nfcapd files are written every 15 minutes, change the inactivity_timeout to at least 16 minutes. + - There is another "timeout" setting which is basically the maximum duration of a stitched flow (default: 24 hr). + - When logstash shuts down, any flows "in the aggregator" will be written out to aggregate_maps_path (default: /tmp/logstash-aggregation-maps). The file is then read back in when logstash is restarted so aggregation can continue. + - Your logstash pipeline can have only 1 worker or aggregation is not going to work! This is set in the logstash config file. + - Tstat flows come in already complete, so no aggregation is done on those flows. + +### 45-geoip-tagging.conf + +Queries the MaxMind GeoLite2-City database by IP to get src and dst Countries, Continents, Latitudes, and Longitudes; +if the destination IP is in the multicast range, sets the destination Organization, Country, and Continent to "Multicast". + +*This product uses GeoLite2 data created by MaxMind, available from [www.maxmind.com](http://www.maxmind.com).* + +### 50-asn.conf + +Normally with sflow and netflow, flows come in with source and destination ASNs. If there is no ASN in the input event, or the input ASN is 0, 4294967295, or 23456, or it is a private ASN, tries to get an ASN by IP from the MaxMind ASN database. +Sets ASN to -1 if it is unavailable for any reason.
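Two quick shell checks relate to the aggregation notes above. The file locations are the defaults mentioned in these docs for a bare-metal install, so adjust the paths if your setup differs:

```sh
# Stitching requires exactly one logstash pipeline worker:
grep 'pipeline.workers' /etc/logstash/logstash.yml

# Flows still "in the aggregator" are written here at shutdown and
# read back in at the next start (default aggregate_maps_path):
ls -lh /tmp/logstash-aggregation-maps* 2>/dev/null
```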
+ +### 53-caida-org.conf + +Uses the current source and destination ASNs to get organization names from the prepared CAIDA ASN-to-Organization lookup file. + +*This product uses a lookup table constructed from the CAIDA AS Organizations Dataset - see [www.caida.org](http://www.caida.org/data/as-organizations).* + +### 55-member-orgs.conf + +Searches any provided lookup tables by IP to obtain member or customer organization names and overwrite the Organization determined previously. +This allows entities which don't own their own ASs to be listed as the src or dst Organization. + +Note: These lookup tables are not stored in github, but an example is provided to show the layout, and the tables we have can be downloaded via a cron job. + +### 60-scireg-tagging-fakegeoip.conf + +Uses a fake geoip database containing [Science Registry](http://scienceregistry.grnoc.iu.edu) information to tag the flows with source and destination science disciplines and roles, organizations and locations, etc; +removes Registry fields we don't need to save to elasticsearch. + +Notes: + - The [Science Registry](https://scienceregistry.netsage.global/rdb/) stores human-curated information about various "resources". Resources are sources and destinations of flows. + - The Science Registry "fake geoip database" is updated weekly and can be downloaded via wget in a cron job (provided in the installation). + +### 70-deidentify.conf + +Replaces the last octet of IPv4 addresses and the last 4 hextets of IPv6 addresses with x's in order to deidentify them. + +### 80-privatize.org.conf + +Removes information about Australian organizations (or, with modification, any country that has privacy rules that require us not to identify organizations). +If the ASN is one of those listed, completely replaces the IP with x's, sets the location to central Australia, sets all organizations to "AARNet", removes all Projects. + +### 88-preferred-location-org.conf + +Copies Science Registry organization and location values, if they exist, to the meta.preferred_organization and meta.preferred_location fields. If there are no Science Registry values, the organizations and locations from the CAIDA and MaxMind lookups, respectively, are saved to those fields. + +### 90-additional-fields.conf + +Sets additional quick and easy fields. Supporting mapping or ruby files are used - see support/ and ruby/ in conf.d/. Currently we have (for Netsage's use): + - sensor_group = TACC, AMPATH, etc. (based on matching sensor names to regexes) + - sensor_type = Circuit, Archive, Exchange Point, or Regional Network (based on matching sensor names to regexes) + - country_scope = Domestic, International, or Mixed (based on src and dst countries and possibly continents, where Domestic = US, Puerto Rico, or Guam) + - is_network_testing = yes, no (yes if discipline from the science registry is 'CS.Network Testing and Monitoring' or port = 5001, 5101, or 5201) + - es_doc_id = hash of meta.id and the start time of the flow. If this id is used as the document id in elasticsearch, flows that are mistakenly input more than once will update existing documents rather than be added as duplicates. (NOTE: due to how netflow works, use es_doc_id as the ES document id only for sflow!) + +### 95-cleanup.conf + +Does small misc. tasks at the end, like renaming, removing, or converting fields. + +### 98-post-process.conf + +Adds @exit_time and @processing_time (these are mainly for developers). + +### 99-output-rabbit.conf + +Sends results to a final RabbitMQ queue.
(".disabled" can be removed from other output configs to send flows to other places) + +### Final Stage + +In the GlobalNOC-Netsage case, the output filter writes the flows to a network-specific RabbitMQ queue on another host and the last stage is a separate logstash pipeline on a 3rd host. The latter reads flows from the final queue using a rabbitmq input filter and sends it into elasticsearch using an elasticsearch output filter with a mapping template which sets data types for the fields. + +## Field names + +The fields used/created in Logstash (and saved to Elasticsearch) are listed in the [Elasticsearch doc](elastic). + + diff --git a/website/versioned_docs/version-1.2.11/pipeline/nfdump.md b/website/versioned_docs/version-1.2.11/pipeline/nfdump.md new file mode 100644 index 00000000..b9519282 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/pipeline/nfdump.md @@ -0,0 +1,17 @@ +--- +id: nfdump +title: Sflow/Netflow Data Collection +sidebar_label: Sflow/Netflow Data +--- + +Sflow and Netflow export can be configured on appropriate network devices. Netsage uses tools in the Nfdump package to collect and process the resulting flow data. The toolset supports netflow v1, v5/v7, v9, IPFIX and SFLOW, IPv4 as well as IPv6. + +## Netsage Usage + +Nfcapd and/or sfcapd processes (from the nfdump package) are used to collect incoming netflow and/or sflow data and save it to disk in nfcapd files. The files are then read by the [importer](importer), which uses an nfdump command, and sent to RabbitMQ. From there, the [logstash](logstash) pipeline ingests the flows and processes them in exactly the same way as it processes tstat flows. The data is eventually saved in elasticsearch and visualized by [grafana dashboards](https://github.com/netsage-project/netsage-grafana-configs). + +One may also use the nfdump command interactively to view the flows in a nfcapd file in a terminal window. + +## Docker Deployment + +The nfdump/nfcapd/sfcapd processes can be invoked locally or using a Docker container. The Docker deployment of the Pipeline uses an nfdump Docker container. (See the Docker Deployment Guide.) The Docker image definitions can be found [HERE](https://github.com/netsage-project/docker-nfdump-collector) diff --git a/website/versioned_docs/version-1.2.11/pipeline/tstat.md b/website/versioned_docs/version-1.2.11/pipeline/tstat.md new file mode 100644 index 00000000..baab97c5 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/pipeline/tstat.md @@ -0,0 +1,16 @@ +--- +id: tstat +title: Tstat Data Collection +sidebar_label: Tstat Data +--- + +## Netsage GitHub Project + +[Tstat](http://tstat.polito.it/) is a passive sniffer that provides insights into traffic patterns. The Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) project provides client programs to parse the captured data and send it to a rabbitmq host where it can then be processed by the [logstash pipeline](logstash), stored in elasticsearch, and finally displayed in our Grafana [dashboards](https://github.com/netsage-project/netsage-grafana-configs). + +## Docker + +Netsage Docker images exist on Docker Hub for tstat and tstat_transport. This is still in a beta state and is in development. The initial documentation is available [here](https://github.com/netsage-project/tstat-transport/blob/master/docs/docker.md). 
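As an example of the interactive nfdump usage mentioned in the sflow/netflow section above (the capture file path below is hypothetical; point it at one of your own nfcapd files):

```sh
# Show the first flows in one rotated capture file, aggregated by 5-tuple:
nfdump -r /data/input_data/sflow/2021/09/09/nfcapd.202109091200 -a -o long | head -20
```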
+ + + diff --git a/website/versioned_sidebars/version-1.2.11-sidebars.json b/website/versioned_sidebars/version-1.2.11-sidebars.json new file mode 100644 index 00000000..40a8c9ac --- /dev/null +++ b/website/versioned_sidebars/version-1.2.11-sidebars.json @@ -0,0 +1,89 @@ +{ + "version-1.2.11/Pipeline": [ + { + "collapsed": true, + "type": "category", + "label": "Pipeline", + "items": [ + { + "type": "doc", + "id": "version-1.2.11/pipeline/intro" + }, + { + "type": "doc", + "id": "version-1.2.11/pipeline/tstat" + }, + { + "type": "doc", + "id": "version-1.2.11/pipeline/nfdump" + }, + { + "type": "doc", + "id": "version-1.2.11/pipeline/importer" + }, + { + "type": "doc", + "id": "version-1.2.11/pipeline/logstash" + }, + { + "type": "doc", + "id": "version-1.2.11/pipeline/elastic" + } + ] + }, + { + "collapsed": true, + "type": "category", + "label": "Deployment", + "items": [ + { + "type": "doc", + "id": "version-1.2.11/deploy/choose_install" + }, + { + "type": "doc", + "id": "version-1.2.11/deploy/bare_metal_install" + }, + { + "type": "doc", + "id": "version-1.2.11/deploy/docker_install_simple" + }, + { + "type": "doc", + "id": "version-1.2.11/deploy/docker_install_advanced" + }, + { + "type": "doc", + "id": "version-1.2.11/deploy/docker_upgrade" + }, + { + "type": "doc", + "id": "version-1.2.11/deploy/docker_troubleshoot" + } + ] + }, + { + "collapsed": true, + "type": "category", + "label": "Development", + "items": [ + { + "type": "doc", + "id": "version-1.2.11/devel/dev_dataset" + }, + { + "type": "doc", + "id": "version-1.2.11/devel/docker_dev_guide" + }, + { + "type": "doc", + "id": "version-1.2.11/devel/docusaurus" + }, + { + "type": "doc", + "id": "version-1.2.11/devel/docker_dev_tag" + } + ] + } + ] +} diff --git a/website/versions.json b/website/versions.json index 2303a6b2..5b4b29b1 100644 --- a/website/versions.json +++ b/website/versions.json @@ -1,4 +1,5 @@ [ + "1.2.11", "1.2.10", "1.2.9", "1.2.8", From 2991cc8cb2a5d98b65c12221566b9158529e0a27 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 1 Sep 2021 21:37:55 +0000 Subject: [PATCH 036/126] Updated release doc --- website/docs/devel/tag.md | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/website/docs/devel/tag.md b/website/docs/devel/tag.md index 040de851..79b869ef 100644 --- a/website/docs/devel/tag.md +++ b/website/docs/devel/tag.md @@ -1,43 +1,50 @@ --- id: docker_dev_tag -title: How to Tag a New Release -sidebar_label: Taggin a Release +title: How to Do a New Docker-Pipeline Release +sidebar_label: New Docker Release --- -To tag a new release, first updated the version number and Changes file, build the rpm, etc. and upgrade on bare-metal hosts using yum. If all works fine, do the following steps to create new Docker images. +To make a new release, first update the version number and CHANGES file, build the rpm, etc. and upgrade on bare-metal hosts using yum. If all works fine, do the following steps to create new Docker images with which to upgrade Docker deployments. -## In Github, Create a Release/Tag +## In Github, Create a Release Tag Be sure to copy info from the Changes file into the Release description. -Do this first ??? - ## To Build and Push an Importer Image Manually -Git clone the pipeline project and have the ?? branch checked out. +Install docker-compose if not done already. See the Docker Installation instructions. 
+Git clone (or git pull) the pipeline project and check out the tag branch, and set the version number in docker-compose.build.yml using the script. Eg, for v1.2.11, ``` -$ docker-compose build -$ docker login -$ docker push $image:$tag +git clone https://github.com/netsage-project/netsage-pipeline.git +cd netsage-pipeline +git checkout v1.2.11 +./scripts/docker_select_version.sh 1.2.11 ``` -This will build the image and push it to Docker Hub. - +This will then build the importer and pipeline_logstash images and push them to Docker Hub: +``` +$ sudo systemctl start docker +$ sudo docker-compose -f dcoker-cmpose.build.yml build +$ sudo docker login + provide your DockerHub login credentials +$ docker push $image:$tag +``` The person doing this has to have a Docker Hub account and belong to the Netsage team (3 users are allowed, for the free level). ## With Automation +??? ## Versioned Docs A new set of versioned docs also has to be tagged. See the Docusaurus guide. -Does this have to happen before Building the image ?? +I don't think this has to happen before Building the image ## New Version of Nfdump -If a new version of dfdump has been released that we need, +If a new version of nfdump has been released that we need, ???? ## New Version of Logstash From ec095ef78f184913d4151e2be6e79ea9ca53b018 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 3 Sep 2021 04:45:45 +0000 Subject: [PATCH 037/126] Bump immer from 8.0.4 to 9.0.6 in /website Bumps [immer](https://github.com/immerjs/immer) from 8.0.4 to 9.0.6. - [Release notes](https://github.com/immerjs/immer/releases) - [Commits](https://github.com/immerjs/immer/compare/v8.0.4...v9.0.6) --- updated-dependencies: - dependency-name: immer dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- website/package.json | 2 +- website/yarn.lock | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/website/package.json b/website/package.json index 6091a5e6..9c0d48ae 100644 --- a/website/package.json +++ b/website/package.json @@ -12,7 +12,7 @@ "@docusaurus/core": "^2.0.0-alpha.72", "@docusaurus/preset-classic": "^2.0.0-alpha.72", "classnames": "^2.2.6", - "immer": "^8.0.1", + "immer": "^9.0.6", "node-fetch": "^2.6.1", "react": "^16.8.4", "react-dom": "^16.8.4", diff --git a/website/yarn.lock b/website/yarn.lock index 7b43286a..78d4b8df 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -5107,10 +5107,10 @@ immer@8.0.1: resolved "https://registry.yarnpkg.com/immer/-/immer-8.0.1.tgz#9c73db683e2b3975c424fb0572af5889877ae656" integrity sha512-aqXhGP7//Gui2+UrEtvxZxSquQVXTpZ7KDxfCcKAF3Vysvw0CViVaW9RZ1j1xlIYqaaaipBoqdqeibkc18PNvA== -immer@^8.0.1: - version "8.0.4" - resolved "https://registry.yarnpkg.com/immer/-/immer-8.0.4.tgz#3a21605a4e2dded852fb2afd208ad50969737b7a" - integrity sha512-jMfL18P+/6P6epANRvRk6q8t+3gGhqsJ9EuJ25AXE+9bNTYtssvzeYbEd0mXRYWCmmXSIbnlpz6vd6iJlmGGGQ== +immer@^9.0.6: + version "9.0.6" + resolved "https://registry.yarnpkg.com/immer/-/immer-9.0.6.tgz#7a96bf2674d06c8143e327cbf73539388ddf1a73" + integrity sha512-G95ivKpy+EvVAnAab4fVa4YGYn24J1SpEktnJX7JJ45Bd7xqME/SCplFzYFmTbrkwZbQ4xJK1xMTUYBkN6pWsQ== import-fresh@^2.0.0: version "2.0.0" From ed33a0b6e31d21b103ede0ca1afbe7f5db13753e Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 9 Sep 2021 16:39:09 +0000 Subject: [PATCH 038/126] Removed pip and pika from the importer Docker file. Unneeded and causing errors for me. 
--- compose/importer/Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/compose/importer/Dockerfile b/compose/importer/Dockerfile index e2ffe463..7eba027e 100644 --- a/compose/importer/Dockerfile +++ b/compose/importer/Dockerfile @@ -33,9 +33,8 @@ COPY compose/importer/logging.conf /etc/grnoc/netsage/deidentifier/ RUN \ yum -y update && \ yum install -y dnf epel-release && \ - yum install -y python-pip nfdump wget && \ + yum install -y nfdump wget && \ dnf install -y /tmp/*.rpm && \ - pip install --upgrade pip pika && \ yum clean all && \ rm -rf /var/cache/yum From 42c6f8fcbc5b424b7e73b53a387ed77fc27112b1 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 9 Sep 2021 19:56:20 +0000 Subject: [PATCH 039/126] Added documentation --- conf-logstash/95-cleanup.conf | 9 +- website/docs/deploy/docker_install_simple.md | 2 +- website/docs/deploy/docker_troubleshooting.md | 10 +- website/docs/deploy/docker_upgrade.md | 21 ++--- website/docs/devel/tag.md | 94 +++++++++++++++---- 5 files changed, 95 insertions(+), 41 deletions(-) diff --git a/conf-logstash/95-cleanup.conf b/conf-logstash/95-cleanup.conf index e0a35168..1abf9131 100644 --- a/conf-logstash/95-cleanup.conf +++ b/conf-logstash/95-cleanup.conf @@ -4,6 +4,7 @@ filter { # Check or edit the 99-outputs file for any action to be taken based on these tags. if [meta][src_ip] == "0.0.0.x" and [meta][dst_ip] == "0.0.0.x" { mutate { + id => "95-1" add_tag => ["Missing IPs"] add_tag => ["DROP"] } @@ -12,26 +13,26 @@ filter { # rename the 5-tuple+sensor hash to meta.id if [flow_fingerprint] { mutate { - id => "95-1" + id => "95-2" rename => { 'flow_fingerprint' => '[meta][id]' } } } # replace start and end timestamps with date fields date { - id => "95-2" + id => "95-3" match => [ '[start]', 'UNIX' ] target => '[start]' } date { - id => "95-3" + id => "95-4" match => [ '[end]' ,'UNIX' ] target => '[end]' } # remove unneeded fields mutate { - id => "95-4" + id => "95-5" remove_field => "[interval]" remove_field => "[type]" } diff --git a/website/docs/deploy/docker_install_simple.md b/website/docs/deploy/docker_install_simple.md index 1c538ecf..c4216138 100644 --- a/website/docs/deploy/docker_install_simple.md +++ b/website/docs/deploy/docker_install_simple.md @@ -48,7 +48,7 @@ git clone https://github.com/netsage-project/netsage-pipeline.git When the pipeline runs, it uses the logstash conf files that are in the git checkout (in conf-logstash/), as well as a couple other files like docker-compose.yml, so it is important to checkout the correct version. -Move into the netsage-pipeline/ directory (**all git and docker commands must be run from inside this directory!**), then checkout the most recent version of the code. It will say you are in 'detached HEAD' state. +Move into the netsage-pipeline/ directory (**all git and docker commands must be run from inside this directory!**), then checkout the most recent version of the code. It will say you are in 'detached HEAD' state if you don't include -b. ```sh git checkout {tag} ``` diff --git a/website/docs/deploy/docker_troubleshooting.md b/website/docs/deploy/docker_troubleshooting.md index 8b10ed1d..031b572b 100644 --- a/website/docs/deploy/docker_troubleshooting.md +++ b/website/docs/deploy/docker_troubleshooting.md @@ -23,18 +23,16 @@ To see if flows are getting into and being read from the rabbit queue on the pip ### If flow collection stops -*Errors:** -- See if any of the containers has died. 
`docker ps` +**Errors:** +- See if any of the containers has died using `docker ps` - Check the logs of the various containers to see if anything jumps out as being invalid. Eg, `docker-compose logs logstash`. - If logstash dies with an error about not finding \*.conf files, make sure conf-logstash/ and directories and files within are readable by everyone (and directories are executable by everyone). The data/ directory and subdirectories need to be readable and writable by everyone, as well. **Disk space:** - If the pipeline suddenly fails, check to see if the disk is full. If it is, first try getting rid of old docker images and containers to free up space: `docker image prune -a` and `docker container prune`. -- Also check to see how much space the nfcapd files are consuming. You may need to add more disk space. You could also try deleting nfcapd files after a fewer number of days (see Docker Advanced). +- Also check to see how much space the nfcapd files are consuming. You may need to add more disk space. You could also try automatically deleting nfcapd files after a fewer number of days (see Docker Advanced). **Memory:** - If you are running a lot of data, sometimes docker may need to be allocated more memory. The most -likely culprit is logstash which is only allocated 2GB of RAM by default. - -Please see the Docker Advanced guide. +likely culprit is logstash (java) which is only allocated 2GB of RAM by default. Please see the Docker Advanced guide for how to change. diff --git a/website/docs/deploy/docker_upgrade.md b/website/docs/deploy/docker_upgrade.md index ba9ab726..19994d86 100644 --- a/website/docs/deploy/docker_upgrade.md +++ b/website/docs/deploy/docker_upgrade.md @@ -12,26 +12,22 @@ To upgrade a previous installment of the Dockerized pipeline, perform the follow cd {netsage-pipeline directory} docker-compose down ``` -This will stop all the docker containers, including the importer, logstash, and any collectors. Note that incoming flow data will not be saved during the time the collectors are down. +This will stop and remove all the docker containers, including the importer, logstash, and any collectors. Note that incoming flow data will not be saved during the time the collectors are down. ### Update Source Code -To upgrade to a new release, pull new code from github and ine images from dockerhub. Your customized .env and override files will not be overwritten. +To upgrade to a new release, pull new tags/code from github and docker images from dockerhub. Your customized .env and override files will not be overwritten, nor will data files, cache files, or downloaded support files. -Update the git repo, mainly to be able to see the latest tags. ```sh git reset --hard git pull origin master ``` :::warning -git reset --hard will obliterate any changes you have made to non-override files. If necessary, please make sure you commit and save to a feature branch before continuing. - -Example: -```git commit -a -m "Saving local state"; git checkout -b feature/backup; git checkout master``` +git reset --hard will obliterate any changes you have made to non-override files, eg, logstash conf files. If necessary, please make sure you commit and save to a feature branch before continuing. ::: -Run these three commands to select the new release you want to run. In the first, replace "{tag}" by the version to run (eg, v1.2.10). When asked by the third, select the same version as the tag you checked out. +Run these three commands to select the new release you want to run. 
In the first, replace "{tag}" by the version to run (eg, v1.2.11). When asked by the third, select the same version as the tag you checked out. ```sh git checkout -b {tag} git pull @@ -42,9 +38,10 @@ Check to be sure docker-compose.yml and docker-compose.override.yml both now hav ### Check/Update Override Files Occasionally, something may change which will necessitate editing your override and/or env file. -- Compare the new `docker-compose.override_example.yml` file to your `docker-compose.override.yml`. Look for`version: "x.x"` at the top. If the version number is different, change it in your docker-compose.override.yml file. (This is the Compose file format version.) +- Compare the new `docker-compose.override_example.yml` file to your `docker-compose.override.yml`. Be sure to check to see if the version of nfdump has changed. Look for lines like `image: netsage/nfdump-collector:`. Make sure the version in your override file matches what is the example file. (You do not need to actually perform any upgrade yourself. This will ensure the correct version is pulled from Docker Hub.) + +- Also, look for`version: "x.x"` at the top. If the version number is different, change it in your docker-compose.override.yml file. (This is the Compose file format version.) -- Check to see if the version of nfdump has changed. Look for lines like `image: netsage/nfdump-collector:`. Make sure the version in your override file matches what is the example file. (You do not need to actually perform any upgrade yourself. This will ensure the correct version is pulled from Docker Hub.) - Also compare your `.env` file with the new `env.example` file to see if any new lines or sections have been added. If there have been any changes relevant to your deployment, eg, new options you want to use, copy the changes into your .env file. @@ -53,7 +50,7 @@ Occasionally, something may change which will necessitate editing your override ### Update Docker Containers -Do not forget this step! Pull new images from Docker Hub. This applies for both development and release versions. +Do not forget this step! Pull new images from Docker Hub. ``` docker-compose pull @@ -69,7 +66,7 @@ This will start all the services/containers listed in the docker-compose.yml and ### Delete old images and containers -To save space, delete any old images and containers that are not being used. +To keep things tidy, delete any old images and containers that are not being used. ``` docker image prune -a diff --git a/website/docs/devel/tag.md b/website/docs/devel/tag.md index 79b869ef..9a123822 100644 --- a/website/docs/devel/tag.md +++ b/website/docs/devel/tag.md @@ -1,53 +1,111 @@ --- id: docker_dev_tag -title: How to Do a New Docker-Pipeline Release -sidebar_label: New Docker Release +title: How to Release a New Version of the Pipeline +sidebar_label: Making Releases --- -To make a new release, first update the version number and CHANGES file, build the rpm, etc. and upgrade on bare-metal hosts using yum. If all works fine, do the following steps to create new Docker images with which to upgrade Docker deployments. +If a new version of nfdump needs to be used, make the new nfdump-collector image(s) first (see below) and update the docker-compose files with the new version number, then make new pipeline_importer and pipeline_logstash images.. + +## Make an RPM Release + +Use standard procedures to create an rpm of the new version of the pipeline. 
Update the version number and the CHANGES file, build the rpm, repoify, etc., then upgrade grnoc-netsage-deidentifier on bare-metal hosts using yum. If all works well, do the following steps to create new Docker images with which to upgrade Docker deployments. ## In Github, Create a Release Tag -Be sure to copy info from the Changes file into the Release description. +Create a new Tag or Release in Github, eg, v1.2.11. +Be sure to copy info from the CHANGES file into the Release description. -## To Build and Push an Importer Image Manually +## To Build and Push Pipeline_Importer and Pipeline_Logstash Images Manually Install docker-compose if not done already. See the Docker Installation instructions. -Git clone (or git pull) the pipeline project and check out the tag branch, and set the version number in docker-compose.build.yml using the script. Eg, for v1.2.11, +Git clone (or git pull) the pipeline project and check out the tag branch, then set the version number in docker-compose.build.yml using the script. Eg, for v1.2.11, ``` git clone https://github.com/netsage-project/netsage-pipeline.git cd netsage-pipeline -git checkout v1.2.11 +git checkout -b v1.2.11 ./scripts/docker_select_version.sh 1.2.11 ``` -This will then build the importer and pipeline_logstash images and push them to Docker Hub: +Then build the pipeline_importer and pipeline_logstash images and push them to Docker Hub: ``` $ sudo systemctl start docker -$ sudo docker-compose -f dcoker-cmpose.build.yml build +$ sudo docker-compose -f docker-compose.build.yml build $ sudo docker login provide your DockerHub login credentials -$ docker push $image:$tag +$ sudo docker-compose -f docker-compose.build.yml push (will push images mentioned in docker-compose.yml ??) + or $ docker push $image:$tag (will push a specific image version) +$ sudo systemctl stop docker ``` -The person doing this has to have a Docker Hub account and belong to the Netsage team (3 users are allowed, for the free level). +If you run into an error about retrieving a mirrorlist and could not find a valid baseurl for repo, restart docker and try again. +If that doesn't work, try adding this to /etc/hosts: `67.219.148.138 mirrorlist.centos.org`, and/or try `yum install net-tools bridge-utils`, and/or restart network.service. + +The person pushing to Docker Hub must have a Docker Hub account and belong to the Netsage team (3 users are allowed, for the free level). + +It might be a good idea to test the images before pushing them. See "Test Docker Images" below. + ## With Automation ??? -## Versioned Docs +## Test Docker Images + +See the Docker installation instructions... + +In the git checkout of the correct version, make a .env file and a docker-compose.override.yml file in the git checkout. Use samplicate or some other method to have data sent to the dev host. You probably want to send the processed data to a dev Elasticsearch instance. + +Run docker_select_version.sh if you haven't already, then start it up `$ sudo docker-compsoe up -d`! If there are local images, they'll be used, otherwise they'll be pulled from Docker Hub. -A new set of versioned docs also has to be tagged. See the Docusaurus guide. +After about 30 minutes, you should see flows in elasticsearch. -I don't think this has to happen before Building the image +### Versioned Docs + +A new set of versioned docs also has to be tagged once you are done making changes for the latest pipeline version. See the Docusaurus guide. 
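Returning to the image test above, one way to confirm that test flows actually reached the dev Elasticsearch is a couple of curl checks. The host and index pattern here are assumptions; substitute whatever your dev instance and index naming scheme are:

```sh
# List any netsage indices and count the documents in them:
curl -s 'http://localhost:9200/_cat/indices/*netsage*?v'
curl -s 'http://localhost:9200/om-ns-netsage-*/_count?pretty'
```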
+ +### New Nfdump-Collector Images + +If a new version of nfdump has been released that we need, new nfdump-collector images need to be made. + +``` +$ git clone https://github.com/netsage-project/docker-nfdump-collector.git +$ cd docker-nfdump-collector +$ sudo systemctl start docker +``` + +To use squash: create a file at /etc/docker/daemon.json and put into it +``` + "experimental": true + "debug: false" +``` + +To build version $VER, eg, 1.6.23 (both regular and alpine linux versions ?): +``` +$ sudo docker build --build-arg NFDUMP_VERSION=$VER --tag netsage/nfdump-collector:$VER --squash collector +$ sudo docker build --build-arg NFDUMP_VERSION=$VER --tag netsage/nfdump-collector:alpine-$VER -f collector/Dockerfile-alpine --squash . +``` + +To push to Docker Hub and quit docker +``` +$ sudo docker login + provide your DockerHub login credentials +$ sudo docker push netsage/nfdump-collector:$VER +$ sudo systemctl stop docker +``` + +To use the new collector image in the pipeline, change the version number in docker-compose.override_example.yml. For example, to use the alpine-1.6.23 image: +``` +sflow-collector: + image: netsage/nfdump-collector:alpine-1.6.23 +... +netflow-collector: + image: netsage/nfdump-collector:alpine-1.6.23 +``` -## New Version of Nfdump +Remind users to make the same change in their docker-compose.override.yml file when they do the next pipeline upgrade. -If a new version of nfdump has been released that we need, -???? -## New Version of Logstash +### New Version of Logstash If a new version of logstash has been released that we want everyone to use, ??? From a08933f0e556258e4ecee6c973a0f99448711ab6 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 9 Sep 2021 20:17:59 +0000 Subject: [PATCH 040/126] More doc changes --- .../docs/deploy/docker_install_advanced.md | 2 +- website/docs/deploy/docker_troubleshooting.md | 11 ++++------ website/docs/devel/tag.md | 22 ++++++++++--------- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/website/docs/deploy/docker_install_advanced.md b/website/docs/deploy/docker_install_advanced.md index a8cc604c..bc84b812 100644 --- a/website/docs/deploy/docker_install_advanced.md +++ b/website/docs/deploy/docker_install_advanced.md @@ -182,7 +182,7 @@ You'll have to manage the volumes exported and ensure all the paths are updated If cpu or memory seems to be a problem, try increasing the JVM heap size for logstash from 2GB to 3 or 4, no more than 8. -To do this, edit LS_JAVA_OPTS in the .env file. [is this working??] +To do this, edit LS_JAVA_OPTS in the .env file. ```yaml LS_JAVA_OPTS=-Xmx4g -Xms4g ``` diff --git a/website/docs/deploy/docker_troubleshooting.md b/website/docs/deploy/docker_troubleshooting.md index 031b572b..7cfc2690 100644 --- a/website/docs/deploy/docker_troubleshooting.md +++ b/website/docs/deploy/docker_troubleshooting.md @@ -10,16 +10,13 @@ sidebar_label: Troubleshooting **Troubleshooting checklist:** +- Use `docker-compose ps` to be sure the collectors (and other containers) are running. - Make sure you configured your routers to point to the correct address/port where the collector is running.  - Check iptables on your pipeline host to be sure incoming traffic from the routers is allowed. -- Use `docker-compose ps` to be sure the collectors (and other containers) are running. -- Check to see if nfcapd files are being written. There should be a directory for the year, month, day and files should be larger than a few hundred bytes. 
If the files exist but are too small, the collector is running but there are no incoming flows. "nfdump -r filename" will show the flows in a file. +- Check to see if nfcapd files are being written. There should be a directory for the year, month, and day in netsage-pipeline/data/input_data/netflow/ or sflow/, and files should be larger than a few hundred bytes. If the files exist but are too small, the collector is running but there are no incoming flows. "nfdump -r filename" will show the flows in a file (you may need to install nfdump). - Make sure you created .env and docker-compose.override.yml files and updated the settings accordingly, sensorName especially since that identifies the source of the data. -- Check the logs of the various containers to see if anything jumps out as being invalid.  `docker-compose logs -f $service_label` -- Check the logs to see if logstash is starting successfully. -- If the final rabbit queue is on an external host, check iptables on that host to be sure incoming traffic from your pipeline host is allowed. - -To see if flows are getting into and being read from the rabbit queue on the pipeline host, you can go to `http://localhost:15672` in your favorite web browser. Login as guest with password guest. Look for accumulating messages and/or messages being acknowledged and published. +- Check the logs of the various containers to see if anything jumps out as being invalid.  `docker-compose logs $service`, where $service is logstash, importer, rabbit, etc. +- If the final rabbit queue is on an external host, check the credentials you are using and whether iptables on that host allows incoming traffic from your pipeline host. ### If flow collection stops diff --git a/website/docs/devel/tag.md b/website/docs/devel/tag.md index 9a123822..86a3eb12 100644 --- a/website/docs/devel/tag.md +++ b/website/docs/devel/tag.md @@ -15,11 +15,13 @@ Use standard procedures to create an rpm of the new version of the pipeline. Upd Create a new Tag or Release in Github, eg, v1.2.11. Be sure to copy info from the CHANGES file into the Release description. -## To Build and Push Pipeline_Importer and Pipeline_Logstash Images Manually +## To Build and Push Images Manually + +Below is the procedure to build pipeline_importer and pipeline_logstash images manually. Install docker-compose if not done already. See the Docker Installation instructions. -Git clone (or git pull) the pipeline project and check out the tag branch, then set the version number in docker-compose.build.yml using the script. Eg, for v1.2.11, +Git clone (or git pull) the pipeline project and check out the tag you want to build, then set the version number in docker-compose.build.yml using the script. Eg, for v1.2.11, ``` git clone https://github.com/netsage-project/netsage-pipeline.git cd netsage-pipeline @@ -38,32 +40,32 @@ $ sudo docker-compose -f docker-compose.build.yml push (will push images ment $ sudo systemctl stop docker ``` If you run into an error about retrieving a mirrorlist and could not find a valid baseurl for repo, restart docker and try again. -If that doesn't work, try adding this to /etc/hosts: `67.219.148.138 mirrorlist.centos.org`, and/or try `yum install net-tools bridge-utils`, and/or restart network.service. +If that doesn't work, try adding this to /etc/hosts: `67.219.148.138 mirrorlist.centos.org`, and/or try `yum install net-tools bridge-utils`, and/or restart network.service then docker. 
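For reference, that workaround can be applied with the commands below. The IP-to-hostname mapping is the one quoted above; double-check that it is still valid before adding it:

```sh
echo '67.219.148.138 mirrorlist.centos.org' | sudo tee -a /etc/hosts
sudo yum install -y net-tools bridge-utils
sudo systemctl restart network
sudo systemctl restart docker
```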
The person pushing to Docker Hub must have a Docker Hub account and belong to the Netsage team (3 users are allowed, for the free level). It might be a good idea to test the images before pushing them. See "Test Docker Images" below. -## With Automation +## Building With Automation ??? ## Test Docker Images -See the Docker installation instructions... +See the Docker installation instructions for details... -In the git checkout of the correct version, make a .env file and a docker-compose.override.yml file in the git checkout. Use samplicate or some other method to have data sent to the dev host. You probably want to send the processed data to a dev Elasticsearch instance. +In the git checkout of the correct version, make an .env file and a docker-compose.override.yml file. You probably want to send the processed data to a dev Elasticsearch instance. Use samplicate or some other method to have data sent to the dev host. -Run docker_select_version.sh if you haven't already, then start it up `$ sudo docker-compsoe up -d`! If there are local images, they'll be used, otherwise they'll be pulled from Docker Hub. +Run docker_select_version.sh if you haven't already, then start it up `$ sudo docker-compose up -d`. If there are local images, they'll be used, otherwise they'll be pulled from Docker Hub. After about 30 minutes, you should see flows in elasticsearch. -### Versioned Docs +## Make Versioned Docs -A new set of versioned docs also has to be tagged once you are done making changes for the latest pipeline version. See the Docusaurus guide. +A new set of versioned docs also has to be tagged once you are done making changes for the latest pipeline version. See the **Docusaurus guide**. -### New Nfdump-Collector Images +## TO Make New Nfdump-Collector Images If a new version of nfdump has been released that we need, new nfdump-collector images need to be made. From 98ad5c3a565ba018bb0dcc10e366b647e0f50e5d Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 9 Sep 2021 21:03:21 +0000 Subject: [PATCH 041/126] couple more doc changes --- website/docs/deploy/docker_upgrade.md | 13 +++++++++---- website/docs/devel/tag.md | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/website/docs/deploy/docker_upgrade.md b/website/docs/deploy/docker_upgrade.md index 19994d86..d640d12a 100644 --- a/website/docs/deploy/docker_upgrade.md +++ b/website/docs/deploy/docker_upgrade.md @@ -33,9 +33,9 @@ git checkout -b {tag} git pull ./scripts/docker_select_version.sh ``` -Check to be sure docker-compose.yml and docker-compose.override.yml both now have the version number you selected for pipeline_importer and pipeline_logstash. +The docker-compose.yml and docker-compose.override.yml should both now have the version number you selected for pipeline_importer and pipeline_logstash. -### Check/Update Override Files +### Check/Update Customization Files Occasionally, something may change which will necessitate editing your override and/or env file. - Compare the new `docker-compose.override_example.yml` file to your `docker-compose.override.yml`. Be sure to check to see if the version of nfdump has changed. Look for lines like `image: netsage/nfdump-collector:`. Make sure the version in your override file matches what is the example file. (You do not need to actually perform any upgrade yourself. This will ensure the correct version is pulled from Docker Hub.) @@ -43,14 +43,14 @@ Occasionally, something may change which will necessitate editing your override - Also, look for`version: "x.x"` at the top. 
If the version number is different, change it in your docker-compose.override.yml file. (This is the Compose file format version.) -- Also compare your `.env` file with the new `env.example` file to see if any new lines or sections have been added. If there have been any changes relevant to your deployment, eg, new options you want to use, copy the changes into your .env file. +- Compare your `.env` file with the new `env.example` file to see if any new lines or sections have been added. If there have been any changes relevant to your deployment, eg, new options you want to use, copy the changes into your .env file. - If you used the Docker Advanced guide to make a `netsage_override.xml` file, compare it to `netsage_shared.xml` to see if there are any changes. This is unlikely. ### Update Docker Containers -Do not forget this step! Pull new images from Docker Hub. +This should be done automatically when you start up the conctainers, but you can also pull new images from Docker Hub now. ``` docker-compose pull @@ -73,3 +73,8 @@ docker image prune -a docker container prune ``` +To check which images you have +``` +docker image ls +``` + diff --git a/website/docs/devel/tag.md b/website/docs/devel/tag.md index 86a3eb12..18819a89 100644 --- a/website/docs/devel/tag.md +++ b/website/docs/devel/tag.md @@ -65,7 +65,7 @@ After about 30 minutes, you should see flows in elasticsearch. A new set of versioned docs also has to be tagged once you are done making changes for the latest pipeline version. See the **Docusaurus guide**. -## TO Make New Nfdump-Collector Images +## To Make New Nfdump-Collector Images If a new version of nfdump has been released that we need, new nfdump-collector images need to be made. From 9590297bb90b87bf189b4256255ea67ebbd2c162 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 9 Sep 2021 21:14:45 +0000 Subject: [PATCH 042/126] removing 1.2.11 versioned docs --- .../version-1.2.11/components/docker_env.md | 40 --- .../components/docker_first_steps.md | 26 -- .../components/docker_pipeline.md | 31 -- .../deploy/bare_metal_install.md | 299 ------------------ .../version-1.2.11/deploy/choosing.md | 25 -- .../deploy/docker_install_advanced.md | 216 ------------- .../deploy/docker_install_simple.md | 121 ------- .../deploy/docker_troubleshooting.md | 40 --- .../version-1.2.11/deploy/docker_upgrade.md | 78 ----- .../version-1.2.11/devel/docker.md | 83 ----- .../devel/documentation_guide.md | 143 --------- .../version-1.2.11/devel/pipeline_dataset.md | 34 -- .../version-1.2.11/devel/tag.md | 46 --- .../version-1.2.11/pipeline/elastic_search.md | 124 -------- .../version-1.2.11/pipeline/importer.md | 14 - .../version-1.2.11/pipeline/intro.md | 37 --- .../version-1.2.11/pipeline/logstash.md | 128 -------- .../version-1.2.11/pipeline/nfdump.md | 17 - .../version-1.2.11/pipeline/tstat.md | 16 - .../version-1.2.11-sidebars.json | 89 ------ website/versions.json | 1 - website/yarn.lock | 6 +- 22 files changed, 3 insertions(+), 1611 deletions(-) delete mode 100644 website/versioned_docs/version-1.2.11/components/docker_env.md delete mode 100644 website/versioned_docs/version-1.2.11/components/docker_first_steps.md delete mode 100644 website/versioned_docs/version-1.2.11/components/docker_pipeline.md delete mode 100644 website/versioned_docs/version-1.2.11/deploy/bare_metal_install.md delete mode 100644 website/versioned_docs/version-1.2.11/deploy/choosing.md delete mode 100644 website/versioned_docs/version-1.2.11/deploy/docker_install_advanced.md delete mode 100644 
website/versioned_docs/version-1.2.11/deploy/docker_install_simple.md delete mode 100644 website/versioned_docs/version-1.2.11/deploy/docker_troubleshooting.md delete mode 100644 website/versioned_docs/version-1.2.11/deploy/docker_upgrade.md delete mode 100644 website/versioned_docs/version-1.2.11/devel/docker.md delete mode 100644 website/versioned_docs/version-1.2.11/devel/documentation_guide.md delete mode 100644 website/versioned_docs/version-1.2.11/devel/pipeline_dataset.md delete mode 100644 website/versioned_docs/version-1.2.11/devel/tag.md delete mode 100644 website/versioned_docs/version-1.2.11/pipeline/elastic_search.md delete mode 100644 website/versioned_docs/version-1.2.11/pipeline/importer.md delete mode 100644 website/versioned_docs/version-1.2.11/pipeline/intro.md delete mode 100644 website/versioned_docs/version-1.2.11/pipeline/logstash.md delete mode 100644 website/versioned_docs/version-1.2.11/pipeline/nfdump.md delete mode 100644 website/versioned_docs/version-1.2.11/pipeline/tstat.md delete mode 100644 website/versioned_sidebars/version-1.2.11-sidebars.json diff --git a/website/versioned_docs/version-1.2.11/components/docker_env.md b/website/versioned_docs/version-1.2.11/components/docker_env.md deleted file mode 100644 index 0bfe77ac..00000000 --- a/website/versioned_docs/version-1.2.11/components/docker_env.md +++ /dev/null @@ -1,40 +0,0 @@ -Next, copy `env.example` to `.env` -```sh -cp env.example .env -``` - -then edit the .env file to set the sensor names to unique identifiers (with spaces or not, no quotes) -```sh -# Importer settings -sflowSensorName=My sflow sensor name -netflowSensorName=My netflow sensor name -``` - - - If you have only one collector, remove or comment out the line for the one you are not using. - - If you have more than one of the same type of collector, see the "Docker Advanced" documentation. - -:::note -These names uniquely identify the source of the data and will be shown in the Grafana dashboards. In elasticsearch, they are saved in the `meta.sensor_id` field. Choose names that are meaningful and unique. -For example, your sensor names might be "MyNet New York Sflow" and "MyNet Boston Netflow" or "MyNet New York - London" and "MyNet New York - Paris". Whatever makes sense in your situation. -::: - -You will also want to edit the **Logstash output rabbit queue** section. This section defines where the final data will land after going through the pipeline. By default, it will be written to a rabbitmq queue on `rabbit`, ie, the local rabbitMQ server running in the docker container. Enter a hostname to send to a remote rabbitMQ server (also the correct username, password, and queue key/name). - -```sh -rabbitmq_output_host=rabbit@mynet.edu -rabbitmq_output_username=guest -rabbitmq_output_pw=guest -rabbitmq_output_key=netsage_archive_input -``` -:::note -To send processed flow data to GlobalNOC at Indiana University, you will need to obtain settings for this section from your contact. A new queue may need to be set up at IU, as well as allowing traffic from your pipeline host. (At IU, data from the this final rabbit queue will be moved into an Elasticsearch instance for storage and viewing.) -::: - -The following options are described in the Docker Advanced section: - -**To drop all flows except those using the specfied interfaces**: Use if only some flows from a router are of interest and those can be identified by interface. 
- -**To change the sensor name for flows using a certain interface**: Use if you want to break out some flows coming into a port and give them a different sensor name. - -**To "manually" correct flow sizes and rates for sampling for specified sensors**: Use if sampling corrections are not being done automatically. Normally you do not need to use this, but check flows to be sure results are reasonable. - diff --git a/website/versioned_docs/version-1.2.11/components/docker_first_steps.md b/website/versioned_docs/version-1.2.11/components/docker_first_steps.md deleted file mode 100644 index 9a75fb05..00000000 --- a/website/versioned_docs/version-1.2.11/components/docker_first_steps.md +++ /dev/null @@ -1,26 +0,0 @@ -#### saving this for now in case I need to put it back ####### - -Then checkout the latest version of the code. If you are a developer you'll want the latest version from master, otherwise please use make sure -you've checked out the latest tagged version. - -For example, -```sh -## Normal Deployment, eg, checkout version 1.2.8 -$ git fetch -$ git checkout v1.2.8 -b v1.2.8 - -## Developers -$ git fetch -$ git reset --hard origin/master -``` - -:::warning -git reset --hard will obliterate any changes. On initial installation, you should not have any, but if you do wish to save any state, please make sure you commit and backup to a feature branch before continuing - -Example: -```git commit -a -m "Saving local state"; git checkout -b feature/backup; git checkout master``` -::: - - -All instructions that follow assume these first steps were performed succesfully. If not, you'll likely run into errors down the line if the code doesn't match up with the instructions provided. - diff --git a/website/versioned_docs/version-1.2.11/components/docker_pipeline.md b/website/versioned_docs/version-1.2.11/components/docker_pipeline.md deleted file mode 100644 index a0709f08..00000000 --- a/website/versioned_docs/version-1.2.11/components/docker_pipeline.md +++ /dev/null @@ -1,31 +0,0 @@ -Start up the pipeline (all containers) using: - -```sh -# docker-compose up -d -``` - -This will also restart any containers/processes that have died. "-d" runs containers in the background. - -You can see the status of the containers and whether any have died (exited) using -```sh -# docker-compose ps -``` - -To check the logs for each of the containers, run - -```sh -# docker-compose logs -# docker-compose logs logstash -# docker-compose logs importer -etc. -``` - -Add `-f` or, e.g., `-f logstash` to see new log messages as they arrive. `--timestamps`, `--tail`, and `--since` are also useful -- look up details in Docker documentation. - -To shut down the pipeline (all containers) use - -```sh -# docker-compose down -``` - -Run all commands from the netsage-pipeline/ directory. diff --git a/website/versioned_docs/version-1.2.11/deploy/bare_metal_install.md b/website/versioned_docs/version-1.2.11/deploy/bare_metal_install.md deleted file mode 100644 index c0c21510..00000000 --- a/website/versioned_docs/version-1.2.11/deploy/bare_metal_install.md +++ /dev/null @@ -1,299 +0,0 @@ ---- -id: bare_metal_install -title: Manual Installation Guide -sidebar_label: Manual Installation ---- - -This document covers installing the NetSage Flow Processing Pipeline manually on a new machine (without using Docker). Steps should be followed below in order unless you know for sure what you are doing. This document assumes a RedHat Linux environment or one of its derivatives. 
- -## Data sources - -The Processing pipeline needs data to ingest in order to do anything. There are two types of data that can be consumed. - -1. sflow or netflow -2. tstat - -At least one of these must be set up on a sensor to provide the incoming flow data. - -Sflow and netflow data should be sent to ports on the pipeline host where nfcapd and/or sfcapd are ready to receive it. - -Tstat data should be sent directly to the logstash input RabbitMQ queue (the same one that the Importer writes to, if it is used). From there, the data will be processed the same as sflow/netflow data. - -## Installing the Prerequisites - -### Installing nfdump - -The nfdump package provides nfcapd and sfcapd processes which recieve flow data and write nfcapd files. -The Importer also uses nfdump. If you are only collecting tstat data, you do not need nfdump. - - -Nfdump is _not_ listed as a dependency of the Pipeline RPM package, as in a lot cases people are running special builds of nfdump -- but make sure you install it before you try running the Netflow Importer. If in doubt, `yum install nfdump` should work. -Flow data exported by some routers require a newer version of nfdump than the one in the CentOS repos; in these cases, it may be necessary to manually compile and install the lastest nfdump. - -:::note -It is recommended to check the version of nfdump used in the Docker installation and use the same or newer in order to be sure that any fixes for impactful issues are included. -::: - - -If desired, you can also install nfsen, which has a UI for viewing flow data and can manage starting and stopping all the nfcapd/sfcapd processes for you.The nfsen.conf file has a section in which to configure all the sources. - -### Installing RabbitMQ - -The pipeline requires a RabbitMQ server. Typically, this runs on the same server as the pipeline itself, but if need be, you can separate them (for this reason, the Rabbit server is not automatically installed with the pipeline package). - -```sh -[root@host ~]# yum install rabbitmq-server - -``` - -Typically, the default configuration will work. Perform any desired Rabbit configuration, then, start RabbitMQ: - -```sh -[root@host ~]# /sbin/service rabbitmq-server start - or # systemctl start rabbitmq-server.service -``` - -### Installing Logstash - -See the logstash documentation. We are currently using Version 7.10. - -### Installing the EPEL repo - -Some of our dependencies come from the EPEL repo. To install this: - -``` -[root@host ~]# yum install epel-release -``` - -### Installing the GlobalNOC Open Source repo - -The Pipeline package (and its dependencies that are not in EPEL) are in the GlobalNOC Open Source Repo. - -For Red Hat/CentOS 6, create `/etc/yum.repos.d/grnoc6.repo` with the following content. - -``` -[grnoc6] -name=GlobalNOC Public el6 Packages - $basearch -baseurl=https://repo-public.grnoc.iu.edu/repo/6/$basearch -enabled=1 -gpgcheck=1 -gpgkey=https://repo-public.grnoc.iu.edu/repo/RPM-GPG-KEY-GRNOC6 -``` - -For Red Hat/CentOS 7, create `/etc/yum.repos.d/grnoc7.repo` with the following content. - -``` -[grnoc7] -name=GlobalNOC Public el7 Packages - $basearch -baseurl=https://repo-public.grnoc.iu.edu/repo/7/$basearch -enabled=1 -gpgcheck=1 -gpgkey=https://repo-public.grnoc.iu.edu/repo/RPM-GPG-KEY-GRNOC7 -``` - -The first time you install packages from the repo, you will have to accept the GlobalNOC repo key. 
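For example, on a CentOS 7 host you can pre-import that key so the first `yum install` from the repo does not pause to prompt for it (the key URL is the one from the repo file above; use the GRNOC6 key on CentOS 6):

```
[root@host ~]# rpm --import https://repo-public.grnoc.iu.edu/repo/RPM-GPG-KEY-GRNOC7
```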
- -## Installing the Pipeline (Importer and Logstash configs) - -Install it like this: - -``` -[root@host ~]# yum install grnoc-netsage-deidentifier -``` - -Pipeline components: - -1. Flow Filter - GlobalNOC uses this for Cenic data to filter out some flows. Not needed otherwise. -2. Netsage Netflow Importer - required to read nfcapd files from sflow and netflow importers. (If using tstat flow sensors only, this is not needed.) -3. Logstash - be sure the number of logstash pipeline workers in /etc/logstash/logstash.yml is set to 1 or flow stitching/aggregation will not work right! -4. Logstash configs - these are executed in alphabetical order. See the Logstash doc. At a minimum, the input, output, and aggregation configs have parameters that you will need to update or confirm. - -Nothing will automatically start after installation as we need to move on to configuration. - -## Importer Configuration - -Configuration files of interest are - - /etc/grnoc/netsage/deidentifier/netsage_shared.xml - Shared config file allowing configuration of collections, and Rabbit connection information - - /etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml - other settings - - /etc/grnoc/netsage/deidentifier/logging.conf - logging config - - /etc/grnoc/netsage/deidentifier/logging-debug.conf - logging config with debug enabled - -### Setting up the shared config file - -`/etc/grnoc/netsage/deidentifier/netsage_shared.xml` - -There used to be many perl-based pipeline components and daemons. At this point, only the importer is left, the rest having been replaced by logstash. The shared config file, which was formerly used by all the perl components, is read before reading the individual importer config file. - -The most important part of the shared configuration file is the definition of collections. Each sflow or netflow sensor will have its own collection stanza. Here is one such stanza, a netflow example. Instance and router-address can be left commented out. - -``` - - - /path/to/netflow-files/ - - - Netflow Sensor 1 - - - sflow - - - - - - - - -``` - -Having multiple collections in one importer can sometimes cause issues for aggregation, as looping through the collections one at a time adds to the time between the flows, affecting timeouts. You can also set up multiple Importers with differently named shared and importer config files and separate init.d files. - -There is also RabbitMQ connection information in the shared config, though queue names are set in the Importer config. (The Importer does not read from a rabbit queue, but other old components did, so both input and output are set.) - -Ideally, flows should be deidentified before they leave the host on which the data is stored. If flows that have not be deidentified need to be pushed to another node for some reason, the Rabbit connection must be encrypted with SSL. - -If you're running a default RabbitMQ config, which is open only to 'localhost' as guest/guest, you won't need to change anything here. - -``` - - - 127.0.0.1 - 5672 - guest - guest - 0 - 100 - / - 1 - - - - 127.0.0.1 - 5672 - guest - guest - 0 - 100 - / - 1 - -``` - -### Setting up the Importer config file - -`/etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml` - -This file has a few more setting specific to the Importer component which you may like to adjust. - - - Rabbit_output has the name of the output queue. This should be the same as that of the logstash input queue. 
- - (The Importer does not actually use an input rabbit queue, so we add a "fake" one here.) - - Min-bytes is a threshold applied to flows aggregated within one nfcapd file. Flows smaller than this will be discarded. - - Min-file-age is used to be sure files are complete before being read. - - Cull-enable and cull-ttl can be used to have nfcapd files older than some number of days automatically deleted. - - Pid-file is where the pid file should be written. Be sure this matches what is used in the init.d file. - - Keep num-processes set to 1. - -```xml - - - - - - netsage_deidentifier_netflow_fake - 2 - - - - 3 - netsage_deidentifier_raw - - - - - 100 - - - 1 - - - - - - /var/cache/netsage/netflow_importer.cache - - - - 100000000 - - - 10m - - - - - - - - - - - - - /var/run/netsage-netflow-importer-daemon.pid - - - -``` - -## Logstash Setup Notes - -Standard logstash filter config files are provided with this package. Most should be used as-is, but the input and output configs may be modified for your use. - -The aggregation filter also has settings that may be changed as well - check the two timeouts and the aggregation maps path. - -When upgrading, these logstash configs will not be overwritten. Be sure any changes get copied into the production configs. - -FOR FLOW STITCHING/AGGREGATION - IMPORTANT! -Flow stitching (ie, aggregation) will NOT work properly with more than ONE logstash pipeline worker! -Be sure to set "pipeline.workers: 1" in /etc/logstash/logstash.yml and/or /etc/logstash/pipelines.yml. When running logstash on the command line, use "-w 1". - -## Start Logstash - -```sh -[root@host ~]# /sbin/service logstash start - or # systemctl start logstash.service -``` -It will take couple minutes to start. Log files are normally /var/log/messages and /var/log/logstash/logstash-plain.log. - -When logstash is stopped, any flows currently "in the aggregator" will be written out to /tmp/logstash-aggregation-maps (or the path/file set in 40-aggregation.conf). These will be read in and deleted when logstash is started again. - -## Start the Importer - -Typically, the daemons are started and stopped via init script (CentOS 6) or systemd (CentOS 7). They can also be run manually. The daemons all support these flags: - -`--config [file]` - specify which config file to read - -`--sharedconfig [file]` - specify which shared config file to read - -`--logging [file]` - the logging config - -`--nofork` - run in foreground (do not daemonize) - -```sh -[root@host ~]# /sbin/service netsage-netflow-importer start - or # systemctl start netsage-netflow-importer.service -``` -The Importer will create a deamon process and a worker process. When stopping the service, the worker process might take a few minutes to quit. If it does not quit, kill it by hand. - - -## Cron jobs - -Sample cron files are provided. Please review and uncomment their contents. These periodically download MaxMind, CAIDA, and Science Registry files, and also restart logstash. Logstash needs to be restarted in order for any updated files to be read in. 
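For illustration only (the packaged sample cron files are the authoritative reference), a weekly logstash restart entry in /etc/cron.d/ could look something like the following; the day, time, and path are placeholders to adapt:

```
# hypothetical sketch -- follow the provided sample cron files for real schedules and download scripts
30 3 * * 0 root /bin/systemctl restart logstash.service
```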
- - - diff --git a/website/versioned_docs/version-1.2.11/deploy/choosing.md b/website/versioned_docs/version-1.2.11/deploy/choosing.md deleted file mode 100644 index 43ae4429..00000000 --- a/website/versioned_docs/version-1.2.11/deploy/choosing.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -id: choose_install -title: Choosing an Installation Procedure -sidebar_label: Choose Install ---- - -## Manual or BareMetal Installation - -The Manual (baremetal) Installation Guide will walk you through installing the pipeline using your own server infrastructure and requires you to maintain all the components involved. - -It will likely perform a bit better and offer greater flexibility, but there is also more complexity involved in configuring and setting it up. - -If you are the ultimate consumer of the data, setting up a baremetal version might be worth doing, or at least the final rabbitMQ that will hold the data, since it'll likely need to handle a large dataset. - -## Dockerized Version - -The Docker version makes it trivial to bring up the pipeline for both a developer and consumer. The work is mostly already done for you. It should be a simple matter of configuring a few env settings and everything should 'just' work. - -If you are simply using the pipeline to deliver the anonymized network stats for someone else's consumption, then using the docker pipeline would be preferred. - -## Choose your adventure - -- [Manual/Server Installation](bare_metal_install) -- [Simple Docker](docker_install_simple.md) - 1 netflow sensor and/or 1 sflow sensor -- [Advanced Docker](docker_install_advanced.md) - options that allow for more complex configurations diff --git a/website/versioned_docs/version-1.2.11/deploy/docker_install_advanced.md b/website/versioned_docs/version-1.2.11/deploy/docker_install_advanced.md deleted file mode 100644 index a8cc604c..00000000 --- a/website/versioned_docs/version-1.2.11/deploy/docker_install_advanced.md +++ /dev/null @@ -1,216 +0,0 @@ ---- -id: docker_install_advanced -title: Docker Advanced Options Guide -sidebar_label: Docker Advanced Options ---- - -If the basic Docker Installation does not meet your needs, the following customizations will allow for more complex situations. Find the section(s) which apply to you. - -*Please first read the Docker Installation guide in detail. This guide will build on top of that.* - - -## To Add an Additional Sflow or Netflow Collector - -If you have more than 1 sflow and/or 1 netflow sensor, you will need to create more collectors and modify the importer config file. The following instructions describe the steps needed to add one additional sensor. - -Any number of sensors can be accommodated, although if there are more than a few being processed by the same Importer, you may run into issues where long-lasting flows from sensor A time out in the aggregation step while waiting for flows from sensors B to D to be processed. (Another option might be to run more than one Docker deployment.) - - -#### a. Edit docker-compose.override.yml - -The pattern to add a flow collector is always the same.
To add an sflow collector called example-collector, edit the docker-compose.override.yml file and add something like - -```yaml - example-collector: - image: netsage/nfdump-collector:alpine-1.6.23 - restart: always - command: sfcapd -T all -l /data -S 1 -w -z -p 9997 - volumes: - - ./data/input_data/example:/data - ports: - - "9997:9997/udp" -``` - -- collector name: should be updated to something that has some meaning, in our example "example-collector". -- image: copy from the default collector sections already in the file. -- command: choose between "sfcapd" for sflow and "nfcapd" for netflow, and at the end of the command, specify the port to watch for incoming flow data. -- volumes: specify where to write the nfcapd files. Make sure the path is unique and in ./data/. In this case, we're writing to ./data/input_data/example. Change "example" to something meaningful. -- ports: make sure the port here matches the port you've set in the command. Naturally all ports have to be unique for this host and the router should be configured to export data to the same port. (?? If the port on your docker container is different than the port on your host/local machine, use container_port:host_port.) - -Make sure the indentation is right or you'll get an error about yaml parsing. - -You will also need to uncomment these lines: - -```yaml - volumes: - - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml -``` - - -#### b. Edit netsage_override.xml - -To make the Pipeline Importer aware of the new data to process, you will need to create a custom Importer configuration: netsage_override.xml. This will replace the usual config file netsage_shared.xml. - -```sh -cp compose/importer/netsage_shared.xml userConfig/netsage_override.xml -``` - -Edit netsage_override.xml and add a new "collection" section for the new sensor as in the following example. The flow-path should match the path set above in docker-compose.override.yml. $exampleSensorName is a new "variable"; don't replace it here, it will be replaced with a value that you set in the .env file. For the flow-type, enter "sflow" or "netflow" as appropriate. (Enter "netflow" if you're running IPFIX.) - -```xml - - /data/input_data/example/ - $exampleSensorName - sflow - -``` - -#### c. Edit environment file - -Then, in the .env file, add a line that sets a value for the "variable" you referenced above, $exampleSensorName. The value is the name of the sensor which will be saved to elasticsearch and which appears in Netsage Dashboards. Set it to something meaningful and unique. E.g., - -```ini -exampleSensorName=MyNet Los Angeles sFlow -``` - - -#### d. Running the new collector - -After doing the setup above and selecting the docker version to run, you can start the new collector by running the following line, using the collector name (or by running `docker-compose up -d` to start up all containers): - -```sh -docker-compose up -d example-collector -``` - -## To Keep Only Flows From Certain Interfaces -If your sensors are exporting all flows, but only those using a particular interface are relevant, use this option in the .env file. The collectors and importer will save/read all incoming flows, but the logstash pipeline will drop those that do not have src_ifindex OR dst_inindex equal to one of those listed. - -In the .env file, uncomment lines in the appropriate section and enter the information required. 
Be sure `ifindex_filter_flag=True` with "True" capitalized as shown, any sensor names are spelled exactly right, and list all the ifindex values of flows that should be kept and processed. Some examples (use just one!): - -```sh -ifindex_filter_keep=123 -ifindex_filter_keep=123,456 -ifindex_filter_keep=Sensor 1: 789 -ifindex_filter_keep=123; Sensor 1: 789; Sensor 2: 800, 900 -``` - -In the first case, all flows that have src_ifindex = 123 or dst_ifindex = 123 will be kept, regardless of sensor name. (Note that this may be a problem if you have more than 1 sensor with the same ifindex values!) -In the 2nd case, if src or dst ifindex is 123 or 456, the flow will be processed. -In the 3rd case, only flows from Sensor 1 will be filtered, with flows using ifindex 789 kept. -In the last example, any flow with ifindex 123 will be kept. Sensor 1 flows with ifindex 789 (or 123) will be kept, and those from Sensor 2 having ifindex 800 or 900 (or 123) will be kept. - -Spaces don't matter except within the sensor names. Punctuation is required as shown. - - -## To Change a Sensor Name Depending on the Interface Used -In some cases, users want to keep all flows from a certain sensor but differentiate between those that enter or exit through specific sensor interfaces. This can be done by using this option in the .env file. - -In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 4 fields are set properly! For example, - -```sh -ifindex_sensor_rename_flag=True -ifindex_sensor_rename_old_name=IU Sflow -ifindex_sensor_rename_new_name=IU Bloomington Sflow -ifindex_sensor_rename_ifindex=10032 -``` - -In this case, any flows from the "IU Sflow" sensor that use interface 10032 (src_ifindex = 10032 OR dst_ifindex = 10032) will have the sensor name changed from "IU Sflow" to "IU Bloomington Sflow". Currently, only one such rename can be configured in Docker and only 1 ifindex is allowed. - -:::note -Please notify the devs at IU in advance, if you need to modify a sensor name, because the regexes used for determining sensor_group and sensor_type may have to be updated. -::: - -## To Do Sampling Rate Corrections in Logstash -When flow sampling is done, corrections have to be applied. For example, if you are sampling 1 out of 100 flows, for each flow measured, it is assumed that in reality there would be 100 flows of that size with that src and dst, so the number of bits (and the number of packets, bits/s and packets/s) is multiplied by 100. Usually the collector (nfcapd or sfcapd process) gets the sampling rate from the incoming data and applies the correction, but in some cases, the sensor may not send the sampling rate, or there may be a complex set-up that requires a manual correction. With netflow, a manual correction can be applied using the '-s' option in the nfsen config, if nfsen is being used, or the nfcapd command, but this is not convenient when using Docker. For sflow, there is no such option. In either case, the correction can be made in logstash as follows. - -In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 3 fields are set properly! The same correction can be applied to multiple sensors by using a comma-separed list. The same correction applies to all listed sensors. 
For example, - -```sh -sampling_correction_flag=True -sampling_correction_sensors=IU Bloomington Sflow, IU Sflow -sampling_correction_factor=512 -``` - -## To Change How Long Nfcapd Files Are Kept -The importer will automatically delete older nfcapd files for you, so that your disk doesn't fill up. By default, 3 days worth of files will be kept. This can be adjusted by making a netsage_override.xml file: - -```sh -cp compose/importer/netsage_shared.xml userConfig/netsage_override.xml -``` - -At the bottom of the file, edit this section to set the number of days worth of files to keep. Set cull-enable to 0 for no culling. Eg, to save 1 days worth of data: -````xml - - 1 - 1 - -```` - -You will also need to uncomment these lines in docker-compose.override.yml: - -```yaml - volumes: - - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml -``` - - -## To Save Flow Data to a Different Location - -By default, data is saved to subdirectories in the ./data/ directory (ie, the data/ directory in the git checkout). If you would like to use a different location, there are two options. - -1. The best solution is to create a symlink between ./data/ and the preferred location, or, for an NFS volume, export it as ${PROJECT_DIR}/data. - -During installation, delete the data/ directory (it should only contain .placeholder), then create your symlink. Eg, to use /var/netsage/ instead of data/, -```sh -cd {netsage-pipeline dir} -mkdir /var/netsage -rm data/.placeholder -rmdir data -ln -s /var/netsage {netsage-pipeline dir}/data -``` -(Check the permissions of the directory.) - -2. Alternatively, update volumes in docker-compose.yml and docker-compose.override.yml Eg, to save nfcapd files to subdirs in /mydir, set the collector volumes to `- /mydir/input_data/netflow:/data` (similarly for sflow) and set the importer and logstash volumes to `- /mydir:/data`. - -:::warning -If you choose to update the docker-compose file, keep in mind that those changes will cause a merge conflict or be wiped out on upgrade. -You'll have to manage the volumes exported and ensure all the paths are updated correctly for the next release manually. -::: - -## To Customize Java Settings / Increase Memory Available for Lostash - - -If cpu or memory seems to be a problem, try increasing the JVM heap size for logstash from 2GB to 3 or 4, no more than 8. - -To do this, edit LS_JAVA_OPTS in the .env file. [is this working??] -```yaml -LS_JAVA_OPTS=-Xmx4g -Xms4g -``` - -Here are some tips for adjusting the JVM heap size (https://www.elastic.co/guide/en/logstash/current/jvm-settings.html): - -- Set the minimum (Xms) and maximum (Xmx) heap allocation size to the same value to prevent the heap from resizing at runtime, which is a very costly process. -- CPU utilization can increase unnecessarily if the heap size is too low, resulting in the JVM constantly garbage collecting. You can check for this issue by doubling the heap size to see if performance improves. -- Do not increase the heap size past the amount of physical memory. Some memory must be left to run the OS and other processes. As a general guideline for most installations, don’t exceed 50-75% of physical memory. The more memory you have, the higher percentage you can use. 
- -To modify other logstash settings, rename the provided example file for JVM Options and tweak the settings as desired: - -```sh -cp userConfig/jvm.options_example userConfig/jvm.options -``` - -Also update the docker-compose.override.xml file to uncomment lines in the logstash section. It should look something like this: - -```yaml -logstash: - image: netsage/pipeline_logstash:latest - volumes: - - ./userConfig/jvm.options:/usr/share/logstash/config/jvm.options -``` - -## To Bring up Kibana and Elasticsearch Containers - -The file docker-compose.develop.yaml can be used in conjunction with docker-compose.yaml to bring up the optional Kibana and Elastic Search components. - -This isn't a production pattern but the tools can be useful at times. Please refer to the [Docker Dev Guide](../devel/docker_dev_guide#optional-elasticsearch-and-kibana) - diff --git a/website/versioned_docs/version-1.2.11/deploy/docker_install_simple.md b/website/versioned_docs/version-1.2.11/deploy/docker_install_simple.md deleted file mode 100644 index 1c538ecf..00000000 --- a/website/versioned_docs/version-1.2.11/deploy/docker_install_simple.md +++ /dev/null @@ -1,121 +0,0 @@ ---- -id: docker_install_simple -title: Docker Installation Guide -sidebar_label: Docker Installation ---- -In this deployment guide, you will learn how to deploy a basic Netsage setup that includes one sflow and/or one netflow collector. If you have more than one collector of either type, or other special situations, see the Docker Advanced guide. - -The Docker containers included in the installation are - - rabbit (the local RabbitMQ server) - - sflow-collector (receives sflow data and writes nfcapd files) - - netflow-collector (receives netflow data and writes nfcapd files) - - importer (reads nfcapd files and puts flows into a local rabbit queue) - - logstash (logstash pipeline that processes flows and sends them to their final destination, by default a local rabbit queue) - - ofelia (cron-like downloading of files used by the logstash pipeline) - -The code and configs for the importer and logstash pipeline can be viewed in the netsage-project/netsage-pipeline github repo. See netsage-project/docker-nfdump-collector for code related to the collectors. - - -### 1. Set up Data Sources -The data processing pipeline needs data to ingest in order to do anything, of course. There are three types of data that can be consumed. - - - sflow - - netflow - - tstat - -At least one of these must be set up on a *sensor* (i.e., flow *exporter* / router), to provide the incoming flow data. -You can do this step later, but it will helpful to have it working first. - -Sflow and netflow data should be exported to the pipeline host where there will be *collectors* (nfcapd and/or sfcapd processes) ready to receive it (see below). To use the default settings, send sflow to port 9998 and netflow/IPFIX to port 9999. On the pipeline host, allow incoming traffic from the flow exporters, of course. - -Tstat data should be sent directly to the logstash input rabbit queue "netsage_deidentifier_raw" on the pipeline host. No collector is needed for tstat data. See the netsage-project/tstat-transport repo. (From there, logstash will grab the data and process it the same way as it processes sflow/netflow data. (See the Docker Advanced guide.) - -### 2. Set up a Pipeline Host -Decide where to run the Docker Pipeline and get it set up. Adjust iptables to allow the flow exporters (routers) to send flow data to the host. 
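As a sketch of what that might look like with plain iptables, the rules below allow the default collector ports (9998/udp for sflow, 9999/udp for netflow/IPFIX) from one exporter; 192.0.2.10 is a placeholder for your router's address, and firewalld or other tooling would need equivalent rules:

```sh
iptables -A INPUT -p udp -s 192.0.2.10 --dport 9998 -j ACCEPT   # sflow exporter
iptables -A INPUT -p udp -s 192.0.2.10 --dport 9999 -j ACCEPT   # netflow/IPFIX exporter
```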
- -Install Docker Engine (docker-ce, docker-ce-cli, containerd.io) - see instructions at [https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/). - -Install Docker Compose from Docker's GitHub repository - see [https://docs.docker.com/compose/install/](https://docs.docker.com/compose/install/). You need to **specify version 1.29.2** (or newer) in the curl command. - -Check default file permissions. If the *logstash* user is not able to access the logstash config files in the git checkout, you'll get an error from logstash saying there are no .conf files found even though they are there. Various components also need to be able to read and write to the data/ directory in the checkout. Defaults of 775 (u=rwx, g=rwx, o=rx) should work. - -### 3. Clone the Netsage Pipeline Project - -Clone the netsage-pipeline project from github. -```sh -git clone https://github.com/netsage-project/netsage-pipeline.git -``` - -When the pipeline runs, it uses the logstash conf files that are in the git checkout (in conf-logstash/), as well as a couple other files like docker-compose.yml, so it is important to checkout the correct version. - -Move into the netsage-pipeline/ directory (**all git and docker commands must be run from inside this directory!**), then checkout the most recent version of the code. It will say you are in 'detached HEAD' state. -```sh -git checkout {tag} -``` -Replace "{tag}" with the release version you intend to use, e.g., "v1.2.11". ("Master" is the development version and is not intended for general use!) -`git status` will confirm which branch you are on, e.g., master or v1.2.11. - -### 4. Create Docker-compose.override.yml - -Information in the `docker-compose.yml` file tells docker which containers (processes) to run and sets various parameters for them. -Settings in the `docker-compose.override.yml` file will overrule and add to those. Note that docker-compose.yml should not be edited since upgrades will replace it. Put all customizations in the override file, since override files will not be overwritten. - -Collector settings may need to be edited by the user, so the information that docker uses to run the collectors is specified (only) in the override file. Therefore, docker-compose_override.example.yml must always be copied to docker-compose_override.yml. - -```sh -cp docker-compose.override_example.yml docker-compose.override.yml -``` - -By default docker will bring up a single sflow collector and a single netflow collector that listen to udp traffic on ports localhost:9998 and 9999. If this matches your case, you don't need to make any changes to the docker-compose.override_example.yml. - -- If you have only one collector, remove or comment out the section for the one not needed so the collector doesn't run and simply create empty nfcapd files. -- If the collectors need to listen to different ports, make the appropriate changes here in both the "command:" and "ports:" lines. -- By default, the collectors will save flows to nfcapd files in sflow/ and netflow/ subdirectories in `./data/input_data/` (i.e., the data/ directory in the git checkout). If you need to save the data files to a different location, see the Docker Advanced section. - -Other lines in this file you can ignore for now. - -:::note -If you run into issues, try removing all the comments in the override file as they may conflict with the parsing done by docker-compose, though we have not found this to be a problem. -::: - -### 5. 
Choose Pipeline Version - -Once you've created the docker-compose.override.xml file and finished adjusting it for any customizations, you're ready to select which image versions Docker should run. - -```sh -./scripts/docker_select_version.sh -``` -When prompted, select the **same version** you checked out earlier. - -This script will replace the version numbers of docker images in docker-compose.override.yml and docker-compose.yml with the correct values. - -### 6. Create Environment File - -{@import ../components/docker_env.md} - -## Testing the Collectors - -At this point, you can start the two flow collectors by themselves by running the following line. If you only need one of the collectors, remove the other from this command. - -(See the next section for how to start all the containers, including the collectors.) - -```sh -docker-compose up -d sflow-collector netflow-collector -``` - -Subdirectories for sflow/netflow, year, month, and day are created automatically under `data/input_data/`. File names contain dates and times. -These are not text files; to view the contents, use an [nfdump command](http://www.linuxcertif.com/man/1/nfdump/) (you will need to install nfdump). -Files will be deleted automatically by the importer as they age out (the default is to keep 3 days). - -If the collector(s) are running properly, you should see nfcapd files being written every 5 minutes and they should have sizes of more than a few hundred bytes. (Empty files still have header and footer lines.) -See Troubleshooting if you have problems. - -To stop the collectors -```sh -docker-compose down -``` - -## Running the Collectors and Pipeline - -{@import ../components/docker_pipeline.md} - diff --git a/website/versioned_docs/version-1.2.11/deploy/docker_troubleshooting.md b/website/versioned_docs/version-1.2.11/deploy/docker_troubleshooting.md deleted file mode 100644 index 8b10ed1d..00000000 --- a/website/versioned_docs/version-1.2.11/deploy/docker_troubleshooting.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -id: docker_troubleshoot -title: Docker Troubleshooting -sidebar_label: Troubleshooting ---- - -## Troubleshooting - -### If you are not seeing flows after installation - -**Troubleshooting checklist:** - -- Make sure you configured your routers to point to the correct address/port where the collector is running.  -- Check iptables on your pipeline host to be sure incoming traffic from the routers is allowed. -- Use `docker-compose ps` to be sure the collectors (and other containers) are running. -- Check to see if nfcapd files are being written. There should be a directory for the year, month, day and files should be larger than a few hundred bytes. If the files exist but are too small, the collector is running but there are no incoming flows. "nfdump -r filename" will show the flows in a file. -- Make sure you created .env and docker-compose.override.yml files and updated the settings accordingly, sensorName especially since that identifies the source of the data. -- Check the logs of the various containers to see if anything jumps out as being invalid.  `docker-compose logs -f $service_label` -- Check the logs to see if logstash is starting successfully. -- If the final rabbit queue is on an external host, check iptables on that host to be sure incoming traffic from your pipeline host is allowed. - -To see if flows are getting into and being read from the rabbit queue on the pipeline host, you can go to `http://localhost:15672` in your favorite web browser. Login as guest with password guest. 
Look for accumulating messages and/or messages being acknowledged and published. - -### If flow collection stops - -*Errors:** -- See if any of the containers has died. `docker ps` -- Check the logs of the various containers to see if anything jumps out as being invalid. Eg, `docker-compose logs logstash`. -- If logstash dies with an error about not finding \*.conf files, make sure conf-logstash/ and directories and files within are readable by everyone (and directories are executable by everyone). The data/ directory and subdirectories need to be readable and writable by everyone, as well. - -**Disk space:** -- If the pipeline suddenly fails, check to see if the disk is full. If it is, first try getting rid of old docker images and containers to free up space: `docker image prune -a` and `docker container prune`. -- Also check to see how much space the nfcapd files are consuming. You may need to add more disk space. You could also try deleting nfcapd files after a fewer number of days (see Docker Advanced). - -**Memory:** -- If you are running a lot of data, sometimes docker may need to be allocated more memory. The most -likely culprit is logstash which is only allocated 2GB of RAM by default. - -Please see the Docker Advanced guide. - diff --git a/website/versioned_docs/version-1.2.11/deploy/docker_upgrade.md b/website/versioned_docs/version-1.2.11/deploy/docker_upgrade.md deleted file mode 100644 index ba9ab726..00000000 --- a/website/versioned_docs/version-1.2.11/deploy/docker_upgrade.md +++ /dev/null @@ -1,78 +0,0 @@ ---- -id: docker_upgrade -title: Upgrading -sidebar_label: Docker - Upgrading ---- - -To upgrade a previous installment of the Dockerized pipeline, perform the following steps. - -### Shut things down - -```sh -cd {netsage-pipeline directory} -docker-compose down -``` -This will stop all the docker containers, including the importer, logstash, and any collectors. Note that incoming flow data will not be saved during the time the collectors are down. - -### Update Source Code - -To upgrade to a new release, pull new code from github and ine images from dockerhub. Your customized .env and override files will not be overwritten. - -Update the git repo, mainly to be able to see the latest tags. -```sh -git reset --hard -git pull origin master -``` - -:::warning -git reset --hard will obliterate any changes you have made to non-override files. If necessary, please make sure you commit and save to a feature branch before continuing. - -Example: -```git commit -a -m "Saving local state"; git checkout -b feature/backup; git checkout master``` -::: - -Run these three commands to select the new release you want to run. In the first, replace "{tag}" by the version to run (eg, v1.2.10). When asked by the third, select the same version as the tag you checked out. -```sh -git checkout -b {tag} -git pull -./scripts/docker_select_version.sh -``` -Check to be sure docker-compose.yml and docker-compose.override.yml both now have the version number you selected for pipeline_importer and pipeline_logstash. - -### Check/Update Override Files -Occasionally, something may change which will necessitate editing your override and/or env file. - -- Compare the new `docker-compose.override_example.yml` file to your `docker-compose.override.yml`. Look for`version: "x.x"` at the top. If the version number is different, change it in your docker-compose.override.yml file. (This is the Compose file format version.) - -- Check to see if the version of nfdump has changed. 
Look for lines like `image: netsage/nfdump-collector:`. Make sure the version in your override file matches what is the example file. (You do not need to actually perform any upgrade yourself. This will ensure the correct version is pulled from Docker Hub.) - -- Also compare your `.env` file with the new `env.example` file to see if any new lines or sections have been added. If there have been any changes relevant to your deployment, eg, new options you want to use, copy the changes into your .env file. - -- If you used the Docker Advanced guide to make a `netsage_override.xml` file, compare it to `netsage_shared.xml` to see if there are any changes. This is unlikely. - - -### Update Docker Containers - -Do not forget this step! Pull new images from Docker Hub. This applies for both development and release versions. - -``` -docker-compose pull -``` - -### Restart all the Docker Containers - -``` -docker-compose up -d -``` - -This will start all the services/containers listed in the docker-compose.yml and docker-compose.override.yml files, including the importer, logstash pipeline, and collectors. - -### Delete old images and containers - -To save space, delete any old images and containers that are not being used. - -``` -docker image prune -a -docker container prune -``` - diff --git a/website/versioned_docs/version-1.2.11/devel/docker.md b/website/versioned_docs/version-1.2.11/devel/docker.md deleted file mode 100644 index 21cb7d5c..00000000 --- a/website/versioned_docs/version-1.2.11/devel/docker.md +++ /dev/null @@ -1,83 +0,0 @@ ---- -id: docker_dev_guide -title: Docker Dev Guide -sidebar_label: Docker Dev Guide ---- - -## Selecting a Version - -You can use the "master" version or a tagged version. -To select a released version use the docker_select_version.sh script (see the Deployment Guide). -If you wish to use the development version (master branch) simply skip the docker_select_version.sh step. - -## Installing - -See the Deployment Guide to learn how to set up collectors, your environment and override files, etc. - -## Importer - -The importer "shared" config that Docker uses is defined in compose/netsage_shared.xml. ** NOTE: If you want to make changes to this file, you will need to rebuild the container** - -## Build Images - -The images are published on Docker Hub, but if you'd like to incorporate local changes please follow the process below. - -### Build Using Source Code - -If you would like to build the *importer* container using the version of the pipeline scripts found in the GitHub repo then run the following: - -```sh -docker-compose -f docker-compose.build.yml build - -``` - -NOTE: The importer container includes the config files for the logstash pipeline. - - -## Optional: ElasticSearch and Kibana - -You can optionally store flow data locally in an ElasticSearch container and view the data with Kibana. Local storage can be enabled with the following steps: - -1. Uncomment the following lines in conf-logstash/99-outputs.conf: - -``` -elasticsearch { - hosts => ["elasticsearch"] - index => "netsage_flow-%{+YYYY.MM.dd}" -} -``` - -2. Comment out the `rabbitmq {...}` block in conf-logstash/99-outputs.conf if you do not want to also send logstash output to RabbitMQ. - -3. 
Run the containers using the following line: ` ` ` docker-compose -f docker-compose.yml -f docker-compose.develop.yml up -d ` ` ` - -## Handy Docker Commands - -### Start the Containers - -``` sh -docker-compose up -d -``` - -### Stop the Containers - -``` sh -docker-compose stop && docker-compose rm -``` - -### Enter a Container Shell - -``` sh -docker-compose exec logstash bash #bash shell in logstash container -docker-compose exec importer bash #bash shell in importer container -docker-compose exec rabbit bash #bash shell in rabbit container -``` - -### View Container Logs - -``` sh -docker-compose logs -f #view logs for all containers -docker-compose logs -f logstash #view logs for logstash container -docker-compose logs -f importer #view logs for importer container -docker-compose logs -f rabbit #view logs for rabbit container -``` diff --git a/website/versioned_docs/version-1.2.11/devel/documentation_guide.md b/website/versioned_docs/version-1.2.11/devel/documentation_guide.md deleted file mode 100644 index 076628b2..00000000 --- a/website/versioned_docs/version-1.2.11/devel/documentation_guide.md +++ /dev/null @@ -1,143 +0,0 @@ ---- -id: docusaurus -title: Revising Documentation -sidebar_label: Docusaurus ---- - -This project's documentation uses Docusaurus. - -Docusaurus converts markdown into html and builds a static website using React UI components, which can be exported to a webserver. - -Yarn is a package manager for JavaScript and replaces the npm client. It is not strictly necessary but highly encouraged. - -To extend the docs simply create a markdown file and reference the ID in the side bar config. Please see the related documentation -at the [docusaurus 2](https://v2.docusaurus.io/) project website. - -*THE FOLLOWING INSTRUCTIONS ARE NOT CONFIRMED TO WORK. PLEASE UPDATE WITH CORRECTIONS.* - -## If Not Using Docker -These are instructions for editing and releasing docs without using Docker. - -### Installation - -To get started the first time, install npm, then use that to install yarn -``` -$ sudo yum install npm -$ sudo npm install -g yarn -``` - -Git clone the netsage pipeline project, then run yarn install to get all the dependencies listed within package.json -``` -$ cd netsage-pipeline/website -$ yarn install -``` - -### If Local Development - -If you are working on your local machine, rather than sshing into a host, you can view changes to the docs in a browser as you work. Use the following commands to generate the static website content (gets written into the build directory), then start a local development server and open up a browser window in which to view the docs. Most changes you make will be reflected live without having to restart the server. -``` -$ yarn build -$ yarn start -go to http://localhost:3000 -``` - -### To Make Changes -Whether on a local machine or a linux host, to make changes, edit the files in website/docs/. -When finished, git add, git commit, git push, as usual. -Repeat as needed. - -To view the changes you've made with some formatting, just go to the file on github in a browser. To see all of the formatting, read the "Deploying Docs to github.io" section below. - -### Tagging a New release - -When it's time to release a new version of the Pipeline, you need to create a new version of the docs as well. - -Once the documentation is stable and you don't forsee any new change, please do the following: - -``` -$ yarn run docusaurus docs:version a.b.c -``` - -replacing a.b.c with the next release version number. 
-This will create new versioned docs in website/versioned_docs/. - -Then edit docusaurus.config.js and change `lastVersion:` to refer to the new version number. - -Finally, commit and push the following to github: - * website/versioned_docs/version-a.b.c/ - * website/versioned_sidebars/version-a.b.c.sidebars.json - * versions.json - * docusaurus.config.js - - -### Deploying Docs to github.io -Whether you have created a new set of versioned tags or just want to update the docs in "master", to make changes appear at https://netsage-project.github.io/netsage-pipeline, do the following. - -If Travis or some other CI is working, it will run yarn install and yarn deploy to do this automatically. - -If it is not, do it manually: -``` -$ USE_SSH="true" GIT_USER="your-username" yarn deploy -``` -replacing your-username. This sets a couple env vars then runs 'yarn deploy' which runs 'docusaurus deploy' (see package.json) which pushes the static website created to url: "https://netsage-project.github.io" (see docusaurus.config.js) - -NOTE: You need to have created ssh keys on the host you are running this on and added them to your github account. - -### Removing a version - -To remove version 1.2.6 of the docs, for example, - -we need to: - - * update versions.json to remove the reference - * remove the versioned_docs/version-1.2.6 - * remove versioned_sidebars/version-1.2.6-sidebars.json - -## If Using Docker - -You may also use a docs Docker container to simplify installation, making changes, and deployment. This method starts a local web server that allows you to see changes to the docs in a browser on your local machine, as they are made. - -### Build and Start the Container - -Git clone the netsage pipeline project then build and start the container. -The Dockerfile in website/ tells how to build an image that runs yarn. Docker-compose.yml brings up a docs container. -``` -$ cd netsage-pipeline/website -$ docker-compose build build_docs -$ docker-compose up -d docs -go to http://localhost:8000/netsage-pipeline/ -``` - -### To Make Changes -Whether on a local machine or a linux host, to make changes, edit the files in website/docs/. -When finished, git add, git commit, git push, as usual. -Repeat as needed. - -### Tagging a New release - -When it's time to release a new version of the Pipeline, you need to create a new version of the docs as well. - -Once the documentation is stable and you don't forsee any new change, please do the following: - -``` -$ docker-compose build build_docs -$ docker-compose run docs yarn run docusaurus docs:version a.b.c -``` -replacing a.b.c with the next release version number. -This will create new versioned docs in website/versioned_docs/. - -Then edit docusaurus.config.js and change `lastVersion:` to refer to the new version number. - -Finally, commit and push the following to github: - * website/versioned_docs/version-a.b.c/ - * website/versioned_sidebars/version-a.b.c.sidebars.json - * versions.json - * docusaurus.config.js - - -### Deploying Docs to github.io -How to do this when using Docker ??? Get into the container ??? - -For now, go a linux server that has yarn installed and -follow the instructions under If Not Using Docker. 
- - diff --git a/website/versioned_docs/version-1.2.11/devel/pipeline_dataset.md b/website/versioned_docs/version-1.2.11/devel/pipeline_dataset.md deleted file mode 100644 index a061957d..00000000 --- a/website/versioned_docs/version-1.2.11/devel/pipeline_dataset.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -id: dev_dataset -title: Pipeline Replay Dataset -sidebar_label: Replay Dataset ---- - -The Netsage Pipeline processes network data. There are some components and patterns we can use to test -the behavior, such as the Ruby unit [tests](https://github.com/netsage-project/netsage-pipeline/tree/master/conf-logstash/ruby/spec) in logstash and the [generator](https://www.elastic.co/guide/en/logstash/current/plugins-inputs-generator.html) plugin, but the best -test is to replay network data and inspect the output in the grafana dashboard. - -Two sample data sets are provided for the two types of collectors we have (Netflow and Sflow). The network data and ips have been anonymized and should have no identifying information. - -You can download the files from [here](https://drive.google.com/drive/folders/19fzY5EVoKwtYUaiBJq5OxAR82yDY0taG). - -Please take note of which ports the collectors are listening on. Check your docker-compose.override.yml file. If you are using default ports, they should match this [example](https://github.com/netsage-project/netsage-pipeline/blob/master/docker-compose.override_example.yml). - -Currently the default ports are: - - 9998/udp for sflow - - 9999/udp for netflow - -Naturally the collectors have to be running in order for any of this to be usable. You can read more on how to get them running in the [Docker Simple Deployment Guide](../deploy/docker_install_simple.md#running-the-collectors) - -In order to replay the data, use the following commands for netflow and sflow respectively: - -### Netflow - -``` -nfreplay -H 127.0.0.1 -p 9999 -r nfcapd-ilight-anon-20200114 -v 9 -d 1000 -``` - -### Sflow - -Coming soon. nfreplay will not work with the sflow data type. - diff --git a/website/versioned_docs/version-1.2.11/devel/tag.md b/website/versioned_docs/version-1.2.11/devel/tag.md deleted file mode 100644 index 040de851..00000000 --- a/website/versioned_docs/version-1.2.11/devel/tag.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -id: docker_dev_tag -title: How to Tag a New Release -sidebar_label: Tagging a Release ---- - -To tag a new release, first update the version number and Changes file, build the rpm, etc., and upgrade on bare-metal hosts using yum. If all works fine, do the following steps to create new Docker images. - -## In Github, Create a Release/Tag - -Be sure to copy info from the Changes file into the Release description. - -Do this first ??? - -## To Build and Push an Importer Image Manually - -Git clone the pipeline project and have the ?? branch checked out. - -``` -$ docker-compose build -$ docker login -$ docker push $image:$tag -``` - -This will build the image and push it to Docker Hub. - -The person doing this has to have a Docker Hub account and belong to the Netsage team (3 users are allowed, for the free level). - -## With Automation - - -## Versioned Docs - -A new set of versioned docs also has to be tagged. See the Docusaurus guide. - -Does this have to happen before Building the image ?? - -## New Version of Nfdump - -If a new version of nfdump has been released that we need, -???? - -## New Version of Logstash - -If a new version of logstash has been released that we want everyone to use, -???
diff --git a/website/versioned_docs/version-1.2.11/pipeline/elastic_search.md b/website/versioned_docs/version-1.2.11/pipeline/elastic_search.md deleted file mode 100644 index c82a8dbd..00000000 --- a/website/versioned_docs/version-1.2.11/pipeline/elastic_search.md +++ /dev/null @@ -1,124 +0,0 @@ ---- -id: elastic -title: Elasticsearch -sidebar_label: Elasticsearch ---- - -Flow data is ultimately saved to Elasticsearch. Following are the fields that are used/created in Logstash and that you may see returned by an elasticsearch query. - -### Flow fields - -|name |example |description | -|-----------------------|-----------------------|-----------------------------| -|start |Jun 9, 2020 @ 17:39:53.808 | Start time of the flow (first packet seen)| -|end |Jun 9, 2020 @ 17:39:57.699 |End time of the flow (last packet seen)| -|meta.id |a17c4f0542... |Id of the flow (hash of 5-tuple + Sensor name)| -|es_doc_id |4f46bef884... |Hash of meta.id and start time. May be used as doc id in ES to prevent duplicates, but see Notes elsewhere.| -|meta.flow_type |sflow |'sflow', 'netflow', or 'tstat'| -|meta.protocol |tcp |Protocol used| -|meta.sensor_id | snvl2-pw-sw-1-mgmt-2.cenic.net|Sensor name (set in importer config, may not always be a hostname) | -|meta.sensor_group |CENIC |Sensor group, usually the network | -|meta.sensor_type |Regional Network |Sensor type ('Circuit', 'Regional Network', etc) | -|meta.country_scope |Domestic |'Domestic', 'International', or 'Mixed', depending on countries of src and dst| -|meta.is_network_testing | no | 'yes' if discipline is 'CS.Network Testing and Monitoring' or port is one used for PerfSonar: 5001, 5101, or 5201| - -### Source Fields (Destination Fields similarly with "dst") - -|name |example |description | -|-----------------------|-----------------------|-----------------------------| -|meta.src_ip |171.64.68.x | deidentified IP address| -|meta.src_port |80 |port used | -|meta.src_asn |32 |Source ASN from the flow header or, in some cases, the ANS of the IP from the MaxMind GeoIP ASN database| -|meta.src_organization |Stanford University | organization that owns the AS from the CAIDA ASN-Organization database -|meta.src_location.lat | 37.423 | latitude of the IP from the MaxMind GeoIP City database| -|meta.src_location.lon |-122.164 | longitude of the IP from the MaxMind GeoIP City database| -|meta.src_country_name |United States | country of the IP from the MaxMind GeoIP City database| -|meta.src_continent |North America | continent of the IP the MaxMind GeoIP City database| -|meta.src_ifindex |166 |the index of the interface the flow came into| - -### Source Science Registry Fields (Destination Fields similarly with "dst") -The [Science Registry](https://scienceregistry.netsage.global/rdb/) stores human-curated information about various "resources". Resources are sources and destinations of flows. - -|name |example |description | -|-----------------------|-----------------------|-----------------------------| -|meta.scireg.src.discipline |MPS.Physics.High Energy |The science discipline that uses the resource (ie IP). Note that not the src MAY not have the same discipline as the dst. | -|meta.scireg.src.role |Storage |Role that the host plays | -|meta.scireg.src.org_name |Boston University (BU) |The organization the manages and/or uses the resource, as listed in the Science Registry| -|meta.scireg.src.org_abbr |Boston U |A shorter name for the organization. 
May not be the official abbreviation.| -|meta.scireg.src.resource |BU - ATLAS |Descriptive resource name from SciReg | -|meta.scireg.src.resource_abbr | |Resource abbreviation (if any)| -|meta.scireg.src.project_names |ATLAS |"Projects" that the resource is part of| -|meta.scireg.src.latitude |37.4178 |Resource's latitude, as listed in the Science Registry| -|meta.scireg.src.longitude |-122.178 |Resource's longitude, as listed in the Science Registry| - -### Source "Preferred" Fields (Destination Fields similarly with "dst") - -|name |example |description | -|-----------------------|-----------------------|-----------------------------| -|meta.src_preferred_org |Stanford University |If the IP was found in the Science Registry, this is the SciReg organization, otherwise it is the CAIDA organization| -|meta.src_preferred_location.lat |37.417800 | Science Registry value if available, otherwise the MaxMind City DB value| -|meta.src_preferred_location.lon |-122.172000i | Science Registry value if available, otherwise the MaxMind City DB value | - -### Value Fields - -|name |example |description | -|-----------------------|-----------------------|-----------------------------| -|values.num_bits |939, 458, 560 |Sum of the number of bits in the (stitched) flow| -|values.num_packets |77, 824 |Sum of the number of packets in the (stitched) flows| -|values.duration |3.891 |Calculated as end minus start.| -|values.bits_per_second |241, 443, 988 |Calculated as num_bits divided by duration | -|values.packets_per_second |20, 001 |Calculated as num_packets divided by duration| - -### Tstat Value Fields - -|name |example | -|-----------------------|-----------------------| -|values.tcp_cwin_max |1549681 | -|values.tcp_cwin_min |17| -|values.tcp_initial_cwin|313| -|values.tcp_max_seg_size|64313| -|values.tcp_min_seg_size|17| -|values.tcp_mss |8960| -|values.tcp_out_seq_pkts|0| -|values.tcp_pkts_dup |0| -|values.tcp_pkts_fc |0| -|values.tcp_pkts_fs |0| -|values.tcp_pkts_reor |0| -|values.tcp_pkts_rto |0| -|values.tcp_pkts_unfs |0| -|values.tcp_pkts_unk |2| -|values.tcp_pkts_unrto |0| -|values.tcp_rexmit_bytes |1678| -|values.tcp_rexmit_pkts |2| -|values.tcp_rtt_avg |0.044| -|values.tcp_rtt_max |39.527| -|values.tcp_rtt_min |0.001| -|values.tcp_rtt_std |0.276| -|values.tcp_sack_cnt | 1| -|values.tcp_win_max |1549681| -|values.tcp_win_min |17| -|values.tcp_window_scale |13| - -### Developer Fields - -|name |example |description | -|-----------------------|-----------------------|-----------------------------| -|@pipeline_ver |1.2.11 | Version number of the pipeline used to process this flow | -|@ingest_time |Jun 9, 2020 @ 10:03:20.700 | The time the flow entered the logstash pipeline | -|@timestamp |Jun 9, 2020 @ 18:03:21.703 |The time the flow entered the logstash pipeline for tstat flows, or the time stitching finished and the event exited the aggregation filter for other flows.| -|@exit_time |Jun 9, 2020 @ 18:03:25.369 |The time the flow exited the pipeline | -|@processing_time |688.31 |@exit_time minus @ingest_time. Useful for seeing how long stitching took. | -|stitched_flows |13 |Number of flows that came into logstash that were stitched together to make this final one. 1 if no flows were stitched together. 0 for tstat flows, which are never stitched. 
| -|tags |maxmind src asn |Various info and error messages| -|trial | 5 |Can be set in 40-aggregation.conf if desired| - -### Elasticsearch Fields - -|name |example |description | -|-----------------------|-----------------------|-----------------------------| -|_index | om-ns-netsage-2020.06.14 | name of the index ("database table") | -|_type |_doc | set by ES | -|_id |HRkcm3IByJ9fEnbnCpaY | elasticsearch document id. | -|_score |1 |set by ES query | -|@version |1 | set by ES | - diff --git a/website/versioned_docs/version-1.2.11/pipeline/importer.md b/website/versioned_docs/version-1.2.11/pipeline/importer.md deleted file mode 100644 index 24b05c4b..00000000 --- a/website/versioned_docs/version-1.2.11/pipeline/importer.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -id: importer -title: Importer -sidebar_label: Importer ---- -A netsage-netflow-importer script reads any new nfcapd files that have come in after a configurable delay and writes the results to the "netsage_deidentifier_raw" RabbitMQ queue. -All flow data waits in the queue until it is read in and processed by the logstash pipeline. - -To read nfcapd files, the importer uses an nfdump command with the "-a" option to aggregate raw flows within the file by the "5-tuple," i.e., the source and destination IPs, ports, and protocol. The "-L" option is used to throw out any aggregated flows below a threshold number of bytes. This threshold is specified in the importer config file. - -### Configuration -Configuration files for the importer are netsage_netflow_importer.xml and netsage_shared.xml in /etc/grnoc/netsage/deidentfier/. Comments in the files briefly describe the options. See also the Deployment pages in these docs. - -To avoid re-reading nfcapd files, the importer stores the names of files that have already been read in /var/cache/netsage/netflow_importer.cache. diff --git a/website/versioned_docs/version-1.2.11/pipeline/intro.md b/website/versioned_docs/version-1.2.11/pipeline/intro.md deleted file mode 100644 index f4cce287..00000000 --- a/website/versioned_docs/version-1.2.11/pipeline/intro.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -id: intro -title: Intro -sidebar_label: Intro ---- -# The NetSage Pipeline - -## Description - -The Netsage Flow Processing Pipeline is composed of several components for processing network flow data, including importing, deidentification, metadata tagging, flow stitching, etc. -There are many ways the components can be combined, configured, and run. These documents will describe the standard "simple" set up and provide information for more complex configurations. - -## Data Collection - -In Netsage, sensor(s) are network devices configured to collect flow data ([tstat](http://tstat.polito.it/), [sflow](https://www.rfc-editor.org/info/rfc3176), or [netflow](https://www.cisco.com/c/en/us/products/collateral/ios-nx-os-software/ios-netflow/prod_white_paper0900aecd80406232.html)) and send it to a "pipeline host" for processing. - -Tstat flow data can be sent directly to the pipeline ingest RabbitMQ queue on the pipeline host using the Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) tool. This can be installed as usual or via Docker. - -Sflow and netflow data from configured routers should be sent to the pipeline host where it is collected and stored into nfcapd files using [nfdump tools](https://github.com/phaag/nfdump). The Netsage project has packaged the nfdump tools into a [Docker container](https://github.com/netsage-project/docker-nfdump-collector) for ease of use. 
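For concreteness, an sflow/netflow "collector" here is just an sfcapd or nfcapd process listening on a UDP port and writing rotated nfcapd files. A minimal sketch (the flags mirror the Docker collector commands shown later in these docs; the ports and output paths are the documented defaults and may differ in your setup):

```sh
# sflow collector: listen on UDP 9998, write compressed, time-rotated nfcapd files
sfcapd -T all -l ./data/input_data/sflow -S 1 -w -z -p 9998

# netflow/IPFIX collector: same idea on UDP 9999
nfcapd -T all -l ./data/input_data/netflow -S 1 -w -z -p 9999
```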
- -## Pipeline Components - -The Netsage Flow Processing Pipeline is made of the following components - - - Importer: Perl scripts on the pipeline host that read nfcapd flow files and send the flow data to a RabbitMQ queue. ([Doc](importer.md), [in github](https://github.com/netsage-project/netsage-pipeline/blob/master/lib/GRNOC/NetSage/Deidentifier/NetflowImporter.pm)) - - [RabbitMQ](https://www.rabbitmq.com/): Used for message passing and queuing of tasks. - - [Logstash](https://www.elastic.co/logstash) pipeline: Performs a variety of operations on the flow data to transform it and add additional information. ([Doc](logstash.md)) - - [Elasticsearch](https://www.elastic.co/what-is/elasticsearch): Used for storing the final flow data. - -## Visualization - -[Grafana](https://grafana.com/oss/grafana/) or [Kibana](https://www.elastic.co/kibana) can be used to visualize the data stored in elasticsearch. Netsage Grafana Dashboards are available [in github](https://github.com/netsage-project/netsage-grafana-configs). - -## Pipeline Installation - -Originally, the pipeline was deployed by installing all of the components individually on one or more servers (the "BareMetal" or "Manual" Install). More recently, we've also added a Docker deployment option. With simple pipelines having just one sflow and/or one netflow sensor (and any number of tstat sensors), the basic "Docker Installation" should suffice. The "Docker Advanced Options" guide will help when there are more sensors and/or other customizations required. - diff --git a/website/versioned_docs/version-1.2.11/pipeline/logstash.md b/website/versioned_docs/version-1.2.11/pipeline/logstash.md deleted file mode 100644 index 658b240a..00000000 --- a/website/versioned_docs/version-1.2.11/pipeline/logstash.md +++ /dev/null @@ -1,128 +0,0 @@ ---- -id: logstash -title: Logstash Pipeline -sidebar_label: Logstash ---- - -The Logstash portion of the Netsage Pipeline reads in flows from a RabbitMQ queue, performs various transformations and adds additional information to them, then sends them to a location specified in the output logstash config, eventually ending up in an Elasticsearch instance. - -Logstash config files invoke various logstash "filters" and actions. These conf files are located in /etc/logstash/conf.d/. See below for a brief description of what each does and check the files for comments. - -Notes: - - All \*.conf files in conf.d/ are executed in alphabetical order, as if they were one huge file. Those ending in .disabled will not be executed (assuming 'path.config: "/etc/logstash/conf.d/*.conf"' in /etc/logstash/pipelines.yml). - - If actions in a particular .conf file are not needed in your particular case, they can be removed or the file disabled, but check carefully for effects on downstream configs. - - MaxMind, CAIDA, and Science Registry database files required by the geoip and aggregate filters are downloaded from scienceregistry.netsage.global via cron jobs weekly or daily. (MaxMind data can change weekly, CAIDA quarterly, Science Registry information randomly.) **NOTE that new versions won't be used in the pipeline until logstash is restarted.** There is a cron file to do this also, though it's not running in Docker deployments. Similarly for other support files, eg, those used in 90-additional-fields.conf. - - Lookup tables for 55-member-orgs.conf that we have compiled are available from sciencregistry.grnoc.iu.edu. See the cron files provided. These will not be updated often, so you may run the cron jobs or not. 
You will need to provide lists for other networks yourself or ask us.
-
-## Logstash Sequence
-
-The main things done in each conf file are as follows.
-
-### 01-input-rabbit.conf
-
-Reads flows from a rabbitmq queue. (The ".disabled" extension can be removed from other 01-input configs available in conf.d/ to get flows from other sources.)
-
-### 10-preliminaries.conf
-
-Drops flows to or from private IP addresses;
-converts any timestamps in milliseconds to seconds;
-drops events with timestamps more than a year in the past or (10 sec) in the future;
-sets duration and rates to 0 if duration is <= 0.002 sec (because tiny durations/few samples lead to inaccurate rates).
-
-### 15-sensor-specific-changes.conf
-
-Makes any changes to fields needed for specific sensors. This config currently provides 1) the ability to drop all flows that do not use interfaces (ifindexes) in a specified list; lists can be sensor-specific, 2) the ability to change the sensor name for flows from a specified sensor which use a certain interface, and 3) the ability to apply a sampling rate correction manually for named sensors. You may edit the file in a bare-metal installation and specify everything explicitly (upgrades will not overwrite this config) or you may use the environment file specified in the systemd unit file. For Docker installations, use the .env file to specify the parameters. By default, this config will do nothing since the flags will be set to False.
-
-### 20-add_id.conf
-
-Adds a unique id (eventually called meta.id) which is a hash of the 5-tuple of the flow (src and dst ips and ports, and protocol) plus the sensor name. This id is used for aggregating (stitching) in the next step.
-
-### 40-aggregation.conf
-
-Stitches together flows from different nfcapd files into longer flows, matching them up by meta.id and using a specified inactivity_timeout to decide when to start a new flow.
-
-Notes:
- - By default, 5-minute nfcapd files are assumed and the inactivity_timeout is set to 10.5 minutes. If more than 10.5 min have passed between the start of the current flow and the start of the last matching one, do not stitch them together.
- - If your nfcapd files are written every 15 minutes, change the inactivity_timeout to at least 16 minutes.
- - There is another "timeout" setting which is basically the maximum duration of a stitched flow (default: 24 hr).
- - When logstash shuts down, any flows "in the aggregator" will be written out to aggregate_maps_path (default: /tmp/logstash-aggregation-maps). The file is then read back in when logstash is restarted so aggregation can continue.
- - Your logstash pipeline can have only 1 worker or aggregation is not going to work! This is set in the logstash config file.
- - Tstat flows come in already complete, so no aggregation is done on those flows.
-
-### 45-geoip-tagging.conf
-
-Queries the MaxMind GeoLite2-City database by IP to get src and dst Countries, Continents, Latitudes, and Longitudes;
-if the destination IP is in the multicast range, sets the destination Organization, Country, and Continent to "Multicast".
-
-*This product uses GeoLite2 data created by MaxMind, available from [www.maxmind.com](http://www.maxmind.com).*
-
-### 50-asn.conf
-
-Normally with sflow and netflow, flows come in with source and destination ASNs. If there is no ASN in the input event, or the input ASN is 0, 4294967295, or 23456, or it is a private ASN, this filter tries to get an ASN by IP from the MaxMind ASN database.
-Sets ASN to -1 if it is unavailable for any reason.
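To illustrate the shape of this lookup (an illustrative sketch only, not the shipped 50-asn.conf, and showing just the source side), a logstash geoip filter run against MaxMind's ASN database looks roughly like:

```
filter {
  # Only look up an ASN when the flow did not arrive with a usable one
  if ![meta][src_asn] or [meta][src_asn] == 0 or [meta][src_asn] == 23456 {
    geoip {
      source                => "[meta][src_ip]"
      default_database_type => "ASN"
      target                => "[src_asn_lookup]"
      tag_on_failure        => ["maxmind src asn lookup failed"]
    }
  }
}
```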
- -### 53-caida-org.conf - -Uses the current source and destination ASNs to get organization names from the prepared CAIDA ASN-to-Organization lookup file. - -*This product uses a lookup table constructed from the CAIDA AS Organizations Dataset - see [www.caida.org](http://www.caida.org/data/as-organizations).* - -### 55-member-orgs.conf - -Searches any provided lookup tables by IP to obtain member or customer organization names and overwrite the Organization determined previously. -This allows entities which don't own their own ASs to be listed as the src or dst Organization. - -Note: These lookup tables are not stored in github, but an example is provided to show the layout and tables we have can be downloaded via a cron job. - -### 60-scireg-tagging-fakegeoip.conf - -Uses a fake geoip database containing [Science Registry](http://scienceregistry.grnoc.iu.edu) information to tag the flows with source and destination science disciplines and roles, organizations and locations, etc; -removes Registry fields we don't need to save to elasticsearch. - -Notes: - - The [Science Registry](https://scienceregistry.netsage.global/rdb/) stores human-curated information about various "resources". Resources are sources and destinations of flows. - - The Science Registry "fake geoip database" is updated weekly and can be downloaded via wget in a cron job (provided in the installation). - -### 70-deidentify.conf - -Replaces the last octet of IPv4 addresses and the last 4 hextets of IPv6 addresses with x's in order to deidentify them. - -### 80-privatize.org.conf - -Removes information about Australian organizations (or, with modification, any country that has privacy rules that require us not to identify organizations). -If the ASN is one of those listed, completely replaces the IP with x's, sets the location to central Autralia, sets all organizations to "AARNet", removes all Projects. - -### 88-preferred-location-org.conf - -Copies Science Registry organization and location values, if they exist, to the meta.preferred_organization and meta.preferred_location fields. If there are no Science Registry values, the organizations and locations from the CAIDA and MaxMind lookups, respectively, are saved to those fields. - -### 90-additional-fields.conf - -Sets additional quick and easy fields. Supporting mapping or ruby files are used - see support/ and ruby/ in conf.d/. Currently we have (for Netsage's use): - - sensor_group = TACC, AMPATH, etc. (based on matching sensor names to regexes) - - sensor_type = Circuit, Archive, Exchange Point, or Regional Network (based on matching sensor names to regexes) - - country_scope = Domestic, International, or Mixed (based on src and dst countries and possibly continents, where Domestic = US, Puerto Rico, or Guam) - - is_network_testing = yes, no (yes if discipline from the science registry is 'CS.Network Testing and Monitoring' or port = 5001, 5101, or 5201) - - es_doc_id = hash of meta.id and the start time of the flow. If this id is used as the document id in elasticsearch, flows that are mistakenly input more than once will update existing documents rather than be added as duplicates. (NOTE: due to how netflow works, use es_doc_id as the ES document id only for sflow!) - -### 95-cleanup.conf - -Does small misc. tasks at the end like rename, remove, or convert fields - -### 98-post-process.conf - -Adds @exit_time and @processing_time (these are mainly for developers) - -### 99-output-rabbit.conf - -Sends results to a final RabbitMQ queue. 
(".disabled" can be removed from other output configs to send flows to other places) - -### Final Stage - -In the GlobalNOC-Netsage case, the output filter writes the flows to a network-specific RabbitMQ queue on another host and the last stage is a separate logstash pipeline on a 3rd host. The latter reads flows from the final queue using a rabbitmq input filter and sends it into elasticsearch using an elasticsearch output filter with a mapping template which sets data types for the fields. - -## Field names - -The fields used/created in Logstash (and saved to Elasticsearch) are listed in the [Elasticsearch doc](elastic). - - diff --git a/website/versioned_docs/version-1.2.11/pipeline/nfdump.md b/website/versioned_docs/version-1.2.11/pipeline/nfdump.md deleted file mode 100644 index b9519282..00000000 --- a/website/versioned_docs/version-1.2.11/pipeline/nfdump.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -id: nfdump -title: Sflow/Netflow Data Collection -sidebar_label: Sflow/Netflow Data ---- - -Sflow and Netflow export can be configured on appropriate network devices. Netsage uses tools in the Nfdump package to collect and process the resulting flow data. The toolset supports netflow v1, v5/v7, v9, IPFIX and SFLOW, IPv4 as well as IPv6. - -## Netsage Usage - -Nfcapd and/or sfcapd processes (from the nfdump package) are used to collect incoming netflow and/or sflow data and save it to disk in nfcapd files. The files are then read by the [importer](importer), which uses an nfdump command, and sent to RabbitMQ. From there, the [logstash](logstash) pipeline ingests the flows and processes them in exactly the same way as it processes tstat flows. The data is eventually saved in elasticsearch and visualized by [grafana dashboards](https://github.com/netsage-project/netsage-grafana-configs). - -One may also use the nfdump command interactively to view the flows in a nfcapd file in a terminal window. - -## Docker Deployment - -The nfdump/nfcapd/sfcapd processes can be invoked locally or using a Docker container. The Docker deployment of the Pipeline uses an nfdump Docker container. (See the Docker Deployment Guide.) The Docker image definitions can be found [HERE](https://github.com/netsage-project/docker-nfdump-collector) diff --git a/website/versioned_docs/version-1.2.11/pipeline/tstat.md b/website/versioned_docs/version-1.2.11/pipeline/tstat.md deleted file mode 100644 index baab97c5..00000000 --- a/website/versioned_docs/version-1.2.11/pipeline/tstat.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -id: tstat -title: Tstat Data Collection -sidebar_label: Tstat Data ---- - -## Netsage GitHub Project - -[Tstat](http://tstat.polito.it/) is a passive sniffer that provides insights into traffic patterns. The Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) project provides client programs to parse the captured data and send it to a rabbitmq host where it can then be processed by the [logstash pipeline](logstash), stored in elasticsearch, and finally displayed in our Grafana [dashboards](https://github.com/netsage-project/netsage-grafana-configs). - -## Docker - -Netsage Docker images exist on Docker Hub for tstat and tstat_transport. This is still in a beta state and is in development. The initial documentation is available [here](https://github.com/netsage-project/tstat-transport/blob/master/docs/docker.md). 
- - - diff --git a/website/versioned_sidebars/version-1.2.11-sidebars.json b/website/versioned_sidebars/version-1.2.11-sidebars.json deleted file mode 100644 index 40a8c9ac..00000000 --- a/website/versioned_sidebars/version-1.2.11-sidebars.json +++ /dev/null @@ -1,89 +0,0 @@ -{ - "version-1.2.11/Pipeline": [ - { - "collapsed": true, - "type": "category", - "label": "Pipeline", - "items": [ - { - "type": "doc", - "id": "version-1.2.11/pipeline/intro" - }, - { - "type": "doc", - "id": "version-1.2.11/pipeline/tstat" - }, - { - "type": "doc", - "id": "version-1.2.11/pipeline/nfdump" - }, - { - "type": "doc", - "id": "version-1.2.11/pipeline/importer" - }, - { - "type": "doc", - "id": "version-1.2.11/pipeline/logstash" - }, - { - "type": "doc", - "id": "version-1.2.11/pipeline/elastic" - } - ] - }, - { - "collapsed": true, - "type": "category", - "label": "Deployment", - "items": [ - { - "type": "doc", - "id": "version-1.2.11/deploy/choose_install" - }, - { - "type": "doc", - "id": "version-1.2.11/deploy/bare_metal_install" - }, - { - "type": "doc", - "id": "version-1.2.11/deploy/docker_install_simple" - }, - { - "type": "doc", - "id": "version-1.2.11/deploy/docker_install_advanced" - }, - { - "type": "doc", - "id": "version-1.2.11/deploy/docker_upgrade" - }, - { - "type": "doc", - "id": "version-1.2.11/deploy/docker_troubleshoot" - } - ] - }, - { - "collapsed": true, - "type": "category", - "label": "Development", - "items": [ - { - "type": "doc", - "id": "version-1.2.11/devel/dev_dataset" - }, - { - "type": "doc", - "id": "version-1.2.11/devel/docker_dev_guide" - }, - { - "type": "doc", - "id": "version-1.2.11/devel/docusaurus" - }, - { - "type": "doc", - "id": "version-1.2.11/devel/docker_dev_tag" - } - ] - } - ] -} diff --git a/website/versions.json b/website/versions.json index 5b4b29b1..2303a6b2 100644 --- a/website/versions.json +++ b/website/versions.json @@ -1,5 +1,4 @@ [ - "1.2.11", "1.2.10", "1.2.9", "1.2.8", diff --git a/website/yarn.lock b/website/yarn.lock index 7b43286a..3cfba086 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -2795,9 +2795,9 @@ caniuse-api@^3.0.0: lodash.uniq "^4.5.0" caniuse-lite@^1.0.0, caniuse-lite@^1.0.30000981, caniuse-lite@^1.0.30001109, caniuse-lite@^1.0.30001125, caniuse-lite@^1.0.30001181, caniuse-lite@^1.0.30001196: - version "1.0.30001205" - resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001205.tgz#d79bf6a6fb13196b4bb46e5143a22ca0242e0ef8" - integrity sha512-TL1GrS5V6LElbitPazidkBMD9sa448bQDDLrumDqaggmKFcuU2JW1wTOHJPukAcOMtEmLcmDJEzfRrf+GjM0Og== + version "1.0.30001255" + resolved "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001255.tgz" + integrity sha512-F+A3N9jTZL882f/fg/WWVnKSu6IOo3ueLz4zwaOPbPYHNmM/ZaDUyzyJwS1mZhX7Ex5jqTyW599Gdelh5PDYLQ== ccount@^1.0.0, ccount@^1.0.3: version "1.1.0" From 9c176c8e9a20240df647a3b0a7077e7307f79668 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 9 Sep 2021 21:21:22 +0000 Subject: [PATCH 043/126] Making versioned docs for 1.2.11 again --- .../version-1.2.11/components/docker_env.md | 40 +++ .../components/docker_first_steps.md | 26 ++ .../components/docker_pipeline.md | 31 ++ .../deploy/bare_metal_install.md | 299 ++++++++++++++++++ .../version-1.2.11/deploy/choosing.md | 25 ++ .../deploy/docker_install_advanced.md | 216 +++++++++++++ .../deploy/docker_install_simple.md | 121 +++++++ .../deploy/docker_troubleshooting.md | 35 ++ .../version-1.2.11/deploy/docker_upgrade.md | 80 +++++ .../version-1.2.11/devel/docker.md | 83 +++++ .../devel/documentation_guide.md 
| 143 +++++++++ .../version-1.2.11/devel/pipeline_dataset.md | 34 ++ .../version-1.2.11/devel/tag.md | 113 +++++++ .../version-1.2.11/pipeline/elastic_search.md | 124 ++++++++ .../version-1.2.11/pipeline/importer.md | 14 + .../version-1.2.11/pipeline/intro.md | 37 +++ .../version-1.2.11/pipeline/logstash.md | 128 ++++++++ .../version-1.2.11/pipeline/nfdump.md | 17 + .../version-1.2.11/pipeline/tstat.md | 16 + .../version-1.2.11-sidebars.json | 89 ++++++ website/versions.json | 1 + 21 files changed, 1672 insertions(+) create mode 100644 website/versioned_docs/version-1.2.11/components/docker_env.md create mode 100644 website/versioned_docs/version-1.2.11/components/docker_first_steps.md create mode 100644 website/versioned_docs/version-1.2.11/components/docker_pipeline.md create mode 100644 website/versioned_docs/version-1.2.11/deploy/bare_metal_install.md create mode 100644 website/versioned_docs/version-1.2.11/deploy/choosing.md create mode 100644 website/versioned_docs/version-1.2.11/deploy/docker_install_advanced.md create mode 100644 website/versioned_docs/version-1.2.11/deploy/docker_install_simple.md create mode 100644 website/versioned_docs/version-1.2.11/deploy/docker_troubleshooting.md create mode 100644 website/versioned_docs/version-1.2.11/deploy/docker_upgrade.md create mode 100644 website/versioned_docs/version-1.2.11/devel/docker.md create mode 100644 website/versioned_docs/version-1.2.11/devel/documentation_guide.md create mode 100644 website/versioned_docs/version-1.2.11/devel/pipeline_dataset.md create mode 100644 website/versioned_docs/version-1.2.11/devel/tag.md create mode 100644 website/versioned_docs/version-1.2.11/pipeline/elastic_search.md create mode 100644 website/versioned_docs/version-1.2.11/pipeline/importer.md create mode 100644 website/versioned_docs/version-1.2.11/pipeline/intro.md create mode 100644 website/versioned_docs/version-1.2.11/pipeline/logstash.md create mode 100644 website/versioned_docs/version-1.2.11/pipeline/nfdump.md create mode 100644 website/versioned_docs/version-1.2.11/pipeline/tstat.md create mode 100644 website/versioned_sidebars/version-1.2.11-sidebars.json diff --git a/website/versioned_docs/version-1.2.11/components/docker_env.md b/website/versioned_docs/version-1.2.11/components/docker_env.md new file mode 100644 index 00000000..0bfe77ac --- /dev/null +++ b/website/versioned_docs/version-1.2.11/components/docker_env.md @@ -0,0 +1,40 @@ +Next, copy `env.example` to `.env` +```sh +cp env.example .env +``` + +then edit the .env file to set the sensor names to unique identifiers (with spaces or not, no quotes) +```sh +# Importer settings +sflowSensorName=My sflow sensor name +netflowSensorName=My netflow sensor name +``` + + - If you have only one collector, remove or comment out the line for the one you are not using. + - If you have more than one of the same type of collector, see the "Docker Advanced" documentation. + +:::note +These names uniquely identify the source of the data and will be shown in the Grafana dashboards. In elasticsearch, they are saved in the `meta.sensor_id` field. Choose names that are meaningful and unique. +For example, your sensor names might be "MyNet New York Sflow" and "MyNet Boston Netflow" or "MyNet New York - London" and "MyNet New York - Paris". Whatever makes sense in your situation. +::: + +You will also want to edit the **Logstash output rabbit queue** section. This section defines where the final data will land after going through the pipeline. 
By default, it will be written to a rabbitmq queue on `rabbit`, ie, the local rabbitMQ server running in the docker container. Enter a hostname to send to a remote rabbitMQ server (also the correct username, password, and queue key/name). + +```sh +rabbitmq_output_host=rabbit@mynet.edu +rabbitmq_output_username=guest +rabbitmq_output_pw=guest +rabbitmq_output_key=netsage_archive_input +``` +:::note +To send processed flow data to GlobalNOC at Indiana University, you will need to obtain settings for this section from your contact. A new queue may need to be set up at IU, as well as allowing traffic from your pipeline host. (At IU, data from the this final rabbit queue will be moved into an Elasticsearch instance for storage and viewing.) +::: + +The following options are described in the Docker Advanced section: + +**To drop all flows except those using the specfied interfaces**: Use if only some flows from a router are of interest and those can be identified by interface. + +**To change the sensor name for flows using a certain interface**: Use if you want to break out some flows coming into a port and give them a different sensor name. + +**To "manually" correct flow sizes and rates for sampling for specified sensors**: Use if sampling corrections are not being done automatically. Normally you do not need to use this, but check flows to be sure results are reasonable. + diff --git a/website/versioned_docs/version-1.2.11/components/docker_first_steps.md b/website/versioned_docs/version-1.2.11/components/docker_first_steps.md new file mode 100644 index 00000000..9a75fb05 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/components/docker_first_steps.md @@ -0,0 +1,26 @@ +#### saving this for now in case I need to put it back ####### + +Then checkout the latest version of the code. If you are a developer you'll want the latest version from master, otherwise please use make sure +you've checked out the latest tagged version. + +For example, +```sh +## Normal Deployment, eg, checkout version 1.2.8 +$ git fetch +$ git checkout v1.2.8 -b v1.2.8 + +## Developers +$ git fetch +$ git reset --hard origin/master +``` + +:::warning +git reset --hard will obliterate any changes. On initial installation, you should not have any, but if you do wish to save any state, please make sure you commit and backup to a feature branch before continuing + +Example: +```git commit -a -m "Saving local state"; git checkout -b feature/backup; git checkout master``` +::: + + +All instructions that follow assume these first steps were performed succesfully. If not, you'll likely run into errors down the line if the code doesn't match up with the instructions provided. + diff --git a/website/versioned_docs/version-1.2.11/components/docker_pipeline.md b/website/versioned_docs/version-1.2.11/components/docker_pipeline.md new file mode 100644 index 00000000..a0709f08 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/components/docker_pipeline.md @@ -0,0 +1,31 @@ +Start up the pipeline (all containers) using: + +```sh +# docker-compose up -d +``` + +This will also restart any containers/processes that have died. "-d" runs containers in the background. + +You can see the status of the containers and whether any have died (exited) using +```sh +# docker-compose ps +``` + +To check the logs for each of the containers, run + +```sh +# docker-compose logs +# docker-compose logs logstash +# docker-compose logs importer +etc. +``` + +Add `-f` or, e.g., `-f logstash` to see new log messages as they arrive. 
`--timestamps`, `--tail`, and `--since` are also useful -- look up details in Docker documentation. + +To shut down the pipeline (all containers) use + +```sh +# docker-compose down +``` + +Run all commands from the netsage-pipeline/ directory. diff --git a/website/versioned_docs/version-1.2.11/deploy/bare_metal_install.md b/website/versioned_docs/version-1.2.11/deploy/bare_metal_install.md new file mode 100644 index 00000000..c0c21510 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/deploy/bare_metal_install.md @@ -0,0 +1,299 @@ +--- +id: bare_metal_install +title: Manual Installation Guide +sidebar_label: Manual Installation +--- + +This document covers installing the NetSage Flow Processing Pipeline manually on a new machine (without using Docker). Steps should be followed below in order unless you know for sure what you are doing. This document assumes a RedHat Linux environment or one of its derivatives. + +## Data sources + +The Processing pipeline needs data to ingest in order to do anything. There are two types of data that can be consumed. + +1. sflow or netflow +2. tstat + +At least one of these must be set up on a sensor to provide the incoming flow data. + +Sflow and netflow data should be sent to ports on the pipeline host where nfcapd and/or sfcapd are ready to receive it. + +Tstat data should be sent directly to the logstash input RabbitMQ queue (the same one that the Importer writes to, if it is used). From there, the data will be processed the same as sflow/netflow data. + +## Installing the Prerequisites + +### Installing nfdump + +The nfdump package provides nfcapd and sfcapd processes which recieve flow data and write nfcapd files. +The Importer also uses nfdump. If you are only collecting tstat data, you do not need nfdump. + + +Nfdump is _not_ listed as a dependency of the Pipeline RPM package, as in a lot cases people are running special builds of nfdump -- but make sure you install it before you try running the Netflow Importer. If in doubt, `yum install nfdump` should work. +Flow data exported by some routers require a newer version of nfdump than the one in the CentOS repos; in these cases, it may be necessary to manually compile and install the lastest nfdump. + +:::note +It is recommended to check the version of nfdump used in the Docker installation and use the same or newer in order to be sure that any fixes for impactful issues are included. +::: + + +If desired, you can also install nfsen, which has a UI for viewing flow data and can manage starting and stopping all the nfcapd/sfcapd processes for you.The nfsen.conf file has a section in which to configure all the sources. + +### Installing RabbitMQ + +The pipeline requires a RabbitMQ server. Typically, this runs on the same server as the pipeline itself, but if need be, you can separate them (for this reason, the Rabbit server is not automatically installed with the pipeline package). + +```sh +[root@host ~]# yum install rabbitmq-server + +``` + +Typically, the default configuration will work. Perform any desired Rabbit configuration, then, start RabbitMQ: + +```sh +[root@host ~]# /sbin/service rabbitmq-server start + or # systemctl start rabbitmq-server.service +``` + +### Installing Logstash + +See the logstash documentation. We are currently using Version 7.10. + +### Installing the EPEL repo + +Some of our dependencies come from the EPEL repo. 
To install this: + +``` +[root@host ~]# yum install epel-release +``` + +### Installing the GlobalNOC Open Source repo + +The Pipeline package (and its dependencies that are not in EPEL) are in the GlobalNOC Open Source Repo. + +For Red Hat/CentOS 6, create `/etc/yum.repos.d/grnoc6.repo` with the following content. + +``` +[grnoc6] +name=GlobalNOC Public el6 Packages - $basearch +baseurl=https://repo-public.grnoc.iu.edu/repo/6/$basearch +enabled=1 +gpgcheck=1 +gpgkey=https://repo-public.grnoc.iu.edu/repo/RPM-GPG-KEY-GRNOC6 +``` + +For Red Hat/CentOS 7, create `/etc/yum.repos.d/grnoc7.repo` with the following content. + +``` +[grnoc7] +name=GlobalNOC Public el7 Packages - $basearch +baseurl=https://repo-public.grnoc.iu.edu/repo/7/$basearch +enabled=1 +gpgcheck=1 +gpgkey=https://repo-public.grnoc.iu.edu/repo/RPM-GPG-KEY-GRNOC7 +``` + +The first time you install packages from the repo, you will have to accept the GlobalNOC repo key. + +## Installing the Pipeline (Importer and Logstash configs) + +Install it like this: + +``` +[root@host ~]# yum install grnoc-netsage-deidentifier +``` + +Pipeline components: + +1. Flow Filter - GlobalNOC uses this for Cenic data to filter out some flows. Not needed otherwise. +2. Netsage Netflow Importer - required to read nfcapd files from sflow and netflow importers. (If using tstat flow sensors only, this is not needed.) +3. Logstash - be sure the number of logstash pipeline workers in /etc/logstash/logstash.yml is set to 1 or flow stitching/aggregation will not work right! +4. Logstash configs - these are executed in alphabetical order. See the Logstash doc. At a minimum, the input, output, and aggregation configs have parameters that you will need to update or confirm. + +Nothing will automatically start after installation as we need to move on to configuration. + +## Importer Configuration + +Configuration files of interest are + - /etc/grnoc/netsage/deidentifier/netsage_shared.xml - Shared config file allowing configuration of collections, and Rabbit connection information + - /etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml - other settings + - /etc/grnoc/netsage/deidentifier/logging.conf - logging config + - /etc/grnoc/netsage/deidentifier/logging-debug.conf - logging config with debug enabled + +### Setting up the shared config file + +`/etc/grnoc/netsage/deidentifier/netsage_shared.xml` + +There used to be many perl-based pipeline components and daemons. At this point, only the importer is left, the rest having been replaced by logstash. The shared config file, which was formerly used by all the perl components, is read before reading the individual importer config file. + +The most important part of the shared configuration file is the definition of collections. Each sflow or netflow sensor will have its own collection stanza. Here is one such stanza, a netflow example. Instance and router-address can be left commented out. + +``` + + + /path/to/netflow-files/ + + + Netflow Sensor 1 + + + sflow + + + + + + + + +``` + +Having multiple collections in one importer can sometimes cause issues for aggregation, as looping through the collections one at a time adds to the time between the flows, affecting timeouts. You can also set up multiple Importers with differently named shared and importer config files and separate init.d files. + +There is also RabbitMQ connection information in the shared config, though queue names are set in the Importer config. 
(The Importer does not read from a rabbit queue, but other old components did, so both input and output are set.) + +Ideally, flows should be deidentified before they leave the host on which the data is stored. If flows that have not be deidentified need to be pushed to another node for some reason, the Rabbit connection must be encrypted with SSL. + +If you're running a default RabbitMQ config, which is open only to 'localhost' as guest/guest, you won't need to change anything here. + +``` + + + 127.0.0.1 + 5672 + guest + guest + 0 + 100 + / + 1 + + + + 127.0.0.1 + 5672 + guest + guest + 0 + 100 + / + 1 + +``` + +### Setting up the Importer config file + +`/etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml` + +This file has a few more setting specific to the Importer component which you may like to adjust. + + - Rabbit_output has the name of the output queue. This should be the same as that of the logstash input queue. + - (The Importer does not actually use an input rabbit queue, so we add a "fake" one here.) + - Min-bytes is a threshold applied to flows aggregated within one nfcapd file. Flows smaller than this will be discarded. + - Min-file-age is used to be sure files are complete before being read. + - Cull-enable and cull-ttl can be used to have nfcapd files older than some number of days automatically deleted. + - Pid-file is where the pid file should be written. Be sure this matches what is used in the init.d file. + - Keep num-processes set to 1. + +```xml + + + + + + netsage_deidentifier_netflow_fake + 2 + + + + 3 + netsage_deidentifier_raw + + + + + 100 + + + 1 + + + + + + /var/cache/netsage/netflow_importer.cache + + + + 100000000 + + + 10m + + + + + + + + + + + + + /var/run/netsage-netflow-importer-daemon.pid + + + +``` + +## Logstash Setup Notes + +Standard logstash filter config files are provided with this package. Most should be used as-is, but the input and output configs may be modified for your use. + +The aggregation filter also has settings that may be changed as well - check the two timeouts and the aggregation maps path. + +When upgrading, these logstash configs will not be overwritten. Be sure any changes get copied into the production configs. + +FOR FLOW STITCHING/AGGREGATION - IMPORTANT! +Flow stitching (ie, aggregation) will NOT work properly with more than ONE logstash pipeline worker! +Be sure to set "pipeline.workers: 1" in /etc/logstash/logstash.yml and/or /etc/logstash/pipelines.yml. When running logstash on the command line, use "-w 1". + +## Start Logstash + +```sh +[root@host ~]# /sbin/service logstash start + or # systemctl start logstash.service +``` +It will take couple minutes to start. Log files are normally /var/log/messages and /var/log/logstash/logstash-plain.log. + +When logstash is stopped, any flows currently "in the aggregator" will be written out to /tmp/logstash-aggregation-maps (or the path/file set in 40-aggregation.conf). These will be read in and deleted when logstash is started again. + +## Start the Importer + +Typically, the daemons are started and stopped via init script (CentOS 6) or systemd (CentOS 7). They can also be run manually. 
The daemons all support these flags: + +`--config [file]` - specify which config file to read + +`--sharedconfig [file]` - specify which shared config file to read + +`--logging [file]` - the logging config + +`--nofork` - run in foreground (do not daemonize) + +```sh +[root@host ~]# /sbin/service netsage-netflow-importer start + or # systemctl start netsage-netflow-importer.service +``` +The Importer will create a deamon process and a worker process. When stopping the service, the worker process might take a few minutes to quit. If it does not quit, kill it by hand. + + +## Cron jobs + +Sample cron files are provided. Please review and uncomment their contents. These periodically download MaxMind, CAIDA, and Science Registry files, and also restart logstash. Logstash needs to be restarted in order for any updated files to be read in. + + + diff --git a/website/versioned_docs/version-1.2.11/deploy/choosing.md b/website/versioned_docs/version-1.2.11/deploy/choosing.md new file mode 100644 index 00000000..43ae4429 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/deploy/choosing.md @@ -0,0 +1,25 @@ +--- +id: choose_install +title: Choosing an Installation Procedure +sidebar_label: Choose Install +--- + +## Manual or BareMetal Installation + +The Manual (baremetal) Installation Guide will walk you through installing the pipeline using your own server infrastructure and requires you to maintain all the components involved. + +It will likely be a bit better when it comes to performance, and have greater flexibility, but there is also more complexity involved in configuring and setting up. + +If you are the ultimate consumer of the data then setting up a baremetal version might be worth doing. Or at least the final rabbitMQ that will be holding the data since it'll like need to handle a large dataset. + +## Dockerized Version + +The Docker version makes it trivial to bring up the pipeline for both a developer and consumer. The work is mostly already done for you. It should be a simple matter of configuring a few env settings and everything should 'just' work. + +If you are simply using the pipeline to deliver the anonymized network stats for someone else's consumption, then using the docker pipeline would be preferred. + +## Choose your adventure + +- [Manual/Server Installation](bare_metal_install) +- [Simple Docker](docker_install_simple.md) - 1 netflow sensor and/or 1 sflow sensor +- [Advanced Docker](docker_install_advanced.md) - options that allow for more complex configurations diff --git a/website/versioned_docs/version-1.2.11/deploy/docker_install_advanced.md b/website/versioned_docs/version-1.2.11/deploy/docker_install_advanced.md new file mode 100644 index 00000000..bc84b812 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/deploy/docker_install_advanced.md @@ -0,0 +1,216 @@ +--- +id: docker_install_advanced +title: Docker Advanced Options Guide +sidebar_label: Docker Advanced Options +--- + +If the basic Docker Installation does not meet your needs, the following customizations will allow for more complex situations. Find the section(s) which apply to you. + +*Please first read the Docker Installation guide in detail. This guide will build on top of that.* + + +## To Add an Additional Sflow or Netflow Collector + +If you have more than 1 sflow and/or 1 netflow sensor, you will need to create more collectors and modify the importer config file. The following instructions describe the steps needed to add one additional sensor. 
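(The worked example in step a. below adds an sflow collector. If the additional sensor is netflow, the same pattern applies and the block would look something like the following, with a hypothetical service name, port, and data path to adapt.)

```yaml
  example-netflow-collector:
    image: netsage/nfdump-collector:alpine-1.6.23
    restart: always
    command: nfcapd -T all -l /data -S 1 -w -z -p 9996
    volumes:
      - ./data/input_data/example-netflow:/data
    ports:
      - "9996:9996/udp"
```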
+
+Any number of sensors can be accommodated, although if there are more than a few being processed by the same Importer, you may run into issues where long-lasting flows from sensor A time out in the aggregation step while waiting for flows from sensors B to D to be processed. (Another option might be to run more than one Docker deployment.)
+
+
+#### a. Edit docker-compose.override.yml
+
+The pattern to add a flow collector is always the same. To add an sflow collector called example-collector, edit the docker-compose.override.yml file and add something like
+
+```yaml
+  example-collector:
+    image: netsage/nfdump-collector:alpine-1.6.23
+    restart: always
+    command: sfcapd -T all -l /data -S 1 -w -z -p 9997
+    volumes:
+      - ./data/input_data/example:/data
+    ports:
+      - "9997:9997/udp"
+```
+
+- collector name: should be updated to something that has some meaning, in our example "example-collector".
+- image: copy from the default collector sections already in the file.
+- command: choose between "sfcapd" for sflow and "nfcapd" for netflow, and at the end of the command, specify the port to watch for incoming flow data.
+- volumes: specify where to write the nfcapd files. Make sure the path is unique and in ./data/. In this case, we're writing to ./data/input_data/example. Change "example" to something meaningful.
+- ports: make sure the port here matches the port you've set in the command. Naturally all ports have to be unique for this host and the router should be configured to export data to the same port. (If the port exposed on your host needs to differ from the port the collector listens on inside the container, use host_port:container_port.)
+
+Make sure the indentation is right or you'll get an error about yaml parsing.
+
+You will also need to uncomment these lines:
+
+```yaml
+  volumes:
+    - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml
+```
+
+
+#### b. Edit netsage_override.xml
+
+To make the Pipeline Importer aware of the new data to process, you will need to create a custom Importer configuration: netsage_override.xml. This will replace the usual config file netsage_shared.xml.
+
+```sh
+cp compose/importer/netsage_shared.xml userConfig/netsage_override.xml
+```
+
+Edit netsage_override.xml and add a new "collection" section for the new sensor as in the following example. The flow-path should match the path set above in docker-compose.override.yml. $exampleSensorName is a new "variable"; don't replace it here, it will be replaced with a value that you set in the .env file. For the flow-type, enter "sflow" or "netflow" as appropriate. (Enter "netflow" if you're running IPFIX.)
+
+```xml
+  <collection>
+    <flow-path>/data/input_data/example/</flow-path>
+    <sensor>$exampleSensorName</sensor>
+    <flow-type>sflow</flow-type>
+  </collection>
+```
+
+#### c. Edit environment file
+
+Then, in the .env file, add a line that sets a value for the "variable" you referenced above, $exampleSensorName. The value is the name of the sensor which will be saved to elasticsearch and which appears in Netsage Dashboards. Set it to something meaningful and unique. E.g.,
+
+```ini
+exampleSensorName=MyNet Los Angeles sFlow
+```
+
+
+#### d.
Running the new collector + +After doing the setup above and selecting the docker version to run, you can start the new collector by running the following line, using the collector name (or by running `docker-compose up -d` to start up all containers): + +```sh +docker-compose up -d example-collector +``` + +## To Keep Only Flows From Certain Interfaces +If your sensors are exporting all flows, but only those using a particular interface are relevant, use this option in the .env file. The collectors and importer will save/read all incoming flows, but the logstash pipeline will drop those that do not have src_ifindex OR dst_inindex equal to one of those listed. + +In the .env file, uncomment lines in the appropriate section and enter the information required. Be sure `ifindex_filter_flag=True` with "True" capitalized as shown, any sensor names are spelled exactly right, and list all the ifindex values of flows that should be kept and processed. Some examples (use just one!): + +```sh +ifindex_filter_keep=123 +ifindex_filter_keep=123,456 +ifindex_filter_keep=Sensor 1: 789 +ifindex_filter_keep=123; Sensor 1: 789; Sensor 2: 800, 900 +``` + +In the first case, all flows that have src_ifindex = 123 or dst_ifindex = 123 will be kept, regardless of sensor name. (Note that this may be a problem if you have more than 1 sensor with the same ifindex values!) +In the 2nd case, if src or dst ifindex is 123 or 456, the flow will be processed. +In the 3rd case, only flows from Sensor 1 will be filtered, with flows using ifindex 789 kept. +In the last example, any flow with ifindex 123 will be kept. Sensor 1 flows with ifindex 789 (or 123) will be kept, and those from Sensor 2 having ifindex 800 or 900 (or 123) will be kept. + +Spaces don't matter except within the sensor names. Punctuation is required as shown. + + +## To Change a Sensor Name Depending on the Interface Used +In some cases, users want to keep all flows from a certain sensor but differentiate between those that enter or exit through specific sensor interfaces. This can be done by using this option in the .env file. + +In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 4 fields are set properly! For example, + +```sh +ifindex_sensor_rename_flag=True +ifindex_sensor_rename_old_name=IU Sflow +ifindex_sensor_rename_new_name=IU Bloomington Sflow +ifindex_sensor_rename_ifindex=10032 +``` + +In this case, any flows from the "IU Sflow" sensor that use interface 10032 (src_ifindex = 10032 OR dst_ifindex = 10032) will have the sensor name changed from "IU Sflow" to "IU Bloomington Sflow". Currently, only one such rename can be configured in Docker and only 1 ifindex is allowed. + +:::note +Please notify the devs at IU in advance, if you need to modify a sensor name, because the regexes used for determining sensor_group and sensor_type may have to be updated. +::: + +## To Do Sampling Rate Corrections in Logstash +When flow sampling is done, corrections have to be applied. For example, if you are sampling 1 out of 100 flows, for each flow measured, it is assumed that in reality there would be 100 flows of that size with that src and dst, so the number of bits (and the number of packets, bits/s and packets/s) is multiplied by 100. 
Usually the collector (nfcapd or sfcapd process) gets the sampling rate from the incoming data and applies the correction, but in some cases, the sensor may not send the sampling rate, or there may be a complex set-up that requires a manual correction. With netflow, a manual correction can be applied using the '-s' option in the nfsen config, if nfsen is being used, or the nfcapd command, but this is not convenient when using Docker. For sflow, there is no such option. In either case, the correction can be made in logstash as follows. + +In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 3 fields are set properly! The same correction can be applied to multiple sensors by using a comma-separed list. The same correction applies to all listed sensors. For example, + +```sh +sampling_correction_flag=True +sampling_correction_sensors=IU Bloomington Sflow, IU Sflow +sampling_correction_factor=512 +``` + +## To Change How Long Nfcapd Files Are Kept +The importer will automatically delete older nfcapd files for you, so that your disk doesn't fill up. By default, 3 days worth of files will be kept. This can be adjusted by making a netsage_override.xml file: + +```sh +cp compose/importer/netsage_shared.xml userConfig/netsage_override.xml +``` + +At the bottom of the file, edit this section to set the number of days worth of files to keep. Set cull-enable to 0 for no culling. Eg, to save 1 days worth of data: +````xml + + 1 + 1 + +```` + +You will also need to uncomment these lines in docker-compose.override.yml: + +```yaml + volumes: + - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml +``` + + +## To Save Flow Data to a Different Location + +By default, data is saved to subdirectories in the ./data/ directory (ie, the data/ directory in the git checkout). If you would like to use a different location, there are two options. + +1. The best solution is to create a symlink between ./data/ and the preferred location, or, for an NFS volume, export it as ${PROJECT_DIR}/data. + +During installation, delete the data/ directory (it should only contain .placeholder), then create your symlink. Eg, to use /var/netsage/ instead of data/, +```sh +cd {netsage-pipeline dir} +mkdir /var/netsage +rm data/.placeholder +rmdir data +ln -s /var/netsage {netsage-pipeline dir}/data +``` +(Check the permissions of the directory.) + +2. Alternatively, update volumes in docker-compose.yml and docker-compose.override.yml Eg, to save nfcapd files to subdirs in /mydir, set the collector volumes to `- /mydir/input_data/netflow:/data` (similarly for sflow) and set the importer and logstash volumes to `- /mydir:/data`. + +:::warning +If you choose to update the docker-compose file, keep in mind that those changes will cause a merge conflict or be wiped out on upgrade. +You'll have to manage the volumes exported and ensure all the paths are updated correctly for the next release manually. +::: + +## To Customize Java Settings / Increase Memory Available for Lostash + + +If cpu or memory seems to be a problem, try increasing the JVM heap size for logstash from 2GB to 3 or 4, no more than 8. + +To do this, edit LS_JAVA_OPTS in the .env file. 
+```yaml +LS_JAVA_OPTS=-Xmx4g -Xms4g +``` + +Here are some tips for adjusting the JVM heap size (https://www.elastic.co/guide/en/logstash/current/jvm-settings.html): + +- Set the minimum (Xms) and maximum (Xmx) heap allocation size to the same value to prevent the heap from resizing at runtime, which is a very costly process. +- CPU utilization can increase unnecessarily if the heap size is too low, resulting in the JVM constantly garbage collecting. You can check for this issue by doubling the heap size to see if performance improves. +- Do not increase the heap size past the amount of physical memory. Some memory must be left to run the OS and other processes. As a general guideline for most installations, don’t exceed 50-75% of physical memory. The more memory you have, the higher percentage you can use. + +To modify other logstash settings, rename the provided example file for JVM Options and tweak the settings as desired: + +```sh +cp userConfig/jvm.options_example userConfig/jvm.options +``` + +Also update the docker-compose.override.xml file to uncomment lines in the logstash section. It should look something like this: + +```yaml +logstash: + image: netsage/pipeline_logstash:latest + volumes: + - ./userConfig/jvm.options:/usr/share/logstash/config/jvm.options +``` + +## To Bring up Kibana and Elasticsearch Containers + +The file docker-compose.develop.yaml can be used in conjunction with docker-compose.yaml to bring up the optional Kibana and Elastic Search components. + +This isn't a production pattern but the tools can be useful at times. Please refer to the [Docker Dev Guide](../devel/docker_dev_guide#optional-elasticsearch-and-kibana) + diff --git a/website/versioned_docs/version-1.2.11/deploy/docker_install_simple.md b/website/versioned_docs/version-1.2.11/deploy/docker_install_simple.md new file mode 100644 index 00000000..c4216138 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/deploy/docker_install_simple.md @@ -0,0 +1,121 @@ +--- +id: docker_install_simple +title: Docker Installation Guide +sidebar_label: Docker Installation +--- +In this deployment guide, you will learn how to deploy a basic Netsage setup that includes one sflow and/or one netflow collector. If you have more than one collector of either type, or other special situations, see the Docker Advanced guide. + +The Docker containers included in the installation are + - rabbit (the local RabbitMQ server) + - sflow-collector (receives sflow data and writes nfcapd files) + - netflow-collector (receives netflow data and writes nfcapd files) + - importer (reads nfcapd files and puts flows into a local rabbit queue) + - logstash (logstash pipeline that processes flows and sends them to their final destination, by default a local rabbit queue) + - ofelia (cron-like downloading of files used by the logstash pipeline) + +The code and configs for the importer and logstash pipeline can be viewed in the netsage-project/netsage-pipeline github repo. See netsage-project/docker-nfdump-collector for code related to the collectors. + + +### 1. Set up Data Sources +The data processing pipeline needs data to ingest in order to do anything, of course. There are three types of data that can be consumed. + + - sflow + - netflow + - tstat + +At least one of these must be set up on a *sensor* (i.e., flow *exporter* / router), to provide the incoming flow data. +You can do this step later, but it will helpful to have it working first. 
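(One way to verify this step once your routers are configured: watch for flow export packets arriving on the pipeline host. A hypothetical check with tcpdump, assuming the default ports described below and a network interface named eth0:)

```sh
# Should show a steady trickle of UDP packets from your flow exporters
tcpdump -n -i eth0 'udp port 9998 or udp port 9999'
```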
+ +Sflow and netflow data should be exported to the pipeline host where there will be *collectors* (nfcapd and/or sfcapd processes) ready to receive it (see below). To use the default settings, send sflow to port 9998 and netflow/IPFIX to port 9999. On the pipeline host, allow incoming traffic from the flow exporters, of course. + +Tstat data should be sent directly to the logstash input rabbit queue "netsage_deidentifier_raw" on the pipeline host. No collector is needed for tstat data. See the netsage-project/tstat-transport repo. (From there, logstash will grab the data and process it the same way as it processes sflow/netflow data. (See the Docker Advanced guide.) + +### 2. Set up a Pipeline Host +Decide where to run the Docker Pipeline and get it set up. Adjust iptables to allow the flow exporters (routers) to send flow data to the host. + +Install Docker Engine (docker-ce, docker-ce-cli, containerd.io) - see instructions at [https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/). + +Install Docker Compose from Docker's GitHub repository - see [https://docs.docker.com/compose/install/](https://docs.docker.com/compose/install/). You need to **specify version 1.29.2** (or newer) in the curl command. + +Check default file permissions. If the *logstash* user is not able to access the logstash config files in the git checkout, you'll get an error from logstash saying there are no .conf files found even though they are there. Various components also need to be able to read and write to the data/ directory in the checkout. Defaults of 775 (u=rwx, g=rwx, o=rx) should work. + +### 3. Clone the Netsage Pipeline Project + +Clone the netsage-pipeline project from github. +```sh +git clone https://github.com/netsage-project/netsage-pipeline.git +``` + +When the pipeline runs, it uses the logstash conf files that are in the git checkout (in conf-logstash/), as well as a couple other files like docker-compose.yml, so it is important to checkout the correct version. + +Move into the netsage-pipeline/ directory (**all git and docker commands must be run from inside this directory!**), then checkout the most recent version of the code. It will say you are in 'detached HEAD' state if you don't include -b. +```sh +git checkout {tag} +``` +Replace "{tag}" with the release version you intend to use, e.g., "v1.2.11". ("Master" is the development version and is not intended for general use!) +`git status` will confirm which branch you are on, e.g., master or v1.2.11. + +### 4. Create Docker-compose.override.yml + +Information in the `docker-compose.yml` file tells docker which containers (processes) to run and sets various parameters for them. +Settings in the `docker-compose.override.yml` file will overrule and add to those. Note that docker-compose.yml should not be edited since upgrades will replace it. Put all customizations in the override file, since override files will not be overwritten. + +Collector settings may need to be edited by the user, so the information that docker uses to run the collectors is specified (only) in the override file. Therefore, docker-compose_override.example.yml must always be copied to docker-compose_override.yml. + +```sh +cp docker-compose.override_example.yml docker-compose.override.yml +``` + +By default docker will bring up a single sflow collector and a single netflow collector that listen to udp traffic on ports localhost:9998 and 9999. If this matches your case, you don't need to make any changes to the docker-compose.override_example.yml. 
+
+- If you have only one collector, remove or comment out the section for the one not needed, so that the unneeded collector doesn't run and simply create empty nfcapd files.
+- If the collectors need to listen on different ports, make the appropriate changes here in both the "command:" and "ports:" lines.
+- By default, the collectors will save flows to nfcapd files in sflow/ and netflow/ subdirectories in `./data/input_data/` (i.e., the data/ directory in the git checkout). If you need to save the data files to a different location, see the Docker Advanced section.
+
+Other lines in this file you can ignore for now.
+
+:::note
+If you run into issues, try removing all the comments in the override file, as they may conflict with the parsing done by docker-compose, though we have not found this to be a problem.
+:::
+
+### 5. Choose Pipeline Version
+
+Once you've created the docker-compose.override.yml file and finished adjusting it for any customizations, you're ready to select which image versions Docker should run.
+
+```sh
+./scripts/docker_select_version.sh
+```
+When prompted, select the **same version** you checked out earlier.
+
+This script will replace the version numbers of docker images in docker-compose.override.yml and docker-compose.yml with the correct values.
+
+### 6. Create Environment File
+
+{@import ../components/docker_env.md}
+
+## Testing the Collectors
+
+At this point, you can start the two flow collectors by themselves by running the following line. If you only need one of the collectors, remove the other from this command.
+
+(See the next section for how to start all the containers, including the collectors.)
+
+```sh
+docker-compose up -d sflow-collector netflow-collector
+```
+
+Subdirectories for sflow/netflow, year, month, and day are created automatically under `data/input_data/`. File names contain dates and times.
+These are not text files; to view the contents, use an [nfdump command](http://www.linuxcertif.com/man/1/nfdump/) (you will need to install nfdump).
+Files will be deleted automatically by the importer as they age out (the default is to keep 3 days).
+
+If the collector(s) are running properly, you should see nfcapd files being written every 5 minutes, and they should have sizes of more than a few hundred bytes. (Empty files still have header and footer lines.)
+See Troubleshooting if you have problems.
+
+To stop the collectors:
+```sh
+docker-compose down
+```
+
+## Running the Collectors and Pipeline
+
+{@import ../components/docker_pipeline.md}
+
diff --git a/website/versioned_docs/version-1.2.11/deploy/docker_troubleshooting.md b/website/versioned_docs/version-1.2.11/deploy/docker_troubleshooting.md
new file mode 100644
index 00000000..7cfc2690
--- /dev/null
+++ b/website/versioned_docs/version-1.2.11/deploy/docker_troubleshooting.md
@@ -0,0 +1,35 @@
+---
+id: docker_troubleshoot
+title: Docker Troubleshooting
+sidebar_label: Troubleshooting
+---
+
+## Troubleshooting
+
+### If you are not seeing flows after installation
+
+**Troubleshooting checklist:**
+
+- Use `docker-compose ps` to be sure the collectors (and other containers) are running.
+- Make sure you configured your routers to point to the correct address/port where the collector is running.
+- Check iptables on your pipeline host to be sure incoming traffic from the routers is allowed.
+- Check to see if nfcapd files are being written. 
There should be a directory for the year, month, and day in netsage-pipeline/data/input_data/netflow/ or sflow/, and files should be larger than a few hundred bytes. If the files exist but are too small, the collector is running but there are no incoming flows. "nfdump -r filename" will show the flows in a file (you may need to install nfdump).
+- Make sure you created .env and docker-compose.override.yml files and updated the settings accordingly, sensorName especially, since that identifies the source of the data.
+- Check the logs of the various containers to see if anything jumps out as being invalid: `docker-compose logs $service`, where $service is logstash, importer, rabbit, etc.
+- If the final rabbit queue is on an external host, check the credentials you are using and whether iptables on that host allows incoming traffic from your pipeline host.
+
+### If flow collection stops
+
+**Errors:**
+- See if any of the containers have died using `docker ps`.
+- Check the logs of the various containers to see if anything jumps out as being invalid. Eg, `docker-compose logs logstash`.
+- If logstash dies with an error about not finding \*.conf files, make sure conf-logstash/ and the directories and files within are readable by everyone (and directories are executable by everyone). The data/ directory and subdirectories need to be readable and writable by everyone, as well.
+
+**Disk space:**
+- If the pipeline suddenly fails, check to see if the disk is full. If it is, first try getting rid of old docker images and containers to free up space: `docker image prune -a` and `docker container prune`.
+- Also check to see how much space the nfcapd files are consuming. You may need to add more disk space. You could also try automatically deleting nfcapd files after fewer days (see Docker Advanced).
+
+**Memory:**
+- If you are processing a lot of data, sometimes docker may need to be allocated more memory. The most
+likely culprit is logstash (java), which is only allocated 2GB of RAM by default. Please see the Docker Advanced guide for how to change this.
+
diff --git a/website/versioned_docs/version-1.2.11/deploy/docker_upgrade.md b/website/versioned_docs/version-1.2.11/deploy/docker_upgrade.md
new file mode 100644
index 00000000..d640d12a
--- /dev/null
+++ b/website/versioned_docs/version-1.2.11/deploy/docker_upgrade.md
@@ -0,0 +1,80 @@
+---
+id: docker_upgrade
+title: Upgrading
+sidebar_label: Docker - Upgrading
+---
+
+To upgrade a previous installation of the Dockerized pipeline, perform the following steps.
+
+### Shut things down
+
+```sh
+cd {netsage-pipeline directory}
+docker-compose down
+```
+This will stop and remove all the docker containers, including the importer, logstash, and any collectors. Note that incoming flow data will not be saved during the time the collectors are down.
+
+### Update Source Code
+
+To upgrade to a new release, pull new tags/code from github and docker images from dockerhub. Your customized .env and override files will not be overwritten, nor will data files, cache files, or downloaded support files.
+
+```sh
+git reset --hard
+git pull origin master
+```
+
+:::warning
+git reset --hard will obliterate any changes you have made to non-override files, eg, logstash conf files. If necessary, please make sure you commit and save to a feature branch before continuing.
+:::
+
+Run these three commands to select the new release you want to run. In the first, replace "{tag}" with the version to run (eg, v1.2.11). 
When asked by the third, select the same version as the tag you checked out. +```sh +git checkout -b {tag} +git pull +./scripts/docker_select_version.sh +``` +The docker-compose.yml and docker-compose.override.yml should both now have the version number you selected for pipeline_importer and pipeline_logstash. + +### Check/Update Customization Files +Occasionally, something may change which will necessitate editing your override and/or env file. + +- Compare the new `docker-compose.override_example.yml` file to your `docker-compose.override.yml`. Be sure to check to see if the version of nfdump has changed. Look for lines like `image: netsage/nfdump-collector:`. Make sure the version in your override file matches what is the example file. (You do not need to actually perform any upgrade yourself. This will ensure the correct version is pulled from Docker Hub.) + +- Also, look for`version: "x.x"` at the top. If the version number is different, change it in your docker-compose.override.yml file. (This is the Compose file format version.) + + +- Compare your `.env` file with the new `env.example` file to see if any new lines or sections have been added. If there have been any changes relevant to your deployment, eg, new options you want to use, copy the changes into your .env file. + +- If you used the Docker Advanced guide to make a `netsage_override.xml` file, compare it to `netsage_shared.xml` to see if there are any changes. This is unlikely. + + +### Update Docker Containers + +This should be done automatically when you start up the conctainers, but you can also pull new images from Docker Hub now. + +``` +docker-compose pull +``` + +### Restart all the Docker Containers + +``` +docker-compose up -d +``` + +This will start all the services/containers listed in the docker-compose.yml and docker-compose.override.yml files, including the importer, logstash pipeline, and collectors. + +### Delete old images and containers + +To keep things tidy, delete any old images and containers that are not being used. + +``` +docker image prune -a +docker container prune +``` + +To check which images you have +``` +docker image ls +``` + diff --git a/website/versioned_docs/version-1.2.11/devel/docker.md b/website/versioned_docs/version-1.2.11/devel/docker.md new file mode 100644 index 00000000..21cb7d5c --- /dev/null +++ b/website/versioned_docs/version-1.2.11/devel/docker.md @@ -0,0 +1,83 @@ +--- +id: docker_dev_guide +title: Docker Dev Guide +sidebar_label: Docker Dev Guide +--- + +## Selecting a Version + +You can use the "master" version or a tagged version. +To select a released version use the docker_select_version.sh script (see the Deployment Guide). +If you wish to use the development version (master branch) simply skip the docker_select_version.sh step. + +## Installing + +See the Deployment Guide to learn how to set up collectors, your environment and override files, etc. + +## Importer + +The importer "shared" config that Docker uses is defined in compose/netsage_shared.xml. ** NOTE: If you want to make changes to this file, you will need to rebuild the container** + +## Build Images + +The images are published on Docker Hub, but if you'd like to incorporate local changes please follow the process below. 
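+
+For example, as noted in the Importer section above, changes to compose/netsage_shared.xml only take effect after the importer image is rebuilt. A minimal sketch of that rebuild is shown below; it assumes the importer service is named `importer` (as it is in the compose files referenced elsewhere in these docs) and that the freshly built image tag matches the one selected in docker-compose.yml, so treat it as a sketch rather than the exact procedure:
+
+```sh
+docker-compose -f docker-compose.build.yml build importer
+docker-compose up -d importer
+```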
+ +### Build Using Source Code + +If you would like to build the *importer* container using the version of the pipeline scripts found in the GitHub repo then run the following: + +```sh +docker-compose -f docker-compose.build.yml build + +``` + +NOTE: The importer container includes the config files for the logstash pipeline. + + +## Optional: ElasticSearch and Kibana + +You can optionally store flow data locally in an ElasticSearch container and view the data with Kibana. Local storage can be enabled with the following steps: + +1. Uncomment the following lines in conf-logstash/99-outputs.conf: + +``` +elasticsearch { + hosts => ["elasticsearch"] + index => "netsage_flow-%{+YYYY.MM.dd}" +} +``` + +2. Comment out the `rabbitmq {...}` block in conf-logstash/99-outputs.conf if you do not want to also send logstash output to RabbitMQ. + +3. Run the containers using the following line: ` ` ` docker-compose -f docker-compose.yml -f docker-compose.develop.yml up -d ` ` ` + +## Handy Docker Commands + +### Start the Containers + +``` sh +docker-compose up -d +``` + +### Stop the Containers + +``` sh +docker-compose stop && docker-compose rm +``` + +### Enter a Container Shell + +``` sh +docker-compose exec logstash bash #bash shell in logstash container +docker-compose exec importer bash #bash shell in importer container +docker-compose exec rabbit bash #bash shell in rabbit container +``` + +### View Container Logs + +``` sh +docker-compose logs -f #view logs for all containers +docker-compose logs -f logstash #view logs for logstash container +docker-compose logs -f importer #view logs for importer container +docker-compose logs -f rabbit #view logs for rabbit container +``` diff --git a/website/versioned_docs/version-1.2.11/devel/documentation_guide.md b/website/versioned_docs/version-1.2.11/devel/documentation_guide.md new file mode 100644 index 00000000..076628b2 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/devel/documentation_guide.md @@ -0,0 +1,143 @@ +--- +id: docusaurus +title: Revising Documentation +sidebar_label: Docusaurus +--- + +This project's documentation uses Docusaurus. + +Docusaurus converts markdown into html and builds a static website using React UI components, which can be exported to a webserver. + +Yarn is a package manager for JavaScript and replaces the npm client. It is not strictly necessary but highly encouraged. + +To extend the docs simply create a markdown file and reference the ID in the side bar config. Please see the related documentation +at the [docusaurus 2](https://v2.docusaurus.io/) project website. + +*THE FOLLOWING INSTRUCTIONS ARE NOT CONFIRMED TO WORK. PLEASE UPDATE WITH CORRECTIONS.* + +## If Not Using Docker +These are instructions for editing and releasing docs without using Docker. + +### Installation + +To get started the first time, install npm, then use that to install yarn +``` +$ sudo yum install npm +$ sudo npm install -g yarn +``` + +Git clone the netsage pipeline project, then run yarn install to get all the dependencies listed within package.json +``` +$ cd netsage-pipeline/website +$ yarn install +``` + +### If Local Development + +If you are working on your local machine, rather than sshing into a host, you can view changes to the docs in a browser as you work. Use the following commands to generate the static website content (gets written into the build directory), then start a local development server and open up a browser window in which to view the docs. 
Most changes you make will be reflected live without having to restart the server. +``` +$ yarn build +$ yarn start +go to http://localhost:3000 +``` + +### To Make Changes +Whether on a local machine or a linux host, to make changes, edit the files in website/docs/. +When finished, git add, git commit, git push, as usual. +Repeat as needed. + +To view the changes you've made with some formatting, just go to the file on github in a browser. To see all of the formatting, read the "Deploying Docs to github.io" section below. + +### Tagging a New release + +When it's time to release a new version of the Pipeline, you need to create a new version of the docs as well. + +Once the documentation is stable and you don't forsee any new change, please do the following: + +``` +$ yarn run docusaurus docs:version a.b.c +``` + +replacing a.b.c with the next release version number. +This will create new versioned docs in website/versioned_docs/. + +Then edit docusaurus.config.js and change `lastVersion:` to refer to the new version number. + +Finally, commit and push the following to github: + * website/versioned_docs/version-a.b.c/ + * website/versioned_sidebars/version-a.b.c.sidebars.json + * versions.json + * docusaurus.config.js + + +### Deploying Docs to github.io +Whether you have created a new set of versioned tags or just want to update the docs in "master", to make changes appear at https://netsage-project.github.io/netsage-pipeline, do the following. + +If Travis or some other CI is working, it will run yarn install and yarn deploy to do this automatically. + +If it is not, do it manually: +``` +$ USE_SSH="true" GIT_USER="your-username" yarn deploy +``` +replacing your-username. This sets a couple env vars then runs 'yarn deploy' which runs 'docusaurus deploy' (see package.json) which pushes the static website created to url: "https://netsage-project.github.io" (see docusaurus.config.js) + +NOTE: You need to have created ssh keys on the host you are running this on and added them to your github account. + +### Removing a version + +To remove version 1.2.6 of the docs, for example, + +we need to: + + * update versions.json to remove the reference + * remove the versioned_docs/version-1.2.6 + * remove versioned_sidebars/version-1.2.6-sidebars.json + +## If Using Docker + +You may also use a docs Docker container to simplify installation, making changes, and deployment. This method starts a local web server that allows you to see changes to the docs in a browser on your local machine, as they are made. + +### Build and Start the Container + +Git clone the netsage pipeline project then build and start the container. +The Dockerfile in website/ tells how to build an image that runs yarn. Docker-compose.yml brings up a docs container. +``` +$ cd netsage-pipeline/website +$ docker-compose build build_docs +$ docker-compose up -d docs +go to http://localhost:8000/netsage-pipeline/ +``` + +### To Make Changes +Whether on a local machine or a linux host, to make changes, edit the files in website/docs/. +When finished, git add, git commit, git push, as usual. +Repeat as needed. + +### Tagging a New release + +When it's time to release a new version of the Pipeline, you need to create a new version of the docs as well. + +Once the documentation is stable and you don't forsee any new change, please do the following: + +``` +$ docker-compose build build_docs +$ docker-compose run docs yarn run docusaurus docs:version a.b.c +``` +replacing a.b.c with the next release version number. 
+This will create new versioned docs in website/versioned_docs/.
+
+Then edit docusaurus.config.js and change `lastVersion:` to refer to the new version number.
+
+Finally, commit and push the following to github:
+ * website/versioned_docs/version-a.b.c/
+ * website/versioned_sidebars/version-a.b.c.sidebars.json
+ * versions.json
+ * docusaurus.config.js
+
+
+### Deploying Docs to github.io
+How to do this when using Docker ??? Get into the container ???
+
+For now, go to a linux server that has yarn installed and
+follow the instructions under If Not Using Docker.
+
diff --git a/website/versioned_docs/version-1.2.11/devel/pipeline_dataset.md b/website/versioned_docs/version-1.2.11/devel/pipeline_dataset.md
new file mode 100644
index 00000000..a061957d
--- /dev/null
+++ b/website/versioned_docs/version-1.2.11/devel/pipeline_dataset.md
@@ -0,0 +1,34 @@
+---
+id: dev_dataset
+title: Pipeline Replay Dataset
+sidebar_label: Replay Dataset
+---
+
+The Netsage Pipeline processes network data. There are some components and patterns we can use to test
+the behavior, such as the Ruby unit [tests](https://github.com/netsage-project/netsage-pipeline/tree/master/conf-logstash/ruby/spec) in logstash and the [generator](https://www.elastic.co/guide/en/logstash/current/plugins-inputs-generator.html) plugin, but the best
+test is to replay network data and inspect the output in the grafana dashboard.
+
+Two sample data sets are provided for the two types of collectors we have (Netflow and Sflow). The network data and ips have been anonymized and should have no identifying information.
+
+You can download the files from [here](https://drive.google.com/drive/folders/19fzY5EVoKwtYUaiBJq5OxAR82yDY0taG).
+
+Please take note of which ports the collectors are listening on. Check your docker-compose.override.yml file. If you are using default ports, they should match this [example](https://github.com/netsage-project/netsage-pipeline/blob/master/docker-compose.override_example.yml).
+
+Currently the default ports are:
+ - 9998/udp for sflow
+ - 9999/udp for netflow
+
+Naturally the collectors have to be running in order for any of this to be usable. You can read more on how to get them running in the [Docker Simple Deployment Guide](../deploy/docker_install_simple.md#running-the-collectors).
+
+In order to replay the data, use the following commands for netflow and sflow respectively:
+
+### Netflow
+
+```
+nfreplay -H 127.0.0.1 -p 9999 -r nfcapd-ilight-anon-20200114 -v 9 -d 1000
+```
+
+### Sflow
+
+Coming soon. nfreplay will not work with the sflow data type.
+
diff --git a/website/versioned_docs/version-1.2.11/devel/tag.md b/website/versioned_docs/version-1.2.11/devel/tag.md
new file mode 100644
index 00000000..18819a89
--- /dev/null
+++ b/website/versioned_docs/version-1.2.11/devel/tag.md
@@ -0,0 +1,113 @@
+---
+id: docker_dev_tag
+title: How to Release a New Version of the Pipeline
+sidebar_label: Making Releases
+---
+
+If a new version of nfdump needs to be used, make the new nfdump-collector image(s) first (see below) and update the docker-compose files with the new version number, then make new pipeline_importer and pipeline_logstash images.
+
+## Make an RPM Release
+
+Use standard procedures to create an rpm of the new version of the pipeline. Update the version number and the CHANGES file, build the rpm, repoify, etc., then upgrade grnoc-netsage-deidentifier on bare-metal hosts using yum. 
If all works well, do the following steps to create new Docker images with which to upgrade Docker deployments. + +## In Github, Create a Release Tag + +Create a new Tag or Release in Github, eg, v1.2.11. +Be sure to copy info from the CHANGES file into the Release description. + +## To Build and Push Images Manually + +Below is the procedure to build pipeline_importer and pipeline_logstash images manually. + +Install docker-compose if not done already. See the Docker Installation instructions. + +Git clone (or git pull) the pipeline project and check out the tag you want to build, then set the version number in docker-compose.build.yml using the script. Eg, for v1.2.11, +``` +git clone https://github.com/netsage-project/netsage-pipeline.git +cd netsage-pipeline +git checkout -b v1.2.11 +./scripts/docker_select_version.sh 1.2.11 +``` + +Then build the pipeline_importer and pipeline_logstash images and push them to Docker Hub: +``` +$ sudo systemctl start docker +$ sudo docker-compose -f docker-compose.build.yml build +$ sudo docker login + provide your DockerHub login credentials +$ sudo docker-compose -f docker-compose.build.yml push (will push images mentioned in docker-compose.yml ??) + or $ docker push $image:$tag (will push a specific image version) +$ sudo systemctl stop docker +``` +If you run into an error about retrieving a mirrorlist and could not find a valid baseurl for repo, restart docker and try again. +If that doesn't work, try adding this to /etc/hosts: `67.219.148.138 mirrorlist.centos.org`, and/or try `yum install net-tools bridge-utils`, and/or restart network.service then docker. + +The person pushing to Docker Hub must have a Docker Hub account and belong to the Netsage team (3 users are allowed, for the free level). + +It might be a good idea to test the images before pushing them. See "Test Docker Images" below. + + +## Building With Automation + +??? + +## Test Docker Images + +See the Docker installation instructions for details... + +In the git checkout of the correct version, make an .env file and a docker-compose.override.yml file. You probably want to send the processed data to a dev Elasticsearch instance. Use samplicate or some other method to have data sent to the dev host. + +Run docker_select_version.sh if you haven't already, then start it up `$ sudo docker-compose up -d`. If there are local images, they'll be used, otherwise they'll be pulled from Docker Hub. + +After about 30 minutes, you should see flows in elasticsearch. + +## Make Versioned Docs + +A new set of versioned docs also has to be tagged once you are done making changes for the latest pipeline version. See the **Docusaurus guide**. + +## To Make New Nfdump-Collector Images + +If a new version of nfdump has been released that we need, new nfdump-collector images need to be made. + +``` +$ git clone https://github.com/netsage-project/docker-nfdump-collector.git +$ cd docker-nfdump-collector +$ sudo systemctl start docker +``` + +To use squash: create a file at /etc/docker/daemon.json and put into it +``` + "experimental": true + "debug: false" +``` + +To build version $VER, eg, 1.6.23 (both regular and alpine linux versions ?): +``` +$ sudo docker build --build-arg NFDUMP_VERSION=$VER --tag netsage/nfdump-collector:$VER --squash collector +$ sudo docker build --build-arg NFDUMP_VERSION=$VER --tag netsage/nfdump-collector:alpine-$VER -f collector/Dockerfile-alpine --squash . 
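+# optional sanity check -- list the images that were just built (tags assumed from the commands above)
+$ sudo docker image ls netsage/nfdump-collector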
+``` + +To push to Docker Hub and quit docker +``` +$ sudo docker login + provide your DockerHub login credentials +$ sudo docker push netsage/nfdump-collector:$VER +$ sudo systemctl stop docker +``` + +To use the new collector image in the pipeline, change the version number in docker-compose.override_example.yml. For example, to use the alpine-1.6.23 image: +``` +sflow-collector: + image: netsage/nfdump-collector:alpine-1.6.23 +... +netflow-collector: + image: netsage/nfdump-collector:alpine-1.6.23 +``` + +Remind users to make the same change in their docker-compose.override.yml file when they do the next pipeline upgrade. + + +### New Version of Logstash + +If a new version of logstash has been released that we want everyone to use, +??? diff --git a/website/versioned_docs/version-1.2.11/pipeline/elastic_search.md b/website/versioned_docs/version-1.2.11/pipeline/elastic_search.md new file mode 100644 index 00000000..c82a8dbd --- /dev/null +++ b/website/versioned_docs/version-1.2.11/pipeline/elastic_search.md @@ -0,0 +1,124 @@ +--- +id: elastic +title: Elasticsearch +sidebar_label: Elasticsearch +--- + +Flow data is ultimately saved to Elasticsearch. Following are the fields that are used/created in Logstash and that you may see returned by an elasticsearch query. + +### Flow fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|start |Jun 9, 2020 @ 17:39:53.808 | Start time of the flow (first packet seen)| +|end |Jun 9, 2020 @ 17:39:57.699 |End time of the flow (last packet seen)| +|meta.id |a17c4f0542... |Id of the flow (hash of 5-tuple + Sensor name)| +|es_doc_id |4f46bef884... |Hash of meta.id and start time. May be used as doc id in ES to prevent duplicates, but see Notes elsewhere.| +|meta.flow_type |sflow |'sflow', 'netflow', or 'tstat'| +|meta.protocol |tcp |Protocol used| +|meta.sensor_id | snvl2-pw-sw-1-mgmt-2.cenic.net|Sensor name (set in importer config, may not always be a hostname) | +|meta.sensor_group |CENIC |Sensor group, usually the network | +|meta.sensor_type |Regional Network |Sensor type ('Circuit', 'Regional Network', etc) | +|meta.country_scope |Domestic |'Domestic', 'International', or 'Mixed', depending on countries of src and dst| +|meta.is_network_testing | no | 'yes' if discipline is 'CS.Network Testing and Monitoring' or port is one used for PerfSonar: 5001, 5101, or 5201| + +### Source Fields (Destination Fields similarly with "dst") + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|meta.src_ip |171.64.68.x | deidentified IP address| +|meta.src_port |80 |port used | +|meta.src_asn |32 |Source ASN from the flow header or, in some cases, the ANS of the IP from the MaxMind GeoIP ASN database| +|meta.src_organization |Stanford University | organization that owns the AS from the CAIDA ASN-Organization database +|meta.src_location.lat | 37.423 | latitude of the IP from the MaxMind GeoIP City database| +|meta.src_location.lon |-122.164 | longitude of the IP from the MaxMind GeoIP City database| +|meta.src_country_name |United States | country of the IP from the MaxMind GeoIP City database| +|meta.src_continent |North America | continent of the IP the MaxMind GeoIP City database| +|meta.src_ifindex |166 |the index of the interface the flow came into| + +### Source Science Registry Fields (Destination Fields similarly with "dst") +The [Science Registry](https://scienceregistry.netsage.global/rdb/) stores human-curated information 
about various "resources". Resources are sources and destinations of flows. + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|meta.scireg.src.discipline |MPS.Physics.High Energy |The science discipline that uses the resource (ie IP). Note that not the src MAY not have the same discipline as the dst. | +|meta.scireg.src.role |Storage |Role that the host plays | +|meta.scireg.src.org_name |Boston University (BU) |The organization the manages and/or uses the resource, as listed in the Science Registry| +|meta.scireg.src.org_abbr |Boston U |A shorter name for the organization. May not be the official abbreviation.| +|meta.scireg.src.resource |BU - ATLAS |Descriptive resource name from SciReg | +|meta.scireg.src.resource_abbr | |Resource abbreviation (if any)| +|meta.scireg.src.project_names |ATLAS |"Projects" that the resource is part of| +|meta.scireg.src.latitude |37.4178 |Resource's latitude, as listed in the Science Registry| +|meta.scireg.src.longitude |-122.178 |Resource's longitude, as listed in the Science Registry| + +### Source "Preferred" Fields (Destination Fields similarly with "dst") + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|meta.src_preferred_org |Stanford University |If the IP was found in the Science Registry, this is the SciReg organization, otherwise it is the CAIDA organization| +|meta.src_preferred_location.lat |37.417800 | Science Registry value if available, otherwise the MaxMind City DB value| +|meta.src_preferred_location.lon |-122.172000i | Science Registry value if available, otherwise the MaxMind City DB value | + +### Value Fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|values.num_bits |939, 458, 560 |Sum of the number of bits in the (stitched) flow| +|values.num_packets |77, 824 |Sum of the number of packets in the (stitched) flows| +|values.duration |3.891 |Calculated as end minus start.| +|values.bits_per_second |241, 443, 988 |Calculated as num_bits divided by duration | +|values.packets_per_second |20, 001 |Calculated as num_packets divided by duration| + +### Tstat Value Fields + +|name |example | +|-----------------------|-----------------------| +|values.tcp_cwin_max |1549681 | +|values.tcp_cwin_min |17| +|values.tcp_initial_cwin|313| +|values.tcp_max_seg_size|64313| +|values.tcp_min_seg_size|17| +|values.tcp_mss |8960| +|values.tcp_out_seq_pkts|0| +|values.tcp_pkts_dup |0| +|values.tcp_pkts_fc |0| +|values.tcp_pkts_fs |0| +|values.tcp_pkts_reor |0| +|values.tcp_pkts_rto |0| +|values.tcp_pkts_unfs |0| +|values.tcp_pkts_unk |2| +|values.tcp_pkts_unrto |0| +|values.tcp_rexmit_bytes |1678| +|values.tcp_rexmit_pkts |2| +|values.tcp_rtt_avg |0.044| +|values.tcp_rtt_max |39.527| +|values.tcp_rtt_min |0.001| +|values.tcp_rtt_std |0.276| +|values.tcp_sack_cnt | 1| +|values.tcp_win_max |1549681| +|values.tcp_win_min |17| +|values.tcp_window_scale |13| + +### Developer Fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|@pipeline_ver |1.2.11 | Version number of the pipeline used to process this flow | +|@ingest_time |Jun 9, 2020 @ 10:03:20.700 | The time the flow entered the logstash pipeline | +|@timestamp |Jun 9, 2020 @ 18:03:21.703 |The time the flow entered the logstash pipeline for tstat flows, or the time stitching finished and the event exited the aggregation filter for other 
flows.| +|@exit_time |Jun 9, 2020 @ 18:03:25.369 |The time the flow exited the pipeline | +|@processing_time |688.31 |@exit_time minus @ingest_time. Useful for seeing how long stitching took. | +|stitched_flows |13 |Number of flows that came into logstash that were stitched together to make this final one. 1 if no flows were stitched together. 0 for tstat flows, which are never stitched. | +|tags |maxmind src asn |Various info and error messages| +|trial | 5 |Can be set in 40-aggregation.conf if desired| + +### Elasticsearch Fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|_index | om-ns-netsage-2020.06.14 | name of the index ("database table") | +|_type |_doc | set by ES | +|_id |HRkcm3IByJ9fEnbnCpaY | elasticsearch document id. | +|_score |1 |set by ES query | +|@version |1 | set by ES | + diff --git a/website/versioned_docs/version-1.2.11/pipeline/importer.md b/website/versioned_docs/version-1.2.11/pipeline/importer.md new file mode 100644 index 00000000..24b05c4b --- /dev/null +++ b/website/versioned_docs/version-1.2.11/pipeline/importer.md @@ -0,0 +1,14 @@ +--- +id: importer +title: Importer +sidebar_label: Importer +--- +A netsage-netflow-importer script reads any new nfcapd files that have come in after a configurable delay and writes the results to the "netsage_deidentifier_raw" RabbitMQ queue. +All flow data waits in the queue until it is read in and processed by the logstash pipeline. + +To read nfcapd files, the importer uses an nfdump command with the "-a" option to aggregate raw flows within the file by the "5-tuple," i.e., the source and destination IPs, ports, and protocol. The "-L" option is used to throw out any aggregated flows below a threshold number of bytes. This threshold is specified in the importer config file. + +### Configuration +Configuration files for the importer are netsage_netflow_importer.xml and netsage_shared.xml in /etc/grnoc/netsage/deidentfier/. Comments in the files briefly describe the options. See also the Deployment pages in these docs. + +To avoid re-reading nfcapd files, the importer stores the names of files that have already been read in /var/cache/netsage/netflow_importer.cache. diff --git a/website/versioned_docs/version-1.2.11/pipeline/intro.md b/website/versioned_docs/version-1.2.11/pipeline/intro.md new file mode 100644 index 00000000..f4cce287 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/pipeline/intro.md @@ -0,0 +1,37 @@ +--- +id: intro +title: Intro +sidebar_label: Intro +--- +# The NetSage Pipeline + +## Description + +The Netsage Flow Processing Pipeline is composed of several components for processing network flow data, including importing, deidentification, metadata tagging, flow stitching, etc. +There are many ways the components can be combined, configured, and run. These documents will describe the standard "simple" set up and provide information for more complex configurations. + +## Data Collection + +In Netsage, sensor(s) are network devices configured to collect flow data ([tstat](http://tstat.polito.it/), [sflow](https://www.rfc-editor.org/info/rfc3176), or [netflow](https://www.cisco.com/c/en/us/products/collateral/ios-nx-os-software/ios-netflow/prod_white_paper0900aecd80406232.html)) and send it to a "pipeline host" for processing. + +Tstat flow data can be sent directly to the pipeline ingest RabbitMQ queue on the pipeline host using the Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) tool. 
This can be installed as usual or via Docker. + +Sflow and netflow data from configured routers should be sent to the pipeline host where it is collected and stored into nfcapd files using [nfdump tools](https://github.com/phaag/nfdump). The Netsage project has packaged the nfdump tools into a [Docker container](https://github.com/netsage-project/docker-nfdump-collector) for ease of use. + +## Pipeline Components + +The Netsage Flow Processing Pipeline is made of the following components + + - Importer: Perl scripts on the pipeline host that read nfcapd flow files and send the flow data to a RabbitMQ queue. ([Doc](importer.md), [in github](https://github.com/netsage-project/netsage-pipeline/blob/master/lib/GRNOC/NetSage/Deidentifier/NetflowImporter.pm)) + - [RabbitMQ](https://www.rabbitmq.com/): Used for message passing and queuing of tasks. + - [Logstash](https://www.elastic.co/logstash) pipeline: Performs a variety of operations on the flow data to transform it and add additional information. ([Doc](logstash.md)) + - [Elasticsearch](https://www.elastic.co/what-is/elasticsearch): Used for storing the final flow data. + +## Visualization + +[Grafana](https://grafana.com/oss/grafana/) or [Kibana](https://www.elastic.co/kibana) can be used to visualize the data stored in elasticsearch. Netsage Grafana Dashboards are available [in github](https://github.com/netsage-project/netsage-grafana-configs). + +## Pipeline Installation + +Originally, the pipeline was deployed by installing all of the components individually on one or more servers (the "BareMetal" or "Manual" Install). More recently, we've also added a Docker deployment option. With simple pipelines having just one sflow and/or one netflow sensor (and any number of tstat sensors), the basic "Docker Installation" should suffice. The "Docker Advanced Options" guide will help when there are more sensors and/or other customizations required. + diff --git a/website/versioned_docs/version-1.2.11/pipeline/logstash.md b/website/versioned_docs/version-1.2.11/pipeline/logstash.md new file mode 100644 index 00000000..658b240a --- /dev/null +++ b/website/versioned_docs/version-1.2.11/pipeline/logstash.md @@ -0,0 +1,128 @@ +--- +id: logstash +title: Logstash Pipeline +sidebar_label: Logstash +--- + +The Logstash portion of the Netsage Pipeline reads in flows from a RabbitMQ queue, performs various transformations and adds additional information to them, then sends them to a location specified in the output logstash config, eventually ending up in an Elasticsearch instance. + +Logstash config files invoke various logstash "filters" and actions. These conf files are located in /etc/logstash/conf.d/. See below for a brief description of what each does and check the files for comments. + +Notes: + - All \*.conf files in conf.d/ are executed in alphabetical order, as if they were one huge file. Those ending in .disabled will not be executed (assuming 'path.config: "/etc/logstash/conf.d/*.conf"' in /etc/logstash/pipelines.yml). + - If actions in a particular .conf file are not needed in your particular case, they can be removed or the file disabled, but check carefully for effects on downstream configs. + - MaxMind, CAIDA, and Science Registry database files required by the geoip and aggregate filters are downloaded from scienceregistry.netsage.global via cron jobs weekly or daily. (MaxMind data can change weekly, CAIDA quarterly, Science Registry information randomly.) 
**NOTE that new versions won't be used in the pipeline until logstash is restarted.** There is a cron file to do this also, though it's not running in Docker deployments. Similarly for other support files, eg, those used in 90-additional-fields.conf.
+ - Lookup tables for 55-member-orgs.conf that we have compiled are available from scienceregistry.grnoc.iu.edu. See the cron files provided. These will not be updated often, so you may run the cron jobs or not. You will need to provide lists for other networks yourself or ask us.
+
+## Logstash Sequence
+
+The main things done in each conf file are as follows.
+
+### 01-input-rabbit.conf
+
+Reads flows from a rabbitmq queue. (The ".disabled" extension can be removed from other 01-input configs available in conf.d/ to get flows from other sources.)
+
+### 10-preliminaries.conf
+
+Drops flows to or from private IP addresses;
+converts any timestamps in milliseconds to seconds;
+drops events with timestamps more than a year in the past or more than 10 sec in the future;
+sets duration and rates to 0 if duration is <= 0.002 sec (because tiny durations/few samples lead to inaccurate rates).
+
+### 15-sensor-specific-changes.conf
+
+Makes any changes to fields needed for specific sensors. This config currently provides 1) the ability to drop all flows that do not use interfaces (ifindexes) in a specified list; lists can be sensor-specific, 2) the ability to change the sensor name for flows from a specified sensor which use a certain interface, and 3) the ability to apply a sampling rate correction manually for named sensors. You may edit the file in a bare-metal installation and specify everything explicitly (upgrades will not overwrite this config) or you may use the environment file specified in the systemd unit file. For Docker installations, use the .env file to specify the parameters. By default, this config will do nothing since the flags will be set to False.
+
+### 20-add_id.conf
+
+Adds a unique id (eventually called meta.id) which is a hash of the 5-tuple of the flow (src and dst ips and ports, and protocol) plus the sensor name. This id is used for aggregating (stitching) in the next step.
+
+### 40-aggregation.conf
+
+Stitches together flows from different nfcapd files into longer flows, matching them up by meta.id and using a specified inactivity_timeout to decide when to start a new flow.
+
+Notes:
+ - By default, 5-minute nfcapd files are assumed and the inactivity_timeout is set to 10.5 minutes. If more than 10.5 min have passed between the start of the current flow and the start of the last matching one, do not stitch them together.
+ - If your nfcapd files are written every 15 minutes, change the inactivity_timeout to at least 16 minutes.
+ - There is another "timeout" setting which is basically the maximum duration of a stitched flow (default: 24 hr).
+ - When logstash shuts down, any flows "in the aggregator" will be written out to aggregate_maps_path (default: /tmp/logstash-aggregation-maps). The file is then read back in when logstash is restarted so aggregation can continue.
+ - Your logstash pipeline can have only 1 worker or aggregation is not going to work! This is set in the logstash config file (a quick way to check this is sketched below).
+ - Tstat flows come in already complete, so no aggregation is done on those flows. 
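+
+A quick way to verify the single-worker requirement on a bare-metal install is a check like the following (the file paths are the defaults mentioned elsewhere in these docs; Docker deployments set this through the logstash container's settings instead, so treat this only as a sketch):
+
+```sh
+# show any pipeline.workers settings; for aggregation to work the effective value must be 1
+grep -H "pipeline.workers" /etc/logstash/pipelines.yml /etc/logstash/logstash.yml
+```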
+
+### 45-geoip-tagging.conf
+
+Queries the MaxMind GeoLite2-City database by IP to get src and dst Countries, Continents, Latitudes, and Longitudes;
+if the destination IP is in the multicast range, sets the destination Organization, Country, and Continent to "Multicast".
+
+*This product uses GeoLite2 data created by MaxMind, available from [www.maxmind.com](http://www.maxmind.com).*
+
+### 50-asn.conf
+
+Normally with sflow and netflow, flows come in with source and destination ASNs. If there is no ASN in the input event, or the input ASN is 0, 4294967295, or 23456, or it is a private ASN, this config tries to get an ASN by IP from the MaxMind ASN database.
+Sets ASN to -1 if it is unavailable for any reason.
+
+### 53-caida-org.conf
+
+Uses the current source and destination ASNs to get organization names from the prepared CAIDA ASN-to-Organization lookup file.
+
+*This product uses a lookup table constructed from the CAIDA AS Organizations Dataset - see [www.caida.org](http://www.caida.org/data/as-organizations).*
+
+### 55-member-orgs.conf
+
+Searches any provided lookup tables by IP to obtain member or customer organization names and overwrite the Organization determined previously.
+This allows entities which don't own their own ASs to be listed as the src or dst Organization.
+
+Note: These lookup tables are not stored in github, but an example is provided to show the layout, and the tables we have compiled can be downloaded via a cron job.
+
+### 60-scireg-tagging-fakegeoip.conf
+
+Uses a fake geoip database containing [Science Registry](http://scienceregistry.grnoc.iu.edu) information to tag the flows with source and destination science disciplines and roles, organizations and locations, etc;
+removes Registry fields we don't need to save to elasticsearch.
+
+Notes:
+ - The [Science Registry](https://scienceregistry.netsage.global/rdb/) stores human-curated information about various "resources". Resources are sources and destinations of flows.
+ - The Science Registry "fake geoip database" is updated weekly and can be downloaded via wget in a cron job (provided in the installation).
+
+### 70-deidentify.conf
+
+Replaces the last octet of IPv4 addresses and the last 4 hextets of IPv6 addresses with x's in order to deidentify them.
+
+### 80-privatize.org.conf
+
+Removes information about Australian organizations (or, with modification, any country that has privacy rules that require us not to identify organizations).
+If the ASN is one of those listed, completely replaces the IP with x's, sets the location to central Australia, sets all organizations to "AARNet", and removes all Projects.
+
+### 88-preferred-location-org.conf
+
+Copies Science Registry organization and location values, if they exist, to the meta.preferred_organization and meta.preferred_location fields. If there are no Science Registry values, the organizations and locations from the CAIDA and MaxMind lookups, respectively, are saved to those fields.
+
+### 90-additional-fields.conf
+
+Sets additional quick and easy fields. Supporting mapping or ruby files are used - see support/ and ruby/ in conf.d/. Currently we have (for Netsage's use):
+ - sensor_group = TACC, AMPATH, etc. 
(based on matching sensor names to regexes) + - sensor_type = Circuit, Archive, Exchange Point, or Regional Network (based on matching sensor names to regexes) + - country_scope = Domestic, International, or Mixed (based on src and dst countries and possibly continents, where Domestic = US, Puerto Rico, or Guam) + - is_network_testing = yes, no (yes if discipline from the science registry is 'CS.Network Testing and Monitoring' or port = 5001, 5101, or 5201) + - es_doc_id = hash of meta.id and the start time of the flow. If this id is used as the document id in elasticsearch, flows that are mistakenly input more than once will update existing documents rather than be added as duplicates. (NOTE: due to how netflow works, use es_doc_id as the ES document id only for sflow!) + +### 95-cleanup.conf + +Does small misc. tasks at the end like rename, remove, or convert fields + +### 98-post-process.conf + +Adds @exit_time and @processing_time (these are mainly for developers) + +### 99-output-rabbit.conf + +Sends results to a final RabbitMQ queue. (".disabled" can be removed from other output configs to send flows to other places) + +### Final Stage + +In the GlobalNOC-Netsage case, the output filter writes the flows to a network-specific RabbitMQ queue on another host and the last stage is a separate logstash pipeline on a 3rd host. The latter reads flows from the final queue using a rabbitmq input filter and sends it into elasticsearch using an elasticsearch output filter with a mapping template which sets data types for the fields. + +## Field names + +The fields used/created in Logstash (and saved to Elasticsearch) are listed in the [Elasticsearch doc](elastic). + + diff --git a/website/versioned_docs/version-1.2.11/pipeline/nfdump.md b/website/versioned_docs/version-1.2.11/pipeline/nfdump.md new file mode 100644 index 00000000..b9519282 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/pipeline/nfdump.md @@ -0,0 +1,17 @@ +--- +id: nfdump +title: Sflow/Netflow Data Collection +sidebar_label: Sflow/Netflow Data +--- + +Sflow and Netflow export can be configured on appropriate network devices. Netsage uses tools in the Nfdump package to collect and process the resulting flow data. The toolset supports netflow v1, v5/v7, v9, IPFIX and SFLOW, IPv4 as well as IPv6. + +## Netsage Usage + +Nfcapd and/or sfcapd processes (from the nfdump package) are used to collect incoming netflow and/or sflow data and save it to disk in nfcapd files. The files are then read by the [importer](importer), which uses an nfdump command, and sent to RabbitMQ. From there, the [logstash](logstash) pipeline ingests the flows and processes them in exactly the same way as it processes tstat flows. The data is eventually saved in elasticsearch and visualized by [grafana dashboards](https://github.com/netsage-project/netsage-grafana-configs). + +One may also use the nfdump command interactively to view the flows in a nfcapd file in a terminal window. + +## Docker Deployment + +The nfdump/nfcapd/sfcapd processes can be invoked locally or using a Docker container. The Docker deployment of the Pipeline uses an nfdump Docker container. (See the Docker Deployment Guide.) 
The Docker image definitions can be found [HERE](https://github.com/netsage-project/docker-nfdump-collector) diff --git a/website/versioned_docs/version-1.2.11/pipeline/tstat.md b/website/versioned_docs/version-1.2.11/pipeline/tstat.md new file mode 100644 index 00000000..baab97c5 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/pipeline/tstat.md @@ -0,0 +1,16 @@ +--- +id: tstat +title: Tstat Data Collection +sidebar_label: Tstat Data +--- + +## Netsage GitHub Project + +[Tstat](http://tstat.polito.it/) is a passive sniffer that provides insights into traffic patterns. The Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) project provides client programs to parse the captured data and send it to a rabbitmq host where it can then be processed by the [logstash pipeline](logstash), stored in elasticsearch, and finally displayed in our Grafana [dashboards](https://github.com/netsage-project/netsage-grafana-configs). + +## Docker + +Netsage Docker images exist on Docker Hub for tstat and tstat_transport. This is still in a beta state and is in development. The initial documentation is available [here](https://github.com/netsage-project/tstat-transport/blob/master/docs/docker.md). + + + diff --git a/website/versioned_sidebars/version-1.2.11-sidebars.json b/website/versioned_sidebars/version-1.2.11-sidebars.json new file mode 100644 index 00000000..40a8c9ac --- /dev/null +++ b/website/versioned_sidebars/version-1.2.11-sidebars.json @@ -0,0 +1,89 @@ +{ + "version-1.2.11/Pipeline": [ + { + "collapsed": true, + "type": "category", + "label": "Pipeline", + "items": [ + { + "type": "doc", + "id": "version-1.2.11/pipeline/intro" + }, + { + "type": "doc", + "id": "version-1.2.11/pipeline/tstat" + }, + { + "type": "doc", + "id": "version-1.2.11/pipeline/nfdump" + }, + { + "type": "doc", + "id": "version-1.2.11/pipeline/importer" + }, + { + "type": "doc", + "id": "version-1.2.11/pipeline/logstash" + }, + { + "type": "doc", + "id": "version-1.2.11/pipeline/elastic" + } + ] + }, + { + "collapsed": true, + "type": "category", + "label": "Deployment", + "items": [ + { + "type": "doc", + "id": "version-1.2.11/deploy/choose_install" + }, + { + "type": "doc", + "id": "version-1.2.11/deploy/bare_metal_install" + }, + { + "type": "doc", + "id": "version-1.2.11/deploy/docker_install_simple" + }, + { + "type": "doc", + "id": "version-1.2.11/deploy/docker_install_advanced" + }, + { + "type": "doc", + "id": "version-1.2.11/deploy/docker_upgrade" + }, + { + "type": "doc", + "id": "version-1.2.11/deploy/docker_troubleshoot" + } + ] + }, + { + "collapsed": true, + "type": "category", + "label": "Development", + "items": [ + { + "type": "doc", + "id": "version-1.2.11/devel/dev_dataset" + }, + { + "type": "doc", + "id": "version-1.2.11/devel/docker_dev_guide" + }, + { + "type": "doc", + "id": "version-1.2.11/devel/docusaurus" + }, + { + "type": "doc", + "id": "version-1.2.11/devel/docker_dev_tag" + } + ] + } + ] +} diff --git a/website/versions.json b/website/versions.json index 2303a6b2..5b4b29b1 100644 --- a/website/versions.json +++ b/website/versions.json @@ -1,4 +1,5 @@ [ + "1.2.11", "1.2.10", "1.2.9", "1.2.8", From 99468dcd8f38049e180fd14ce1c80f328dcf4bf4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 9 Sep 2021 21:27:35 +0000 Subject: [PATCH 044/126] Bump axios from 0.21.1 to 0.21.4 in /website Bumps [axios](https://github.com/axios/axios) from 0.21.1 to 0.21.4. 
- [Release notes](https://github.com/axios/axios/releases) - [Changelog](https://github.com/axios/axios/blob/master/CHANGELOG.md) - [Commits](https://github.com/axios/axios/compare/v0.21.1...v0.21.4) --- updated-dependencies: - dependency-name: axios dependency-type: indirect ... Signed-off-by: dependabot[bot] --- website/yarn.lock | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index 3cfba086..880062a0 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -2292,11 +2292,11 @@ autoprefixer@^9.4.7, autoprefixer@^9.6.1: postcss-value-parser "^4.1.0" axios@^0.21.1: - version "0.21.1" - resolved "https://registry.yarnpkg.com/axios/-/axios-0.21.1.tgz#22563481962f4d6bde9a76d516ef0e5d3c09b2b8" - integrity sha512-dKQiRHxGD9PPRIUNIWvZhPTPpl1rf/OxTYKsqKUDjBwYylTvV7SjSHJb9ratfyzM6wCdLCOYLzs73qpg5c4iGA== + version "0.21.4" + resolved "https://registry.yarnpkg.com/axios/-/axios-0.21.4.tgz#c67b90dc0568e5c1cf2b0b858c43ba28e2eda575" + integrity sha512-ut5vewkiu8jjGBdqpM44XxjuCjq9LAKeHVmoVfHVzy8eHgxxq8SbAVQNovDA8mVi05kP0Ea/n/UzcSHcTJQfNg== dependencies: - follow-redirects "^1.10.0" + follow-redirects "^1.14.0" babel-loader@^8.2.2: version "8.2.2" @@ -4412,10 +4412,10 @@ flux@^4.0.1: fbemitter "^3.0.0" fbjs "^3.0.0" -follow-redirects@^1.0.0, follow-redirects@^1.10.0: - version "1.13.3" - resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.13.3.tgz#e5598ad50174c1bc4e872301e82ac2cd97f90267" - integrity sha512-DUgl6+HDzB0iEptNQEXLx/KhTmDb8tZUHSeLqpnjpknR70H0nC2t9N73BK6fN4hOvJ84pKlIQVQ4k5FFlBedKA== +follow-redirects@^1.0.0, follow-redirects@^1.14.0: + version "1.14.3" + resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.14.3.tgz#6ada78118d8d24caee595595accdc0ac6abd022e" + integrity sha512-3MkHxknWMUtb23apkgz/83fDoe+y+qr0TdgacGIA7bew+QLBo3vdgEN2xEsuXNivpFy4CyDhBBZnNZOtalmenw== for-in@^1.0.2: version "1.0.2" From 312c37156e0f25a9902c0a7c6f6732aa8c2ec59e Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 9 Sep 2021 21:27:39 +0000 Subject: [PATCH 045/126] already a typo and an addition in docs --- website/docs/deploy/docker_upgrade.md | 2 +- website/docs/devel/documentation_guide.md | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/website/docs/deploy/docker_upgrade.md b/website/docs/deploy/docker_upgrade.md index d640d12a..9fac01af 100644 --- a/website/docs/deploy/docker_upgrade.md +++ b/website/docs/deploy/docker_upgrade.md @@ -50,7 +50,7 @@ Occasionally, something may change which will necessitate editing your override ### Update Docker Containers -This should be done automatically when you start up the conctainers, but you can also pull new images from Docker Hub now. +This should be done automatically when you start up the containers, but you can also pull new images from Docker Hub now. 
``` docker-compose pull diff --git a/website/docs/devel/documentation_guide.md b/website/docs/devel/documentation_guide.md index 076628b2..7287ed06 100644 --- a/website/docs/devel/documentation_guide.md +++ b/website/docs/devel/documentation_guide.md @@ -92,6 +92,9 @@ we need to: * update versions.json to remove the reference * remove the versioned_docs/version-1.2.6 * remove versioned_sidebars/version-1.2.6-sidebars.json + * change 1.2.6 in docusaurus.config.js back to 1.2.5 + +Then git add, commit, and push ## If Using Docker From 070c5141c23b674061a6f126f23e474569b1d61a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Sep 2021 22:07:23 +0000 Subject: [PATCH 046/126] Bump prismjs from 1.24.0 to 1.25.0 in /website Bumps [prismjs](https://github.com/PrismJS/prism) from 1.24.0 to 1.25.0. - [Release notes](https://github.com/PrismJS/prism/releases) - [Changelog](https://github.com/PrismJS/prism/blob/master/CHANGELOG.md) - [Commits](https://github.com/PrismJS/prism/compare/v1.24.0...v1.25.0) --- updated-dependencies: - dependency-name: prismjs dependency-type: indirect ... Signed-off-by: dependabot[bot] --- website/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index 3cfba086..d1553f41 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -7737,9 +7737,9 @@ prism-react-renderer@^1.1.1: integrity sha512-GHqzxLYImx1iKN1jJURcuRoA/0ygCcNhfGw1IT8nPIMzarmKQ3Nc+JcG0gi8JXQzuh0C5ShE4npMIoqNin40hg== prismjs@^1.23.0: - version "1.24.0" - resolved "https://registry.yarnpkg.com/prismjs/-/prismjs-1.24.0.tgz#0409c30068a6c52c89ef7f1089b3ca4de56be2ac" - integrity sha512-SqV5GRsNqnzCL8k5dfAjCNhUrF3pR0A9lTDSCUZeh/LIshheXJEaP0hwLz2t4XHivd2J/v2HR+gRnigzeKe3cQ== + version "1.25.0" + resolved "https://registry.yarnpkg.com/prismjs/-/prismjs-1.25.0.tgz#6f822df1bdad965734b310b315a23315cf999756" + integrity sha512-WCjJHl1KEWbnkQom1+SzftbtXMKQoezOCYs5rECqMN+jP+apI7ftoflyqigqzopSO3hMhTEb0mFClA8lkolgEg== process-nextick-args@~2.0.0: version "2.0.1" From 31090f6439fc062c5d404104a67f4ddd932723bb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Nov 2021 21:39:09 +0000 Subject: [PATCH 047/126] Bump algoliasearch-helper from 3.4.4 to 3.6.2 in /website Bumps [algoliasearch-helper](https://github.com/algolia/algoliasearch-helper-js) from 3.4.4 to 3.6.2. - [Release notes](https://github.com/algolia/algoliasearch-helper-js/releases) - [Changelog](https://github.com/algolia/algoliasearch-helper-js/blob/develop/CHANGELOG) - [Commits](https://github.com/algolia/algoliasearch-helper-js/compare/3.4.4...3.6.2) --- updated-dependencies: - dependency-name: algoliasearch-helper dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- website/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index 3cfba086..076de524 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -2047,9 +2047,9 @@ ajv@^6.1.0, ajv@^6.10.2, ajv@^6.12.4, ajv@^6.12.5: uri-js "^4.2.2" algoliasearch-helper@^3.3.4: - version "3.4.4" - resolved "https://registry.yarnpkg.com/algoliasearch-helper/-/algoliasearch-helper-3.4.4.tgz#f2eb46bc4d2f6fed82c7201b8ac4ce0a1988ae67" - integrity sha512-OjyVLjykaYKCMxxRMZNiwLp8CS310E0qAeIY2NaublcmLAh8/SL19+zYHp7XCLtMem2ZXwl3ywMiA32O9jszuw== + version "3.6.2" + resolved "https://registry.yarnpkg.com/algoliasearch-helper/-/algoliasearch-helper-3.6.2.tgz#45e19b12589cfa0c611b573287f65266ea2cc14a" + integrity sha512-Xx0NOA6k4ySn+R2l3UMSONAaMkyfmrZ3AP1geEMo32MxDJQJesZABZYsldO9fa6FKQxH91afhi4hO1G0Zc2opg== dependencies: events "^1.1.1" From 5c09fefd00e42b4d07eb1c51600241b79ef3a6f1 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Tue, 14 Dec 2021 21:52:08 +0000 Subject: [PATCH 048/126] Upping logstash version to 7.16.1 and adding LEARN to the sensor group and type regexes. Pipeline version 1.2.12 --- CHANGES.md | 10 ++++++++++ compose/logstash/Dockerfile | 4 ++-- conf-logstash/98-post-process.conf | 2 +- conf-logstash/support/sensor_groups.json | 1 + conf-logstash/support/sensor_types.json | 1 + grnoc-netsage-deidentifier.spec | 2 +- lib/GRNOC/NetSage/Deidentifier.pm | 2 +- 7 files changed, 17 insertions(+), 5 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 07229f77..52d9b3ff 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,13 @@ +------------------------------------------------------ +## GRNOC NetSage Deidentfier 1.2.12 -- Dec 14, 2021 +------------------------------------------------------ +Usage note: With this release, we will move to using logstash 7.16.1 to fix a Log4j vulnerability. +Bare-metal installations will need to upgrade logstash manually. 
+ +Features: + * In the dockerfile, increased the version on which the logstash docker container is based + * Added LEARN to the regexes in the sensor groups and types support files + ------------------------------------------------------ ## GRNOC NetSage Deidentfier 1.2.11 -- Sept 3, 2021 ------------------------------------------------------ diff --git a/compose/logstash/Dockerfile b/compose/logstash/Dockerfile index 1d935220..4acdbb5d 100644 --- a/compose/logstash/Dockerfile +++ b/compose/logstash/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.elastic.co/logstash/logstash:7.10.1 +FROM docker.elastic.co/logstash/logstash:7.16.1 #Create symlink so can use paths from production with logstash docker defaults USER root @@ -11,4 +11,4 @@ USER logstash VOLUME /var/cache/netsage VOLUME /var/lib/grnoc/netsage/ -VOLUME /usr/share/logstash/config/ \ No newline at end of file +VOLUME /usr/share/logstash/config/ diff --git a/conf-logstash/98-post-process.conf b/conf-logstash/98-post-process.conf index 9e9d9d03..68d8c48d 100644 --- a/conf-logstash/98-post-process.conf +++ b/conf-logstash/98-post-process.conf @@ -5,7 +5,7 @@ filter { code => ' event.set( "@exit_time", Time.now ); event.set( "@processing_time", event.get("@exit_time") - event.get("@ingest_time") ); - event.set( "@pipeline_ver", "1.2.11" ); + event.set( "@pipeline_ver", "1.2.12" ); ' tag_on_exception => '_rubyexception in 98-post-process.conf' } diff --git a/conf-logstash/support/sensor_groups.json b/conf-logstash/support/sensor_groups.json index 8d81e64b..32007f75 100644 --- a/conf-logstash/support/sensor_groups.json +++ b/conf-logstash/support/sensor_groups.json @@ -14,6 +14,7 @@ "^.*nersc.*": "NERSC", "^.*pacificwave.*": "PacWave", "^.*pnw-gigapop\\.net$": "PacWave", + "^LEARN.*": "LEARN", "^PennREN.*": "PennREN", "^SANReN.*": "SANReN", "^SingAREN.*": "SingAREN", diff --git a/conf-logstash/support/sensor_types.json b/conf-logstash/support/sensor_types.json index 3b8c25b2..a635e1b1 100644 --- a/conf-logstash/support/sensor_types.json +++ b/conf-logstash/support/sensor_types.json @@ -17,6 +17,7 @@ "^FRGP.*$": "Regional Network", "^GigaPOP.*$": "Regional Network", "^i-Light.*$": "Regional Network", + "^LEARN.*$": "Regional Network", "^PennREN.*$": "Regional Network", "^SANReN.*$": "Regional Network", "^.*sox.*$": "Regional Network", diff --git a/grnoc-netsage-deidentifier.spec b/grnoc-netsage-deidentifier.spec index eedfe0ea..595cd2a6 100644 --- a/grnoc-netsage-deidentifier.spec +++ b/grnoc-netsage-deidentifier.spec @@ -1,6 +1,6 @@ Summary: GRNOC NetSage Flow-Processing Pipeline Name: grnoc-netsage-deidentifier -Version: 1.2.11 +Version: 1.2.12 # update Version here, in conf-logstash/98-post-process.conf, lib/GRNOC/NetSage/Deidentifier.pm Release: 1%{?dist} License: GRNOC diff --git a/lib/GRNOC/NetSage/Deidentifier.pm b/lib/GRNOC/NetSage/Deidentifier.pm index 66d9de3e..d5b9eb3c 100644 --- a/lib/GRNOC/NetSage/Deidentifier.pm +++ b/lib/GRNOC/NetSage/Deidentifier.pm @@ -3,7 +3,7 @@ package GRNOC::NetSage::Deidentifier; use strict; use warnings; -our $VERSION = "1.2.11"; +our $VERSION = "1.2.12"; 1; From 4fac4eb8edb5898f9a645957c8a517cdd67c827f Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Tue, 4 Jan 2022 21:22:05 +0000 Subject: [PATCH 049/126] Updating v1.2.12 to build the logstash image from logstash-7.16.2, to get additional log4j vulnerability fixes. 
--- CHANGES.md | 7 ++++--- compose/logstash/Dockerfile | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 52d9b3ff..98be2368 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,11 +1,12 @@ ------------------------------------------------------ -## GRNOC NetSage Deidentfier 1.2.12 -- Dec 14, 2021 +## GRNOC NetSage Deidentfier 1.2.12 -- Jan 4, 2022 ------------------------------------------------------ -Usage note: With this release, we will move to using logstash 7.16.1 to fix a Log4j vulnerability. +Usage note: With this release, we will move to using logstash 7.16.2 to fix a Log4j vulnerability. Bare-metal installations will need to upgrade logstash manually. +(Dec 14,2021- original 1.2.12 release with logstash 7.16.1 in the pipeline_logstash Dockerfile) Features: - * In the dockerfile, increased the version on which the logstash docker container is based + * In the dockerfile, increased the version of logstash on which the pipeline_logstash container is based * Added LEARN to the regexes in the sensor groups and types support files ------------------------------------------------------ diff --git a/compose/logstash/Dockerfile b/compose/logstash/Dockerfile index 4acdbb5d..d81448f5 100644 --- a/compose/logstash/Dockerfile +++ b/compose/logstash/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.elastic.co/logstash/logstash:7.16.1 +FROM docker.elastic.co/logstash/logstash:7.16.2 #Create symlink so can use paths from production with logstash docker defaults USER root From 12617ea96de311be3164bfcd66ab4d509bbdd3bc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 15 Jan 2022 06:57:58 +0000 Subject: [PATCH 050/126] Bump shelljs from 0.8.4 to 0.8.5 in /website Bumps [shelljs](https://github.com/shelljs/shelljs) from 0.8.4 to 0.8.5. - [Release notes](https://github.com/shelljs/shelljs/releases) - [Changelog](https://github.com/shelljs/shelljs/blob/master/CHANGELOG.md) - [Commits](https://github.com/shelljs/shelljs/compare/v0.8.4...v0.8.5) --- updated-dependencies: - dependency-name: shelljs dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- website/yarn.lock | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index 8fa24776..76a520ed 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -2367,9 +2367,9 @@ bail@^1.0.0: integrity sha512-xFbRxM1tahm08yHBP16MMjVUAvDaBMD38zsM9EMAUN61omwLmKlOpB/Zku5QkjZ8TZ4vn53pj+t518cH0S03RQ== balanced-match@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.0.tgz#89b4d199ab2bee49de164ea02b89ce462d71b767" - integrity sha1-ibTRmasr7kneFk6gK4nORi1xt2c= + version "1.0.2" + resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee" + integrity sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw== base16@^1.0.0: version "1.0.0" @@ -4586,9 +4586,9 @@ glob-parent@^5.1.0, glob-parent@^5.1.1, glob-parent@~5.1.0: is-glob "^4.0.1" glob@^7.0.0, glob@^7.0.3, glob@^7.1.3, glob@^7.1.4: - version "7.1.6" - resolved "https://registry.yarnpkg.com/glob/-/glob-7.1.6.tgz#141f33b81a7c2492e125594307480c46679278a6" - integrity sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA== + version "7.2.0" + resolved "https://registry.yarnpkg.com/glob/-/glob-7.2.0.tgz#d15535af7732e02e948f4c41628bd910293f6023" + integrity sha512-lmLf6gtyrPq8tTjSmrO94wBeQbFR3HbLHbuyD69wuyQkImp2hWqMGB47OX65FBkPffO641IP9jWa1z4ivqG26Q== dependencies: fs.realpath "^1.0.0" inflight "^1.0.4" @@ -5353,6 +5353,13 @@ is-core-module@^2.2.0: dependencies: has "^1.0.3" +is-core-module@^2.8.0: + version "2.8.1" + resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.8.1.tgz#f59fdfca701d5879d0a6b100a40aa1560ce27211" + integrity sha512-SdNCUs284hr40hFTFP6l0IfZ/RSrMXF3qgoRHd3/79unUTvrFO/JoXwkGm+5J/Oe3E/b5GsnG330uUNgRpu1PA== + dependencies: + has "^1.0.3" + is-data-descriptor@^0.1.4: version "0.1.4" resolved "https://registry.yarnpkg.com/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz#0b5ee648388e2c860282e793f1856fec3f301b56" @@ -6930,7 +6937,7 @@ path-key@^3.0.0, path-key@^3.1.0: resolved "https://registry.yarnpkg.com/path-key/-/path-key-3.1.1.tgz#581f6ade658cbba65a0d3380de7753295054f375" integrity sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q== -path-parse@^1.0.6: +path-parse@^1.0.6, path-parse@^1.0.7: version "1.0.7" resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735" integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw== @@ -8421,7 +8428,16 @@ resolve-url@^0.2.1: resolved "https://registry.yarnpkg.com/resolve-url/-/resolve-url-0.2.1.tgz#2c637fe77c893afd2a663fe21aa9080068e2052a" integrity sha1-LGN/53yJOv0qZj/iGqkIAGjiBSo= -resolve@^1.1.6, resolve@^1.14.2, resolve@^1.3.2: +resolve@^1.1.6: + version "1.21.0" + resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.21.0.tgz#b51adc97f3472e6a5cf4444d34bc9d6b9037591f" + integrity sha512-3wCbTpk5WJlyE4mSOtDLhqQmGFi0/TD9VPwmiolnk8U0wRgMEktqCXd3vy5buTO3tljvalNvKrjHEfrd2WpEKA== + dependencies: + is-core-module "^2.8.0" + path-parse "^1.0.7" + supports-preserve-symlinks-flag "^1.0.0" + +resolve@^1.14.2, resolve@^1.3.2: version "1.20.0" resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.20.0.tgz#629a013fb3f70755d6f0b7935cc1c2c5378b1975" integrity 
sha512-wENBPt4ySzg4ybFQW2TT1zMQucPK95HSh/nq2CFTZVOGut2+pQvSsgtda4d26YrYcr067wjbmzOG8byDPBX63A== @@ -8764,9 +8780,9 @@ shell-quote@1.7.2: integrity sha512-mRz/m/JVscCrkMyPqHc/bczi3OQHkLTqXHEFu0zDhK/qfv3UcOA4SVmRCLmos4bhjr9ekVQubj/R7waKapmiQg== shelljs@^0.8.4: - version "0.8.4" - resolved "https://registry.yarnpkg.com/shelljs/-/shelljs-0.8.4.tgz#de7684feeb767f8716b326078a8a00875890e3c2" - integrity sha512-7gk3UZ9kOfPLIAbslLzyWeGiEqx9e3rxwZM0KE6EL8GlGwjym9Mrlx5/p33bWTu9YG6vcS4MBxYZDHYr5lr8BQ== + version "0.8.5" + resolved "https://registry.yarnpkg.com/shelljs/-/shelljs-0.8.5.tgz#de055408d8361bed66c669d2f000538ced8ee20c" + integrity sha512-TiwcRcrkhHvbrZbnRcFYMLl30Dfov3HKqzp5tO5b4pt6G/SezKcYhmDg15zXVBswHmctSAQKznqNW2LO5tTDow== dependencies: glob "^7.0.0" interpret "^1.0.0" @@ -9176,6 +9192,11 @@ supports-color@^7.0.0, supports-color@^7.1.0: dependencies: has-flag "^4.0.0" +supports-preserve-symlinks-flag@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz#6eda4bd344a3c94aea376d4cc31bc77311039e09" + integrity sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w== + svg-parser@^2.0.2: version "2.0.4" resolved "https://registry.yarnpkg.com/svg-parser/-/svg-parser-2.0.4.tgz#fdc2e29e13951736140b76cb122c8ee6630eb6b5" From 0243faf7c4af99e23162eb7440ca5239be38c9be Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 22 Jan 2022 03:20:08 +0000 Subject: [PATCH 051/126] Bump nanoid from 3.1.22 to 3.2.0 in /website Bumps [nanoid](https://github.com/ai/nanoid) from 3.1.22 to 3.2.0. - [Release notes](https://github.com/ai/nanoid/releases) - [Changelog](https://github.com/ai/nanoid/blob/main/CHANGELOG.md) - [Commits](https://github.com/ai/nanoid/compare/3.1.22...3.2.0) --- updated-dependencies: - dependency-name: nanoid dependency-type: indirect ... Signed-off-by: dependabot[bot] --- website/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index 8fa24776..ade51388 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -6398,9 +6398,9 @@ nan@^2.12.1: integrity sha512-M2ufzIiINKCuDfBSAUr1vWQ+vuVcA9kqx8JJUsbQi6yf1uGRyb7HfpdfUr5qLXf3B/t8dPvcjhKMmlfnP47EzQ== nanoid@^3.1.22: - version "3.1.22" - resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.1.22.tgz#b35f8fb7d151990a8aebd5aa5015c03cf726f844" - integrity sha512-/2ZUaJX2ANuLtTvqTlgqBQNJoQO398KyJgZloL0PZkC0dpysjncRUPsFe3DUPzz/y3h+u7C46np8RMuvF3jsSQ== + version "3.2.0" + resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.2.0.tgz#62667522da6673971cca916a6d3eff3f415ff80c" + integrity sha512-fmsZYa9lpn69Ad5eDn7FMcnnSR+8R34W9qJEijxYhTbfOWzr22n1QxCMzXLK+ODyW2973V3Fux959iQoUxzUIA== nanomatch@^1.2.9: version "1.2.13" From f43182e656e3bf7e7f040dd60d5d9cb7911f9b6a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 22 Jan 2022 15:09:01 +0000 Subject: [PATCH 052/126] Bump node-fetch from 2.6.1 to 3.1.1 in /website Bumps [node-fetch](https://github.com/node-fetch/node-fetch) from 2.6.1 to 3.1.1. - [Release notes](https://github.com/node-fetch/node-fetch/releases) - [Changelog](https://github.com/node-fetch/node-fetch/blob/main/docs/CHANGELOG.md) - [Commits](https://github.com/node-fetch/node-fetch/compare/v2.6.1...v3.1.1) --- updated-dependencies: - dependency-name: node-fetch dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- website/package.json | 2 +- website/yarn.lock | 41 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/website/package.json b/website/package.json index 9c0d48ae..826ac11d 100644 --- a/website/package.json +++ b/website/package.json @@ -13,7 +13,7 @@ "@docusaurus/preset-classic": "^2.0.0-alpha.72", "classnames": "^2.2.6", "immer": "^9.0.6", - "node-fetch": "^2.6.1", + "node-fetch": "^3.1.1", "react": "^16.8.4", "react-dom": "^16.8.4", "remark-images": "^2.0.0", diff --git a/website/yarn.lock b/website/yarn.lock index 8fa24776..6b579fbf 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -3569,6 +3569,11 @@ cyclist@^1.0.1: resolved "https://registry.yarnpkg.com/cyclist/-/cyclist-1.0.1.tgz#596e9698fd0c80e12038c2b82d6eb1b35b6224d9" integrity sha1-WW6WmP0MgOEgOMK4LW6xs1tiJNk= +data-uri-to-buffer@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/data-uri-to-buffer/-/data-uri-to-buffer-4.0.0.tgz#b5db46aea50f6176428ac05b73be39a57701a64b" + integrity sha512-Vr3mLBA8qWmcuschSLAOogKgQ/Jwxulv3RNE4FXnYWRGujzrRWQI4m12fQqRkwX06C0KanhLr4hK+GydchZsaA== + debug@2.6.9, debug@^2.2.0, debug@^2.3.3, debug@^2.6.0: version "2.6.9" resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f" @@ -4298,6 +4303,14 @@ feed@^4.2.2: dependencies: xml-js "^1.6.11" +fetch-blob@^3.1.2, fetch-blob@^3.1.3: + version "3.1.4" + resolved "https://registry.yarnpkg.com/fetch-blob/-/fetch-blob-3.1.4.tgz#e8c6567f80ad7fc22fd302e7dcb72bafde9c1717" + integrity sha512-Eq5Xv5+VlSrYWEqKrusxY1C3Hm/hjeAsCGVG3ft7pZahlUAChpGZT/Ms1WmSLnEAisEXszjzu/s+ce6HZB2VHA== + dependencies: + node-domexception "^1.0.0" + web-streams-polyfill "^3.0.3" + figgy-pudding@^3.5.1: version "3.5.2" resolved "https://registry.yarnpkg.com/figgy-pudding/-/figgy-pudding-3.5.2.tgz#b4eee8148abb01dcf1d1ac34367d59e12fa61d6e" @@ -4435,6 +4448,13 @@ fork-ts-checker-webpack-plugin@4.1.6: tapable "^1.0.0" worker-rpc "^0.1.0" +formdata-polyfill@^4.0.10: + version "4.0.10" + resolved "https://registry.yarnpkg.com/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz#24807c31c9d402e002ab3d8c720144ceb8848423" + integrity sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g== + dependencies: + fetch-blob "^3.1.2" + forwarded@~0.1.2: version "0.1.2" resolved "https://registry.yarnpkg.com/forwarded/-/forwarded-0.1.2.tgz#98c23dab1175657b8c0573e8ceccd91b0ff18c84" @@ -6442,6 +6462,11 @@ no-case@^3.0.4: lower-case "^2.0.2" tslib "^2.0.3" +node-domexception@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/node-domexception/-/node-domexception-1.0.0.tgz#6888db46a1f71c0b76b3f7555016b63fe64766e5" + integrity sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ== + node-emoji@^1.10.0: version "1.10.0" resolved "https://registry.yarnpkg.com/node-emoji/-/node-emoji-1.10.0.tgz#8886abd25d9c7bb61802a658523d1f8d2a89b2da" @@ -6449,11 +6474,20 @@ node-emoji@^1.10.0: dependencies: lodash.toarray "^4.4.0" -node-fetch@2.6.1, node-fetch@^2.6.1: +node-fetch@2.6.1: version "2.6.1" resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.1.tgz#045bd323631f76ed2e2b55573394416b639a0052" integrity sha512-V4aYg89jEoVRxRb2fJdAg8FHvI7cEyYdVAh94HH0UIK8oJxUfkjlDQN9RbMx+bEjP7+ggMiFRprSti032Oipxw== +node-fetch@^3.1.1: + version "3.1.1" + resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-3.1.1.tgz#d0d9607e455b3087e3092b821b5b1f1ebf4c2147" + 
integrity sha512-SMk+vKgU77PYotRdWzqZGTZeuFKlsJ0hu4KPviQKkfY+N3vn2MIzr0rvpnYpR8MtB3IEuhlEcuOLbGvLRlA+yg== + dependencies: + data-uri-to-buffer "^4.0.0" + fetch-blob "^3.1.3" + formdata-polyfill "^4.0.10" + node-forge@^0.10.0: version "0.10.0" resolved "https://registry.yarnpkg.com/node-forge/-/node-forge-0.10.0.tgz#32dea2afb3e9926f02ee5ce8794902691a676bf3" @@ -9866,6 +9900,11 @@ web-namespaces@^1.0.0, web-namespaces@^1.1.2: resolved "https://registry.yarnpkg.com/web-namespaces/-/web-namespaces-1.1.4.tgz#bc98a3de60dadd7faefc403d1076d529f5e030ec" integrity sha512-wYxSGajtmoP4WxfejAPIr4l0fVh+jeMXZb08wNc0tMg6xsfZXj3cECqIK0G7ZAqUq0PP8WlMDtaOGVBTAWztNw== +web-streams-polyfill@^3.0.3: + version "3.2.0" + resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-3.2.0.tgz#a6b74026b38e4885869fb5c589e90b95ccfc7965" + integrity sha512-EqPmREeOzttaLRm5HS7io98goBgZ7IVz79aDvqjD0kYXLtFZTc0T/U6wHTPKyIjb+MdN7DFIIX6hgdBEpWmfPA== + webpack-bundle-analyzer@^4.4.0: version "4.4.0" resolved "https://registry.yarnpkg.com/webpack-bundle-analyzer/-/webpack-bundle-analyzer-4.4.0.tgz#74013106e7e2b07cbd64f3a5ae847f7e814802c7" From e6e6756b4f9c6594d24640afcc0a8fc0fdb8f884 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 13 Feb 2022 20:30:19 +0000 Subject: [PATCH 053/126] Bump follow-redirects from 1.14.3 to 1.14.8 in /website Bumps [follow-redirects](https://github.com/follow-redirects/follow-redirects) from 1.14.3 to 1.14.8. - [Release notes](https://github.com/follow-redirects/follow-redirects/releases) - [Commits](https://github.com/follow-redirects/follow-redirects/compare/v1.14.3...v1.14.8) --- updated-dependencies: - dependency-name: follow-redirects dependency-type: indirect ... Signed-off-by: dependabot[bot] --- website/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index 8fa24776..7d940023 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -4413,9 +4413,9 @@ flux@^4.0.1: fbjs "^3.0.0" follow-redirects@^1.0.0, follow-redirects@^1.14.0: - version "1.14.3" - resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.14.3.tgz#6ada78118d8d24caee595595accdc0ac6abd022e" - integrity sha512-3MkHxknWMUtb23apkgz/83fDoe+y+qr0TdgacGIA7bew+QLBo3vdgEN2xEsuXNivpFy4CyDhBBZnNZOtalmenw== + version "1.14.8" + resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.14.8.tgz#016996fb9a11a100566398b1c6839337d7bfa8fc" + integrity sha512-1x0S9UVJHsQprFcEC/qnNzBLcIxsjAV905f/UkQxbclCsoTWlacCNOpQa/anodLl2uaEKFhfWOvM2Qg77+15zA== for-in@^1.0.2: version "1.0.2" From 321528d4e7a4d1be59a10678959f82f250326eae Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 26 Feb 2022 02:02:52 +0000 Subject: [PATCH 054/126] Bump prismjs from 1.25.0 to 1.27.0 in /website Bumps [prismjs](https://github.com/PrismJS/prism) from 1.25.0 to 1.27.0. - [Release notes](https://github.com/PrismJS/prism/releases) - [Changelog](https://github.com/PrismJS/prism/blob/master/CHANGELOG.md) - [Commits](https://github.com/PrismJS/prism/compare/v1.25.0...v1.27.0) --- updated-dependencies: - dependency-name: prismjs dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- website/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index 8fa24776..636f255a 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -7737,9 +7737,9 @@ prism-react-renderer@^1.1.1: integrity sha512-GHqzxLYImx1iKN1jJURcuRoA/0ygCcNhfGw1IT8nPIMzarmKQ3Nc+JcG0gi8JXQzuh0C5ShE4npMIoqNin40hg== prismjs@^1.23.0: - version "1.25.0" - resolved "https://registry.yarnpkg.com/prismjs/-/prismjs-1.25.0.tgz#6f822df1bdad965734b310b315a23315cf999756" - integrity sha512-WCjJHl1KEWbnkQom1+SzftbtXMKQoezOCYs5rECqMN+jP+apI7ftoflyqigqzopSO3hMhTEb0mFClA8lkolgEg== + version "1.27.0" + resolved "https://registry.yarnpkg.com/prismjs/-/prismjs-1.27.0.tgz#bb6ee3138a0b438a3653dd4d6ce0cc6510a45057" + integrity sha512-t13BGPUlFDR7wRB5kQDG4jjl7XeuH6jbJGt11JHPL96qwsEHNX2+68tFXqc1/k+/jALsbSWJKUOT/hcYAZ5LkA== process-nextick-args@~2.0.0: version "2.0.1" From 9386a18d9a24f1d7c1f5a3715cacb73cb7b02ade Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 27 Feb 2022 10:24:41 +0000 Subject: [PATCH 055/126] Bump url-parse from 1.5.3 to 1.5.10 in /website Bumps [url-parse](https://github.com/unshiftio/url-parse) from 1.5.3 to 1.5.10. - [Release notes](https://github.com/unshiftio/url-parse/releases) - [Commits](https://github.com/unshiftio/url-parse/compare/1.5.3...1.5.10) --- updated-dependencies: - dependency-name: url-parse dependency-type: indirect ... Signed-off-by: dependabot[bot] --- website/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index 8fa24776..79f103f5 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -9686,9 +9686,9 @@ url-parse-lax@^3.0.0: prepend-http "^2.0.0" url-parse@^1.4.3, url-parse@^1.5.1: - version "1.5.3" - resolved "https://registry.yarnpkg.com/url-parse/-/url-parse-1.5.3.tgz#71c1303d38fb6639ade183c2992c8cc0686df862" - integrity sha512-IIORyIQD9rvj0A4CLWsHkBBJuNqWpFQe224b6j9t/ABmquIS0qDU2pY6kl6AuOrL5OkCXHMCFNe1jBcuAggjvQ== + version "1.5.10" + resolved "https://registry.yarnpkg.com/url-parse/-/url-parse-1.5.10.tgz#9d3c2f736c1d75dd3bd2be507dcc111f1e2ea9c1" + integrity sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ== dependencies: querystringify "^2.1.1" requires-port "^1.0.0" From 61a562453e1ccffbfec6126714a5d5b7561fed6b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Mar 2022 03:55:17 +0000 Subject: [PATCH 056/126] Bump minimist from 1.2.5 to 1.2.6 in /website Bumps [minimist](https://github.com/substack/minimist) from 1.2.5 to 1.2.6. - [Release notes](https://github.com/substack/minimist/releases) - [Commits](https://github.com/substack/minimist/compare/1.2.5...1.2.6) --- updated-dependencies: - dependency-name: minimist dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- website/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index 8fa24776..68fb5efb 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -6266,9 +6266,9 @@ minimatch@3.0.4, minimatch@^3.0.4: brace-expansion "^1.1.7" minimist@^1.2.0, minimist@^1.2.5: - version "1.2.5" - resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.5.tgz#67d66014b66a6a8aaa0c083c5fd58df4e4e97602" - integrity sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw== + version "1.2.6" + resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.6.tgz#8637a5b759ea0d6e98702cfb3a9283323c93af44" + integrity sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q== minipass-collect@^1.0.2: version "1.0.2" From 2992dc99dac3ff7d5d57a7a95dc245db1d596b6c Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 14 Apr 2022 20:02:02 +0000 Subject: [PATCH 057/126] Removed everything having to do with the old importer, renamed package to gernoc-netsage-pipeline, no more perl code so took our requirements, misc cleanup --- grnoc-netsage-pipeline.spec | 179 ++++++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 grnoc-netsage-pipeline.spec diff --git a/grnoc-netsage-pipeline.spec b/grnoc-netsage-pipeline.spec new file mode 100644 index 00000000..ad421cb5 --- /dev/null +++ b/grnoc-netsage-pipeline.spec @@ -0,0 +1,179 @@ +Summary: GRNOC NetSage Flow-Processing Pipeline +Name: grnoc-netsage-pipeline +Version: 2.0.0 + # update Version here, in Makefile.PL, conf-logstash/98-post-process.conf +Release: 1%{?dist} +License: GRNOC +Group: Measurement +URL: http://globalnoc.iu.edu +Source0: %{name}-%{version}.tar.gz +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root +BuildArch: noarch +#Requires: perl >= 5.8.8 +# these are part of perl with centos6, not with centos7. Could just require perl-core package? 
+#%if 0%{?rhel} >= 7 +#Requires: perl-Data-Dumper +#Requires: perl-Getopt-Long +#Requires: perl-Storable +#%endif +#Requires: perl-AnyEvent +#Requires: perl-Clone +#Requires: perl-Data-Validate-IP +#Requires: perl-TimeDate +#Requires: perl-Digest-SHA +#Requires: perl-GRNOC-Config +#Requires: perl-GRNOC-Log +#Requires: perl-GRNOC-RabbitMQ +#Requires: perl-Hash-Merge +#Requires: perl-IPC-ShareLite +#Requires: perl-JSON-SL +#Requires: perl-JSON-XS +#Requires: perl-List-MoreUtils +#Requires: perl-Math-Round +#Requires: perl-Moo +#Requires: perl-Net-AMQP-RabbitMQ +#Requires: perl-Net-IP +#Requires: perl-Number-Bytes-Human +#Requires: perl-Parallel-ForkManager +#Requires: perl-Path-Class +#Requires: perl-Path-Tiny +#Requires: perl-Proc-Daemon +#Requires: perl-TimeDate +#Requires: perl-Time-Duration +#Requires: perl-Time-HiRes +#Requires: perl-Try-Tiny +#Requires: perl-Type-Tiny +Requires: wget +Requires: logstash >= 7.16.2 +Requires: rubygem-ipaddress +Requires: pmaccct >= 1.7.7 + +%description +GRNOC NetSage Flow-Processing Pipeline + +%prep +%setup -q -n grnoc-netsage-pipeline-%{version} + +%build +%{__perl} Makefile.PL PREFIX="%{buildroot}%{_prefix}" INSTALLDIRS="vendor" +make + +%install +rm -rf $RPM_BUILD_ROOT +make pure_install + +# for lookup files (maxmind, etc) +%{__install} -d -p %{buildroot}/var/lib/grnoc/netsage/ + +#%{__install} -d -p %{buildroot}/var/cache/netsage/ +#%{__install} -d -p %{buildroot}/etc/init.d/ + +%{__install} -d -p %{buildroot}/usr/bin/ +%{__install} -d -p %{buildroot}/etc/cron.d/ +%{__install} -d -p %{buildroot}/etc/systemd/system/ +%{__install} -d -p %{buildroot}/etc/logstash/conf.d/ +%{__install} -d -p %{buildroot}/etc/logstash/conf.d/ruby/ +%{__install} -d -p %{buildroot}/etc/logstash/conf.d/support/ +%{__install} -d -p %{buildroot}/usr/share/logstash/config/ +%{__install} -d -p %{buildroot}/usr/share/doc/grnoc/netsage-pipeline/ + +%{__install} bin/restart-logstash.sh %{buildroot}/usr/bin/restart-logstash.sh + +%{__install} cron.d/netsage-scireg-update.cron %{buildroot}/etc/cron.d/netsage-scireg-update.cron +%{__install} cron.d/netsage-maxmind-update.cron %{buildroot}/etc/cron.d/netsage-maxmind-update.cron +%{__install} cron.d/netsage-caida-update.cron %{buildroot}/etc/cron.d/netsage-caida-update.cron +%{__install} cron.d/netsage-memberlists-update.cron %{buildroot}/etc/cron.d/netsage-memberlists-update.cron +%{__install} cron.d/netsage-logstash-restart.cron %{buildroot}/etc/cron.d/netsage-logstash-restart.cron + +%{__install} systemd/logstash.service %{buildroot}/etc/systemd/system/logstash.service + +%{__install} conf-logstash/*.conf %{buildroot}/etc/logstash/conf.d/ +%{__install} conf-logstash/*.conf.disabled %{buildroot}/etc/logstash/conf.d/ +%{__install} conf-logstash/ruby/* %{buildroot}/etc/logstash/conf.d/ruby/ +%{__install} conf-logstash/support/* %{buildroot}/etc/logstash/conf.d/support/ + +%{__install} CHANGES.md %{buildroot}/usr/share/doc/grnoc/netsage-pipeline/CHANGES.md +%{__install} website/docs/deploy/bare_metal_install.md %{buildroot}/usr/share/doc/grnoc/netsage-pipeline/INSTALL.md + + +# clean up buildroot +find %{buildroot} -name .packlist -exec %{__rm} {} \; + +%{_fixperms} $RPM_BUILD_ROOT/* + +%clean +rm -rf $RPM_BUILD_ROOT + +%files + +%defattr(644, root, root, 755) + +# Don't overwrite cron files. Create .rpmnew files if needed. 
+%config(noreplace) /etc/cron.d/netsage-scireg-update.cron +%config(noreplace) /etc/cron.d/netsage-maxmind-update.cron +%config(noreplace) /etc/cron.d/netsage-caida-update.cron +%config(noreplace) /etc/cron.d/netsage-memberlists-update.cron +%config(noreplace) /etc/cron.d/netsage-logstash-restart.cron + +# Don't overwrite these .confs. Create .rpmnew files if needed. +%config(noreplace) /etc/logstash/conf.d/01-input-rabbit.conf +%config(noreplace) /etc/logstash/conf.d/01-input-multiline-json-file.conf +%config(noreplace) /etc/logstash/conf.d/01-input-jsonfile.conf +%config(noreplace) /etc/logstash/conf.d/15-sensor-specific-changes.conf +%config(noreplace) /etc/logstash/conf.d/40-aggregation.conf +%config(noreplace) /etc/logstash/conf.d/99-output-rabbit.conf +%config(noreplace) /etc/logstash/conf.d/99-output-jsonlog.conf +%config(noreplace) /etc/logstash/conf.d/99-output-multiline-json.conf +%config(noreplace) /etc/logstash/conf.d/99-output-elastic.conf +# logstash files that can be updated automatically (if there are updates, the old ver will be in .rpmsave) +%config /etc/logstash/conf.d/10-preliminaries.conf +%config /etc/logstash/conf.d/20-add-id.conf +%config /etc/logstash/conf.d/45-geoip-tagging.conf +%config /etc/logstash/conf.d/50-asn.conf +%config /etc/logstash/conf.d/53-caida-org.conf +%config /etc/logstash/conf.d/55-member-orgs.conf +%config /etc/logstash/conf.d/60-scireg-tagging-fakegeoip.conf +%config /etc/logstash/conf.d/70-deidentify.conf +%config /etc/logstash/conf.d/80-privatize-org.conf +%config /etc/logstash/conf.d/88-preferred-location-org.conf +%config /etc/logstash/conf.d/90-additional-fields.conf +%config /etc/logstash/conf.d/95-cleanup.conf +%config /etc/logstash/conf.d/98-post-process.conf +%config /etc/logstash/conf.d/99-output-stdout.conf.disabled +%config /etc/logstash/conf.d/ruby/anonymize_ipv6.rb +%config /etc/logstash/conf.d/ruby/domestic.rb +%config /etc/logstash/conf.d/support/sensor_groups.json +%config /etc/logstash/conf.d/support/sensor_types.json +%config /etc/logstash/conf.d/support/networkA-members-list.rb.example + +/usr/share/doc/grnoc/netsage-pipeline/CHANGES.md +#/usr/share/doc/grnoc/netsage-pipeline/INSTALL.md + +%defattr(754, root, root, -) +/usr/bin/restart-logstash.sh + +%defattr(644, root, root, -) +/etc/systemd/system/logstash.service + +%defattr(-, root, root, 755) +/var/lib/grnoc/netsage/ +/var/cache/netsage/ + +%post +echo " " +echo "-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*" +echo "AFTER UPGRADING..." +echo " " +echo " * Check config and cron files with .rpmnew and .rpmsave versions to see if any need manual updates." +echo " * Logstash configs 01, 15, 40, and 99 are not replaced by updated versions, so check to see if there are changes. " +echo " * If using 55-member-orgs.conf, make sure you have the required files in support/. See comments in the conf file. " +echo " " +echo " * Note that this rpm puts logstash config files in /etc/logstash/conf.d/ and doesn't manage multiple pipelines in pipelines.yml." +echo " " +echo " * IMPORTANT: Be sure the number of logstash pipeline workers is 1, or flow stitching (aggregation) won't work right. **" +echo " * and be sure logstash configs are specified by *.conf in the right directory." 
+echo " " +echo " * [Re]start logstash and pmacct processes " +echo "-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*" +echo " " + From c70697eb21638483ee98f57d1dc2624913344ab5 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 14 Apr 2022 20:43:35 +0000 Subject: [PATCH 058/126] More removed everything having to do with the old importer, renamed package to gernoc-netsage-pipeline, no more perl code so took our requirements, misc cleanup --- MANIFEST | 40 +- Makefile.PL | 32 +- README.md | 4 +- bin/netsage-flow-filter-daemon | 59 -- bin/netsage-netflow-importer-daemon | 64 -- conf-logstash/98-post-process.conf | 2 +- conf/logging-debug.conf | 21 - conf/logging.conf | 16 - conf/netsage_flow_filter.xml | 33 - conf/netsage_netflow_importer.xml | 54 -- conf/netsage_shared.xml | 57 -- grnoc-netsage-deidentifier.spec | 214 ----- grnoc-netsage-pipeline.spec | 14 +- init.d/netsage-flow-filter-daemon | 77 -- init.d/netsage-netflow-importer-daemon | 78 -- lib/GRNOC/NetSage/Deidentifier.pm | 9 - lib/GRNOC/NetSage/Deidentifier/FlowFilter.pm | 278 ------- .../NetSage/Deidentifier/NetflowImporter.pm | 755 ------------------ lib/GRNOC/NetSage/Deidentifier/Pipeline.pm | 641 --------------- .../NetSage/Deidentifier/WorkerManager.pm | 260 ------ old_stitcher/FlowStitcher.pm | 311 -------- old_stitcher/netsage-flow-stitcher-daemon | 73 -- old_stitcher/netsage_flow_stitcher.xml | 38 - systemd/netsage-flow-filter.service | 19 - systemd/netsage-netflow-importer.service | 21 - 25 files changed, 29 insertions(+), 3141 deletions(-) delete mode 100755 bin/netsage-flow-filter-daemon delete mode 100755 bin/netsage-netflow-importer-daemon delete mode 100644 conf/logging-debug.conf delete mode 100644 conf/logging.conf delete mode 100644 conf/netsage_flow_filter.xml delete mode 100644 conf/netsage_netflow_importer.xml delete mode 100644 conf/netsage_shared.xml delete mode 100644 grnoc-netsage-deidentifier.spec delete mode 100644 init.d/netsage-flow-filter-daemon delete mode 100644 init.d/netsage-netflow-importer-daemon delete mode 100644 lib/GRNOC/NetSage/Deidentifier.pm delete mode 100644 lib/GRNOC/NetSage/Deidentifier/FlowFilter.pm delete mode 100644 lib/GRNOC/NetSage/Deidentifier/NetflowImporter.pm delete mode 100644 lib/GRNOC/NetSage/Deidentifier/Pipeline.pm delete mode 100644 lib/GRNOC/NetSage/Deidentifier/WorkerManager.pm delete mode 100644 old_stitcher/FlowStitcher.pm delete mode 100644 old_stitcher/netsage-flow-stitcher-daemon delete mode 100644 old_stitcher/netsage_flow_stitcher.xml delete mode 100644 systemd/netsage-flow-filter.service delete mode 100644 systemd/netsage-netflow-importer.service diff --git a/MANIFEST b/MANIFEST index 05510a4e..ac9f6cf0 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,7 +1,6 @@ -bin/netsage-flow-filter-daemon -bin/netsage-netflow-importer-daemon -bin/restart-logstash.sh +grnoc-netsage-pipeline.spec CHANGES.md +bin/restart-logstash.sh conf-logstash/01-input-jsonfile.conf.disabled conf-logstash/01-input-multiline-json-file.conf.disabled conf-logstash/01-input-rabbit.conf @@ -30,48 +29,13 @@ conf-logstash/ruby/domestic.rb conf-logstash/support/sensor_groups.json conf-logstash/support/sensor_types.json conf-logstash/support/networkA-members-list.rb.example -conf/logging-debug.conf -conf/logging.conf -conf/netsage_flow_filter.xml -conf/netsage_netflow_importer.xml -conf/netsage_shared.xml cron.d/netsage-maxmind-update.cron cron.d/netsage-caida-update.cron cron.d/netsage-scireg-update.cron cron.d/netsage-logstash-restart.cron cron.d/netsage-memberlists-update.cron 
-grnoc-netsage-deidentifier.spec -init.d/netsage-flow-filter-daemon -init.d/netsage-netflow-importer-daemon -systemd/netsage-flow-filter.service -systemd/netsage-netflow-importer.service systemd/logstash.service -lib/GRNOC/NetSage/Deidentifier.pm -lib/GRNOC/NetSage/Deidentifier/FlowFilter.pm -lib/GRNOC/NetSage/Deidentifier/NetflowImporter.pm -lib/GRNOC/NetSage/Deidentifier/Pipeline.pm -lib/GRNOC/NetSage/Deidentifier/WorkerManager.pm Makefile.PL MANIFEST README.md website/docs/deploy/bare_metal_install.md -reporting/flow-mongo-stats.pl -reporting/getdata-cron.pl -reporting/getdata.pl -reporting/queues.txt -reporting/queuestats.pl -test-data/data1.json -test-data/data2.json -test-data/scireg2.json -util/export-tsds -util/generate_data.pl -util/header.pl -util/hist-export.pl -util/json2lines -util/json_to_rabbit.pl -util/lines2json -util/netsage-raw-data-importer -util/netsage_raw_data_importer.xml.example -util/nfcache -util/RawDataImporter.pm -util/tstat-flow-copier diff --git a/Makefile.PL b/Makefile.PL index ef129cf6..14ed353b 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -6,29 +6,31 @@ use ExtUtils::MakeMaker; sub MY::postamble { <<'END'; } rpm: dist - rpmbuild -ta grnoc-netsage-deidentifier-$(VERSION).tar.gz + rpmbuild -ta grnoc-netsage-pipeline-$(VERSION).tar.gz END -sub MY::test - { - q( -TEST_VERBOSE=1 +#sub MY::test +# { +# q( +#TEST_VERBOSE=1 +# +#test : pure_all +# $(FULLPERL) t/TEST $(TEST_VERBOSE) +# +#test_jenkins : pure_all +# $(FULLPERL) t/TEST $(TEST_VERBOSE) --formatter=TAP::Formatter::Console +# ); +#} -test : pure_all - $(FULLPERL) t/TEST $(TEST_VERBOSE) - -test_jenkins : pure_all - $(FULLPERL) t/TEST $(TEST_VERBOSE) --formatter=TAP::Formatter::Console - ); -} +# VERSION_FROM => 'lib/GRNOC/NetSage/Deidentifier.pm', WriteMakefile( - NAME => 'grnoc-netsage-deidentifier', + NAME => 'grnoc-netsage-pipeline', AUTHOR => 'GRNOC Software Engineering ', - VERSION_FROM => 'lib/GRNOC/NetSage/Deidentifier.pm', + VERSION => '2.0.0', PL_FILES => {}, PREREQ_PM => { }, dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', }, - clean => { FILES => 'grnoc-netsage-deidentifier-*' }, + clean => { FILES => 'grnoc-netsage-pipeline-*' }, ); diff --git a/README.md b/README.md index c2775c40..2fea9971 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,9 @@ [![Build Status](https://travis-ci.com/netsage-project/netsage-pipeline.svg?branch=master)](https://travis-ci.com/netsage-project/netsage-pipeline) -The Netsage Flow Processing Pipeline includes several components for processing network flow data, including importing, deidentification, metadata tagging, flow stitching, etc. +The Netsage Flow Processing Pipeline includes several components for processing network flow data, including collection, deidentification, metadata tagging, flow stitching, etc. -Detailed documentation is available [here](https://netsage-project.github.io/netsage-pipeline/) +Detailed documentation is available [here]. 
(https://netsage-project.github.io/netsage-pipeline/) diff --git a/bin/netsage-flow-filter-daemon b/bin/netsage-flow-filter-daemon deleted file mode 100755 index bb3a2624..00000000 --- a/bin/netsage-flow-filter-daemon +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/perl - -use strict; -use warnings; - -use GRNOC::NetSage::Deidentifier::FlowFilter; -use GRNOC::NetSage::Deidentifier::WorkerManager; - -use Getopt::Long; -use Data::Dumper; - -### constants ### -use constant DEFAULT_CONFIG_FILE => '/etc/grnoc/netsage/deidentifier/netsage_flow_filter.xml'; -use constant DEFAULT_SHARED_CONFIG_FILE => '/etc/grnoc/netsage/deidentifier/netsage_shared.xml'; -use constant DEFAULT_LOGGING_FILE => '/etc/grnoc/netsage/deidentifier/logging.conf'; - -### command line options ### - -my $config = DEFAULT_CONFIG_FILE; -my $shared_config = DEFAULT_SHARED_CONFIG_FILE; -my $logging = DEFAULT_LOGGING_FILE; -my $nofork; -my $help; - -# TODO: change jsonfile to flow data directory -GetOptions( 'config=s' => \$config, - 'sharedconfig=s' => \$shared_config, - 'logging=s' => \$logging, - 'nofork' => \$nofork, - 'help|h|?' => \$help ); - -# did they ask for help? -usage() if $help; - -# start/daemonize filter -my $flow_importer = GRNOC::NetSage::Deidentifier::FlowFilter->new( config_file => $config, - shared_config_file => $shared_config, - logging_file => $logging, - daemonize => !$nofork, - process_name => 'netsage_flow_filter' ); - -my $worker = GRNOC::NetSage::Deidentifier::WorkerManager->new( config_file => $config, - logging_file => $logging, - daemonize => !$nofork, - process_name => 'netsage_flow_filter', - worker => $flow_importer ); - - -$worker->start(); -print (" ** Check ps or /var/log/messages to be sure the processes have started successfully. **\n"); - -### helpers ### - -sub usage { - - print "Usage: $0 [--config ] [--logging ] [--flowpath ]\n"; - - exit( 1 ); -} diff --git a/bin/netsage-netflow-importer-daemon b/bin/netsage-netflow-importer-daemon deleted file mode 100755 index a07925f9..00000000 --- a/bin/netsage-netflow-importer-daemon +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/perl - -use strict; -use warnings; - -use GRNOC::NetSage::Deidentifier::NetflowImporter; -use GRNOC::NetSage::Deidentifier::WorkerManager; - -use Getopt::Long; -use Data::Dumper; - -### constants ### -use constant DEFAULT_CONFIG_FILE => '/etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml'; -use constant DEFAULT_SHARED_CONFIG_FILE => '/etc/grnoc/netsage/deidentifier/netsage_shared.xml'; -use constant DEFAULT_LOGGING_FILE => '/etc/grnoc/netsage/deidentifier/logging.conf'; - -### command line options ### - -my $config = DEFAULT_CONFIG_FILE; -my $logging = DEFAULT_LOGGING_FILE; -my $shared_config = DEFAULT_SHARED_CONFIG_FILE; -my $nofork; -my $flowpath; -my $cachefile; -my $help; - -GetOptions( 'config=s' => \$config, - 'sharedconfig=s' => \$shared_config, - 'logging=s' => \$logging, - 'nofork' => \$nofork, - 'flowpath=s' => \$flowpath, - 'cachefile=s' => \$cachefile, - 'help|h|?' => \$help ); - -# did they ask for help? 
-usage() if $help; - -# start/daemonize importer -my $flow_importer = GRNOC::NetSage::Deidentifier::NetflowImporter->new( config_file => $config, - shared_config_file => $shared_config, - logging_file => $logging, - daemonize => !$nofork, - cache_file => $cachefile, - process_name => 'netsage_netflow_importer', - flow_path => $flowpath ); - -my $worker = GRNOC::NetSage::Deidentifier::WorkerManager->new( config_file => $config, - logging_file => $logging, - daemonize => !$nofork, - process_name => 'netsage_netflow_importer', - worker => $flow_importer ); - - -$worker->start("no_input_queue"); -print (" ** Check ps or /var/log/messages to be sure the processes have started successfully. **\n"); - -### helpers ### - -sub usage { - - print "Usage: $0 [--config ] [--sharedconfig ] [--logging ] [--flowpath ]\n"; - - exit( 1 ); -} diff --git a/conf-logstash/98-post-process.conf b/conf-logstash/98-post-process.conf index 68d8c48d..89109bde 100644 --- a/conf-logstash/98-post-process.conf +++ b/conf-logstash/98-post-process.conf @@ -5,7 +5,7 @@ filter { code => ' event.set( "@exit_time", Time.now ); event.set( "@processing_time", event.get("@exit_time") - event.get("@ingest_time") ); - event.set( "@pipeline_ver", "1.2.12" ); + event.set( "@pipeline_ver", "2.0.0" ); ' tag_on_exception => '_rubyexception in 98-post-process.conf' } diff --git a/conf/logging-debug.conf b/conf/logging-debug.conf deleted file mode 100644 index b5897a27..00000000 --- a/conf/logging-debug.conf +++ /dev/null @@ -1,21 +0,0 @@ -log4perl.rootLogger = DEBUG, SYSLOG, screen -log4perl.appender.SYSLOG = Log::Dispatch::Syslog -# uncomment this next line if you want to restrict syslog to 'info' and above -#log4perl.appender.SYSLOG.min_level = info -log4perl.appender.SYSLOG.ident = sub { \ - my $process = $0; \ - if ( $process =~ /netsage-(.+)-daemon/ ) { \ - my $ident = $1; \ - return "NETSAGE-".uc($ident); \ - } else { \ - return "NETSAGE-".$process; \ - } \ -} -log4perl.appender.SYSLOG.facility = LOCAL0 -log4perl.appender.SYSLOG.layout = PatternLayout -log4perl.appender.SYSLOG.layout.ConversionPattern=[%d] %F %L %c - %m%n - -log4perl.appender.screen = Log::Log4perl::Appender::Screen -log4perl.appender.screen.stderr = 0 -log4perl.appender.screen.layout = PatternLayout -log4perl.appender.screen.layout.ConversionPattern = %d %p> %F{1}:%L %M - %m%n diff --git a/conf/logging.conf b/conf/logging.conf deleted file mode 100644 index d8f89915..00000000 --- a/conf/logging.conf +++ /dev/null @@ -1,16 +0,0 @@ -log4perl.rootLogger = INFO, SYSLOG - -log4perl.appender.SYSLOG = Log::Dispatch::Syslog -log4perl.appender.SYSLOG.min_level = info -log4perl.appender.SYSLOG.facility = LOCAL0 -log4perl.appender.SYSLOG.ident = sub { \ - my $process = $0; \ - if ( $process =~ /netsage-(.+)-daemon/ ) { \ - my $ident = $1; \ - return "NETSAGE-".uc($ident); \ - } else { \ - return "NETSAGE-".$process; \ - } \ -} -log4perl.appender.SYSLOG.layout = PatternLayout -log4perl.appender.SYSLOG.layout.ConversionPattern=%F[%L] %p: %m%n diff --git a/conf/netsage_flow_filter.xml b/conf/netsage_flow_filter.xml deleted file mode 100644 index 882f59a8..00000000 --- a/conf/netsage_flow_filter.xml +++ /dev/null @@ -1,33 +0,0 @@ - - - - netsage_deidentifier_raw_prefilter - 2 - - - 3 - netsage_deidentifier_raw - - - - 127.0.0.1 - 5672 - guest - guest - Simp - 60 - Simp.Data - - - - 100 - - 1 - - - - - - /var/run/netsage-flow-filter-daemon.pid - - diff --git a/conf/netsage_netflow_importer.xml b/conf/netsage_netflow_importer.xml deleted file mode 100644 index 7bdf7410..00000000 
--- a/conf/netsage_netflow_importer.xml +++ /dev/null @@ -1,54 +0,0 @@ - - - - - - netsage_deidentifier_netflow_fake - 2 - - - 3 - netsage_deidentifier_raw - - - - - 100 - - - 1 - - - - - - /var/cache/netsage/netflow_importer.cache - - - - 10000000 - - - 10m - - - - - - - - - - - - - - /var/run/netsage-netflow-importer-daemon.pid - - - diff --git a/conf/netsage_shared.xml b/conf/netsage_shared.xml deleted file mode 100644 index 63c60c99..00000000 --- a/conf/netsage_shared.xml +++ /dev/null @@ -1,57 +0,0 @@ - - - - - - - - - - sensorname - - - /path/to/flow-files - - - sflow - - - - - - - - - - 127.0.0.1 - 5672 - guest - guest - 0 - 100 - / - 1 - - - - 127.0.0.1 - 5672 - guest - guest - 0 - 100 - / - 1 - - diff --git a/grnoc-netsage-deidentifier.spec b/grnoc-netsage-deidentifier.spec deleted file mode 100644 index 595cd2a6..00000000 --- a/grnoc-netsage-deidentifier.spec +++ /dev/null @@ -1,214 +0,0 @@ -Summary: GRNOC NetSage Flow-Processing Pipeline -Name: grnoc-netsage-deidentifier -Version: 1.2.12 - # update Version here, in conf-logstash/98-post-process.conf, lib/GRNOC/NetSage/Deidentifier.pm -Release: 1%{?dist} -License: GRNOC -Group: Measurement -URL: http://globalnoc.iu.edu -Source0: %{name}-%{version}.tar.gz -BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root -BuildArch: noarch -Requires: perl >= 5.8.8 -# these are part of perl with centos6, not with centos7. Could just require perl-core package? -%if 0%{?rhel} >= 7 -Requires: perl-Data-Dumper -Requires: perl-Getopt-Long -Requires: perl-Storable -%endif -Requires: perl-AnyEvent -Requires: perl-Clone -Requires: perl-Data-Validate-IP -Requires: perl-TimeDate -Requires: perl-Digest-SHA -Requires: perl-GRNOC-Config -Requires: perl-GRNOC-Log -Requires: perl-GRNOC-RabbitMQ -Requires: perl-Hash-Merge -Requires: perl-IPC-ShareLite -Requires: perl-JSON-SL -Requires: perl-JSON-XS -Requires: perl-List-MoreUtils -Requires: perl-Math-Round -Requires: perl-Moo -Requires: perl-Net-AMQP-RabbitMQ -Requires: perl-Net-IP -Requires: perl-Number-Bytes-Human -Requires: perl-Parallel-ForkManager -Requires: perl-Path-Class -Requires: perl-Path-Tiny -Requires: perl-Proc-Daemon -Requires: perl-TimeDate -Requires: perl-Time-Duration -Requires: perl-Time-HiRes -Requires: perl-Try-Tiny -Requires: perl-Type-Tiny -Requires: wget - -Requires: rubygem-ipaddress - -%description -GRNOC NetSage Flow-Processing Pipeline - -%prep -%setup -q -n grnoc-netsage-deidentifier-%{version} - -%build -%{__perl} Makefile.PL PREFIX="%{buildroot}%{_prefix}" INSTALLDIRS="vendor" -make - -%install -rm -rf $RPM_BUILD_ROOT -make pure_install - -%{__install} -d -p %{buildroot}/etc/grnoc/netsage/deidentifier/ -%{__install} -d -p %{buildroot}/var/lib/grnoc/netsage/ -%{__install} -d -p %{buildroot}/var/cache/netsage/ -%{__install} -d -p %{buildroot}/usr/bin/ -%{__install} -d -p %{buildroot}/etc/init.d/ -%{__install} -d -p %{buildroot}/etc/systemd/system/ -%{__install} -d -p %{buildroot}/etc/cron.d/ -%{__install} -d -p %{buildroot}/etc/logstash/conf.d/ -%{__install} -d -p %{buildroot}/etc/logstash/conf.d/ruby/ -%{__install} -d -p %{buildroot}/etc/logstash/conf.d/support/ -%{__install} -d -p %{buildroot}/usr/share/logstash/config/ -%{__install} -d -p %{buildroot}/usr/share/doc/grnoc/netsage-deidentifier/ - -%{__install} CHANGES.md %{buildroot}/usr/share/doc/grnoc/netsage-deidentifier/CHANGES.md -%{__install} website/docs/deploy/bare_metal_install.md %{buildroot}/usr/share/doc/grnoc/netsage-deidentifier/INSTALL.md - -%{__install} conf/logging.conf 
%{buildroot}/etc/grnoc/netsage/deidentifier/logging.conf -%{__install} conf/logging-debug.conf %{buildroot}/etc/grnoc/netsage/deidentifier/logging-debug.conf -%{__install} conf/netsage_shared.xml %{buildroot}/etc/grnoc/netsage/deidentifier/netsage_shared.xml -%{__install} conf/netsage_flow_filter.xml %{buildroot}/etc/grnoc/netsage/deidentifier/netsage_flow_filter.xml -%{__install} conf/netsage_netflow_importer.xml %{buildroot}/etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml -%{__install} conf-logstash/*.conf %{buildroot}/etc/logstash/conf.d/ -%{__install} conf-logstash/*.conf.disabled %{buildroot}/etc/logstash/conf.d/ -%{__install} conf-logstash/ruby/* %{buildroot}/etc/logstash/conf.d/ruby/ -%{__install} conf-logstash/support/* %{buildroot}/etc/logstash/conf.d/support/ - -%if 0%{?rhel} >= 7 -%{__install} systemd/netsage-netflow-importer.service %{buildroot}/etc/systemd/system/netsage-netflow-importer.service -%{__install} systemd/netsage-flow-filter.service %{buildroot}/etc/systemd/system/netsage-flow-filter.service -%{__install} systemd/logstash.service %{buildroot}/etc/systemd/system/logstash.service -%else -%{__install} init.d/netsage-flow-filter-daemon %{buildroot}/etc/init.d/netsage-flow-filter-daemon -%{__install} init.d/netsage-netflow-importer-daemon %{buildroot}/etc/init.d/netsage-netflow-importer-daemon -%endif - -%{__install} cron.d/netsage-scireg-update.cron %{buildroot}/etc/cron.d/netsage-scireg-update.cron -%{__install} cron.d/netsage-maxmind-update.cron %{buildroot}/etc/cron.d/netsage-maxmind-update.cron -%{__install} cron.d/netsage-caida-update.cron %{buildroot}/etc/cron.d/netsage-caida-update.cron -%{__install} cron.d/netsage-memberlists-update.cron %{buildroot}/etc/cron.d/netsage-memberlists-update.cron -%{__install} cron.d/netsage-logstash-restart.cron %{buildroot}/etc/cron.d/netsage-logstash-restart.cron - -%{__install} bin/netsage-flow-filter-daemon %{buildroot}/usr/bin/netsage-flow-filter-daemon -%{__install} bin/netsage-netflow-importer-daemon %{buildroot}/usr/bin/netsage-netflow-importer-daemon - -%{__install} bin/restart-logstash.sh %{buildroot}/usr/bin/restart-logstash.sh - -# clean up buildroot -find %{buildroot} -name .packlist -exec %{__rm} {} \; - -%{_fixperms} $RPM_BUILD_ROOT/* - -%clean -rm -rf $RPM_BUILD_ROOT - -%files - -%defattr(644, root, root, 755) - -# Don't overwrite cron files. Create .rpmnew files if needed. -%config(noreplace) /etc/cron.d/netsage-scireg-update.cron -%config(noreplace) /etc/cron.d/netsage-maxmind-update.cron -%config(noreplace) /etc/cron.d/netsage-caida-update.cron -%config(noreplace) /etc/cron.d/netsage-memberlists-update.cron -%config(noreplace) /etc/cron.d/netsage-logstash-restart.cron - -# Don't overwrite importer configs. Create .rpmnew files if needed. -%config(noreplace) /etc/grnoc/netsage/deidentifier/logging.conf -%config(noreplace) /etc/grnoc/netsage/deidentifier/logging-debug.conf -%config(noreplace) /etc/grnoc/netsage/deidentifier/netsage_shared.xml -%config(noreplace) /etc/grnoc/netsage/deidentifier/netsage_flow_filter.xml -%config(noreplace) /etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml - -# We don't want to overwrite these .confs. Create .rpmnew files if needed. 
-%config(noreplace) /etc/logstash/conf.d/01-input-rabbit.conf -%config(noreplace) /etc/logstash/conf.d/01-input-multiline-json-file.conf.disabled -%config(noreplace) /etc/logstash/conf.d/01-input-jsonfile.conf.disabled -%config(noreplace) /etc/logstash/conf.d/15-sensor-specific-changes.conf -%config(noreplace) /etc/logstash/conf.d/40-aggregation.conf -%config(noreplace) /etc/logstash/conf.d/99-output-rabbit.conf -%config(noreplace) /etc/logstash/conf.d/99-output-jsonlog.conf.disabled -%config(noreplace) /etc/logstash/conf.d/99-output-multiline-json.conf.disabled -%config(noreplace) /etc/logstash/conf.d/99-output-elastic.conf.disabled -# logstash files that can be updated automatically (if there are updates, the old ver will be in .rpmsave) -%config /etc/logstash/conf.d/10-preliminaries.conf -%config /etc/logstash/conf.d/20-add-id.conf -%config /etc/logstash/conf.d/45-geoip-tagging.conf -%config /etc/logstash/conf.d/50-asn.conf -%config /etc/logstash/conf.d/53-caida-org.conf -%config /etc/logstash/conf.d/55-member-orgs.conf -%config /etc/logstash/conf.d/60-scireg-tagging-fakegeoip.conf -%config /etc/logstash/conf.d/70-deidentify.conf -%config /etc/logstash/conf.d/80-privatize-org.conf -%config /etc/logstash/conf.d/88-preferred-location-org.conf -%config /etc/logstash/conf.d/90-additional-fields.conf -%config /etc/logstash/conf.d/95-cleanup.conf -%config /etc/logstash/conf.d/98-post-process.conf -%config /etc/logstash/conf.d/99-output-stdout.conf.disabled -%config /etc/logstash/conf.d/ruby/anonymize_ipv6.rb -%config /etc/logstash/conf.d/ruby/domestic.rb -%config /etc/logstash/conf.d/support/sensor_groups.json -%config /etc/logstash/conf.d/support/sensor_types.json -%config /etc/logstash/conf.d/support/networkA-members-list.rb.example - -/usr/share/doc/grnoc/netsage-deidentifier/CHANGES.md -/usr/share/doc/grnoc/netsage-deidentifier/INSTALL.md - -%{perl_vendorlib}/GRNOC/NetSage/Deidentifier.pm -%{perl_vendorlib}/GRNOC/NetSage/Deidentifier/Pipeline.pm -%{perl_vendorlib}/GRNOC/NetSage/Deidentifier/WorkerManager.pm -%{perl_vendorlib}/GRNOC/NetSage/Deidentifier/FlowFilter.pm -%{perl_vendorlib}/GRNOC/NetSage/Deidentifier/NetflowImporter.pm - -%defattr(754, root, root, -) -/usr/bin/netsage-flow-filter-daemon -/usr/bin/netsage-netflow-importer-daemon -/usr/bin/restart-logstash.sh - -%if 0%{?rhel} >= 7 -%defattr(644, root, root, -) -/etc/systemd/system/netsage-flow-filter.service -/etc/systemd/system/netsage-netflow-importer.service -/etc/systemd/system/logstash.service -%else -%defattr(754, root, root, -) -/etc/init.d/netsage-flow-filter-daemon -/etc/init.d/netsage-netflow-importer-daemon -%endif - -%defattr(-, root, root, 755) -/var/lib/grnoc/netsage/ -/var/cache/netsage/ - -%post -echo " " -echo "-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*" -echo "AFTER UPGRADING..." -echo " " -echo " * Check config and cron files with .rpmnew and .rpmsave versions to see if any need manual updates." -echo " * Logstash configs 01, 15, 40, and 99 are not replaced by updated versions, so check to see if there are changes. " -echo " * If using 55-member-orgs.conf, make sure you have the required files in support/. See comments in the conf file. " -echo " " -echo " * Note that this rpm puts logstash config files in /etc/logstash/conf.d/ and doesn't manage multiple pipelines in pipelines.yml." -echo " * Nor does it manage multiple init.d files for sensor- or network-specific importers." -echo " " -echo " * IMPORTANT: Be sure the number of logstash pipeline workers is 1, or flow stitching (aggregation) won't work right. 
**" -echo " * and be sure logstash configs are specified by *.conf in the right directory." -echo " " -echo " * [Re]start logstash, netsage netflow importers (and netsage flow filters for cenic sensors only) " -echo "-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*" -echo " " - diff --git a/grnoc-netsage-pipeline.spec b/grnoc-netsage-pipeline.spec index ad421cb5..af4f1943 100644 --- a/grnoc-netsage-pipeline.spec +++ b/grnoc-netsage-pipeline.spec @@ -117,15 +117,15 @@ rm -rf $RPM_BUILD_ROOT # Don't overwrite these .confs. Create .rpmnew files if needed. %config(noreplace) /etc/logstash/conf.d/01-input-rabbit.conf -%config(noreplace) /etc/logstash/conf.d/01-input-multiline-json-file.conf -%config(noreplace) /etc/logstash/conf.d/01-input-jsonfile.conf %config(noreplace) /etc/logstash/conf.d/15-sensor-specific-changes.conf %config(noreplace) /etc/logstash/conf.d/40-aggregation.conf %config(noreplace) /etc/logstash/conf.d/99-output-rabbit.conf -%config(noreplace) /etc/logstash/conf.d/99-output-jsonlog.conf -%config(noreplace) /etc/logstash/conf.d/99-output-multiline-json.conf -%config(noreplace) /etc/logstash/conf.d/99-output-elastic.conf # logstash files that can be updated automatically (if there are updates, the old ver will be in .rpmsave) +%config(noreplace) /etc/logstash/conf.d/01-input-jsonfile.conf.disabled +%config(noreplace) /etc/logstash/conf.d/01-input-multiline-json-file.conf.disabled +%config(noreplace) /etc/logstash/conf.d/99-output-jsonlog.conf.disabled +%config(noreplace) /etc/logstash/conf.d/99-output-multiline-json.conf.disabled +%config(noreplace) /etc/logstash/conf.d/99-output-elastic.conf.disabled %config /etc/logstash/conf.d/10-preliminaries.conf %config /etc/logstash/conf.d/20-add-id.conf %config /etc/logstash/conf.d/45-geoip-tagging.conf @@ -147,7 +147,7 @@ rm -rf $RPM_BUILD_ROOT %config /etc/logstash/conf.d/support/networkA-members-list.rb.example /usr/share/doc/grnoc/netsage-pipeline/CHANGES.md -#/usr/share/doc/grnoc/netsage-pipeline/INSTALL.md +/usr/share/doc/grnoc/netsage-pipeline/INSTALL.md %defattr(754, root, root, -) /usr/bin/restart-logstash.sh @@ -157,7 +157,7 @@ rm -rf $RPM_BUILD_ROOT %defattr(-, root, root, 755) /var/lib/grnoc/netsage/ -/var/cache/netsage/ +#/var/cache/netsage/ %post echo " " diff --git a/init.d/netsage-flow-filter-daemon b/init.d/netsage-flow-filter-daemon deleted file mode 100644 index b87dee65..00000000 --- a/init.d/netsage-flow-filter-daemon +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/sh -# -# netsage-flow-filter-daemon init file for starting up the NetSage FlowFilter daemon -# -# chkconfig: 2345 20 80 -# description: Starts and stops the NetSage FlowFilter daemon - -# Source function library. -. /etc/rc.d/init.d/functions - -name="netsage-flow-filter-daemon" -exec="/usr/bin/$name" -## I believe the pid file name is actually set in the deidentifier config! -## This is just using that name. -pidfile="/var/run/$name.pid" -CONFIG="/etc/grnoc/netsage/deidentifier/netsage_flow_filter.xml" -SHAREDCONFIG="/etc/grnoc/netsage/deidentifier/netsage_shared.xml" - -start() { - [ -f $CONFIG ] || exit 6 - [ -x $exec ] || exit 5 - echo -n $"Starting $name: " - daemon "$exec --config $CONFIG" --sharedconfig $SHAREDCONFIG - retval=$? - echo - return $retval -} - -stop() { - echo -n $"Stopping $name: " - if [ -f $pidfile ] - then - # shutdown haven't work, try old way - killproc -p $pidfile $name - retval=$? 
- else - success "$name shutdown" - fi - echo -n $"Use ps aux to be sure the worker has stopped also" - echo - return $retval -} - -restart() { - echo -n $"Use stop, check with ps, then start" - echo -} - -rh_status() { - status -p $pidfile $name -} - -rh_status_q() { - rh_status >/dev/null 2>&1 -} - - -case "$1" in - start) - rh_status_q && exit 0 - $1 - ;; - stop) - rh_status_q || exit 0 - $1 - ;; - restart) - $1 - ;; - status) - rh_status - ;; - *) - echo $"Usage: $0 {start|stop|status|restart}" - exit 2 -esac -exit $? diff --git a/init.d/netsage-netflow-importer-daemon b/init.d/netsage-netflow-importer-daemon deleted file mode 100644 index 4989bac5..00000000 --- a/init.d/netsage-netflow-importer-daemon +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/sh -# -# netsage-netflow-importer-daemon init file for starting up the NetSage Netflow importer daemon -# -# chkconfig: 2345 20 80 -# description: Starts and stops the NetSage Netflow Importer daemon - -# Source function library. -. /etc/rc.d/init.d/functions - -name="netsage-netflow-importer-daemon" -exec="/usr/bin/$name" -## The pid file name is actually set in the deidentifier config file! -## This is just using that name. -pidfile="/var/run/$name.pid" -CONFIG="/etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml" -SHAREDCONFIG="/etc/grnoc/netsage/deidentifier/netsage_shared.xml" - -start() { - [ -f $CONFIG ] || exit 6 - [ -x $exec ] || exit 5 - echo -n $"Starting $name: " - daemon "$exec --config $CONFIG --sharedconfig $SHAREDCONFIG" - retval=$? - echo - return $retval -} - -stop() { - echo -n $"Stopping $name: " - if [ -f $pidfile ] - then - # shutdown doesn't work, try old way - killproc -p $pidfile $name - retval=$? - else - success "$name shutdown" - fi - echo -n $"Use ps aux to be sure the daemon and worker both stopped !!" - echo - return $retval -} - -# workers don't always quit, certainly not quickly! -restart() { - echo -n $"Use stop, check ps aux, then start" - echo -} - -rh_status() { - status -p $pidfile $name -} - -rh_status_q() { - rh_status >/dev/null 2>&1 -} - - -case "$1" in - start) - rh_status_q && exit 0 - $1 - ;; - stop) - rh_status_q || exit 0 - $1 - ;; - restart) - $1 - ;; - status) - rh_status - ;; - *) - echo $"Usage: $0 {start|stop|status|restart}" - exit 2 -esac -exit $? 
diff --git a/lib/GRNOC/NetSage/Deidentifier.pm b/lib/GRNOC/NetSage/Deidentifier.pm deleted file mode 100644 index d5b9eb3c..00000000 --- a/lib/GRNOC/NetSage/Deidentifier.pm +++ /dev/null @@ -1,9 +0,0 @@ -package GRNOC::NetSage::Deidentifier; - -use strict; -use warnings; - -our $VERSION = "1.2.12"; - -1; - diff --git a/lib/GRNOC/NetSage/Deidentifier/FlowFilter.pm b/lib/GRNOC/NetSage/Deidentifier/FlowFilter.pm deleted file mode 100644 index 55825e11..00000000 --- a/lib/GRNOC/NetSage/Deidentifier/FlowFilter.pm +++ /dev/null @@ -1,278 +0,0 @@ -package GRNOC::NetSage::Deidentifier::FlowFilter; - -use strict; -use warnings; - -use Moo; - -extends 'GRNOC::NetSage::Deidentifier::Pipeline'; - -use GRNOC::Log; -use GRNOC::Config; -use GRNOC::RabbitMQ::Client; - -use AnyEvent; -use Data::Validate::IP; -use Net::IP; -use Digest::SHA; -use POSIX; -use utf8; - -use Data::Dumper; - - -### internal attributes ### - -has handler => ( is => 'rwp'); - -has simp_config => ( is => 'rwp' ); - -has simp_client => ( is => 'rwp'); - -has router => ( is => 'rwp'); - -has router_details => ( is => 'rwp', default => sub { {} } ); - -has snmp_cache_time => ( is => 'rwp', default => 3600 ); - -has stats => ( is => 'rwp', default => sub { { - dropped => 0, - imported => 0 -} } ); - -### constructor builder ### - -sub BUILD { - - my ( $self ) = @_; - - my $config = $self->config; - my $router = $config->{'worker'}->{'router-address'}; - $self->_set_router( $router ); - $self->_set_simp_config( $config->{'simp'} ); - $self->_set_handler( sub { $self->_filter_messages(@_) } ); - $self->_connect_simp(); - $self->get_router_details(); - - my $snmp_cache_time = $config->{'worker'}->{'snmp-cache-time'}; - $self->_set_snmp_cache_time( $snmp_cache_time ) if defined $snmp_cache_time; - - return $self; -} - -### private methods ### - -# expects an array of data for it to filter -# returns the filtered array -sub _filter_messages { - my ( $self, $caller, $messages ) = @_; - - my $finished_messages = $messages; - - my $router_details = $self->router_details; - # drop all messages if we don't have router derailts from simp - if ( keys %$router_details < 1 ) { - $self->_add_dropped_count( @$messages ); - - return []; - } - - my $i = 0; - my @delete_indices = (); - foreach my $message ( @$messages ) { - my $sensor = $message->{'meta'}->{'sensor_id'}; - my $details = $router_details->{ $sensor }; - - my $import_flow = $self->_filter_flow( $message, $details ); - if ( $import_flow < 1 ) { - push @delete_indices, $i; - $self->_add_dropped_count( 1 ); - } - $i++; - } - - # remove all the deleted indices - splice @$finished_messages, $_, 1 for reverse @delete_indices; - - $self->_add_imported_count( scalar @$finished_messages ); - - $self->logger->debug( "stats " . Dumper $self->stats ); - - return $finished_messages; -} - -sub _filter_flow { - my ( $self, $message, $details ) = @_; - - return 0 if !defined ($details) || !defined( $details->{'results'} ) || keys %{ $details->{'results'} } == 0; - - my $src_ifindex = $message->{'meta'}->{'src_ifindex'}; - my $dst_ifindex = $message->{'meta'}->{'dst_ifindex'}; - - if (! defined $dst_ifindex or ! defined $src_ifindex ) { - $self->logger->warn("Missing an ifindex!? Skipping flow.". 
$message->{'meta'}->{'sensor_id'}); - return 0; - } - - my $num_results = keys ( %{ $details->{'results'} } ); - - return 0 if $num_results < 1; - - my $host = ( keys ( %{ $details->{'results'} } ) )[0]; - - my $mib_base = "1.3.6.1.2.1.31.1.1.1.18"; - my $src_key = "$mib_base.$src_ifindex"; - my $dst_key = "$mib_base.$dst_ifindex"; - - my $src_description = $details->{ 'results' }->{ $host }->{ $src_key }->{ 'value' } || ""; - my $dst_description = $details->{ 'results' }->{ $host }->{ $dst_key }->{ 'value' } || ""; - - - # see if src OR dst description contains [ns-exp] - - my $import = 0; - - if ( $src_description =~ /\[ns-exp\]/ ) { - $self->logger->debug( "IMPORTING src: $src_ifindex!" ); - $import = 1; - } else { - $self->logger->debug( "SKIPPING src: $src_ifindex!" ); - } - - if ( $dst_description =~ /\[ns-exp\]/ ) { - $self->logger->debug( "IMPORTING dst: $dst_ifindex!" ); - $import = 1; - } else { - $self->logger->debug( "SKIPPING dst: $dst_ifindex!" ); - - } - - return $import; - -} - -sub get_router_details { - my ( $self ) = @_; - - my $client = $self->simp_client; - - my $router_details = $self->router_details || {}; - - my $collections = $self->config->{'collection'}; - - if ( ref($collections) ne "ARRAY" ) { - $collections = [ $collections ]; - - } - - foreach my $collection (@$collections) { - - #my $router = $self->router; - my $sensor = $collection->{'sensor'}; - my $router = $collection->{'sensor'}; - # 3/22/21 - simp on netsage-simp is not returning anything by IP (ie router-address), so only use sensor name - # $router = $collection->{'router-address'} if $collection->{'router-address'}; - - my $row = {}; - - my $details = $router_details->{'router'}; - if ( defined $details->{'ts'} ) { - if ( time() - $details->{'ts'} <= $self->snmp_cache_time ) { - return; - } - } - - my %query = ( - node => [$router], - oidmatch => ["1.3.6.1.2.1.31.1.1.1.18.*"] - - ); - - my $results = $client->get( %query ); - -# as of simp 1.6.0, node results are wrapped in the port used to query the data on the node -# 161 is the port used for traditional SNMP - if ( exists( $results->{'results'}->{'161'} ) && %{ $results->{'results'}->{'161'} } ) { - $self->logger->debug( "router found: $router" ); - $row->{'results'} = $results->{'results'}->{'161'}; - $self->logger->debug( "router found in simp: "); ## . Dumper $results->{'results'} ); - } else { - $self->logger->warn( "router NOT found in simp: " . 
Dumper $router ); - $row->{'results'} = undef; - - } - - my $now = time(); - - $row->{'ts'} = $now; - - $router_details->{ $sensor } = $row; - } - - $self->_set_router_details( $router_details ); - - -} - -sub _add_dropped_count { - my ( $self, $num ) = @_; - $self->_update_stats( { - dropped => $num - }); - -} - -sub _add_imported_count { - my ( $self, $num ) = @_; - $self->_update_stats( { - imported => $num - }); - -} - -sub _update_stats { - my ( $self, $update ) = @_; - my $stats = $self->stats; - my $dropped = $stats->{'dropped'}; - my $imported = $stats->{'imported'}; - if ( $update->{'dropped'} ) { - $dropped += $update->{'dropped'}; - } - if ( $update->{'imported'} ) { - $imported += $update->{'imported'}; - } - - $stats->{'dropped'} = $dropped; - $stats->{'imported'} = $imported; - - $self->_set_stats( $stats ); -} - - -sub _connect_simp { - my ( $self ) = @_; - - my $simp = $self->simp_config; - - my $host = $simp->{'host'}; - my $port = $simp->{'port'} || 5672; - my $user = $simp->{'username'} || "guest"; - my $pass = $simp->{'password'} || "guest"; - my $exchange = $simp->{'exchange'} || "Simp"; - my $timeout = $simp->{'timeout'} || 60; - my $topic = $simp->{'topic'} || "Simp.Data"; - - my $client = GRNOC::RabbitMQ::Client->new( - host => $host, - port => $port, - user => $user, - pass => $pass, - exchange => $exchange, - timeout => $timeout, - topic => $topic); - $self->_set_simp_client( $client ); - return $client; -} - -1; - diff --git a/lib/GRNOC/NetSage/Deidentifier/NetflowImporter.pm b/lib/GRNOC/NetSage/Deidentifier/NetflowImporter.pm deleted file mode 100644 index 48dca956..00000000 --- a/lib/GRNOC/NetSage/Deidentifier/NetflowImporter.pm +++ /dev/null @@ -1,755 +0,0 @@ -package GRNOC::NetSage::Deidentifier::NetflowImporter; - -use strict; -use warnings; - -use Moo; - -extends 'GRNOC::NetSage::Deidentifier::Pipeline'; - -use GRNOC::Log; -use GRNOC::Config; - -use POSIX qw( floor ); -use Net::AMQP::RabbitMQ; -use JSON::XS; -use Math::Round qw( nlowmult nhimult ); -use List::MoreUtils qw( natatime ); -use Try::Tiny; -use Date::Parse; -use Date::Format; -use DateTime; -use File::stat; -use File::Find; -use Path::Class; -use Path::Tiny; -use Storable qw( store retrieve ); -use Sys::Hostname; -use Env; - -use Data::Dumper; - -### required attributes ### - -has config_file => ( is => 'ro', - required => 1 ); - -has logging_file => ( is => 'ro', - required => 1 ); - -### optional attributes ### - -has sensor_id => ( is => 'rwp', default => hostname() ); - -has instance_id => ( is => 'rwp', default => 0 ); - -### internal attributes ### - -has flow_path => ( is => 'rwp' ); - -has json => ( is => 'rwp' ); - -has json_data => ( is => 'rwp' ); - -has status => ( is => 'rwp' ); - -has min_bytes => ( is => 'rwp', - default => 500000000 ); # 500 MB - -has flow_batch_size => ( is => 'rwp' ); - -has status_cache => ( is => 'rwp', - default => sub { {} } ); - -has cache_file => ( is => 'rwp' ); - - -# min_file_age must be one of "older" or "newer". 
$age must match /^(\d+)([DWMYhms])$/ where D, W, M, Y, h, m and s are "day(s)", "week(s)", "month(s)", "year(s)", "hour(s)", "minute(s)" and "second(s)" -# see http://search.cpan.org/~pfig/File-Find-Rule-Age-0.2/lib/File/Find/Rule/Age.pm -has min_file_age => ( is => 'rwp', - default => '0' ); - -has cull_enable => ( is => 'rwp', - default => 0 ); - -# wait until files are this old to delete them after processing -# in days -has cull_ttl => ( is => 'rwp', - default => 3 ); - -# cull after reading $cull_count files -has cull_count => ( is => 'rwp', - default => 10 ); - -has nfdump_path => ( is => 'rwp' ); - -has flow_type => ( is => 'rwp', - default => 'netflow' ); - -my @files; - -### constructor builder ### -sub getSensorValue() -{ - my $sensor_id = $_[0]; - # check if sensorId value starts with a $ sign, if so get value from env - if (index($sensor_id, '$') == 0) { - my $env_var = substr $sensor_id, 1; ##chop off the $ sign - my $env_value = $ENV{$env_var} || ''; - ## IF the env is set use its value, otherwise fallback on hostname - if ($env_value ne '') { - $sensor_id = $env_value; - } else { - $sensor_id = hostname(); - } - # If the sensor is set to empty string use hostname - } elsif ($sensor_id eq ""){ - $sensor_id = hostname(); - } - - return $sensor_id; -} - - -sub BUILD { - - my ( $self ) = @_; - - my $config = $self->config; - my $sensor_id = &getSensorValue($config->{ 'sensor' } || ''); - - if ( defined ( $sensor_id ) ) { - $self->_set_sensor_id( $sensor_id ); - } - my $instance_id = $config->{ 'instance' }; - - # for some reason if you leave blank, you get - # an empty hashref back. work around that. - if ( defined ( $instance_id ) && ! ( ref $instance_id eq ref {} ) ) { - $self->_set_instance_id( $instance_id ); - } - - $self->logger->debug("instance id: " . $self->instance_id); - - my $flow_batch_size = $config->{'worker'}->{'flow-batch-size'}; - my $cache_file = $config->{'worker'}->{'cache-file'} if not defined $self->cache_file; - $cache_file = '/var/cache/netsage/netflow_importer.cache' if not defined $cache_file; - $self->_set_cache_file( $cache_file ); - $self->logger->debug("cache file: " . $cache_file); - - my $flow_path = $self->flow_path; - - $flow_path = $config->{'worker'}->{'flow-path'} if not defined $flow_path; - - $self->_set_flow_path( $flow_path ); - $self->logger->debug("flow path: " . 
Dumper $flow_path); - - my $min_file_age = $self->min_file_age; - $min_file_age = $config->{'worker'}->{'min-file-age'} if defined $config->{'worker'}->{'min-file-age'}; - $self->_set_min_file_age( $min_file_age ); - - my $flow_type = $self->flow_type; - $flow_type = $config->{'worker'}->{'flow-type'} if defined $config->{'worker'}->{'flow-type'}; - $self->_set_flow_type( $flow_type ); - $self->logger->debug("flow type: $flow_type"); - - $self->_set_flow_batch_size( $flow_batch_size ); - $self->_set_handler( sub{ $self->_run_netflow_import(@_) } ); - - $self->_set_nfdump_path( $config->{'worker'}->{'nfdump-path'} ) - if defined $config->{'worker'}->{'nfdump-path'}; - - my $min_bytes = $self->min_bytes; - $min_bytes = $config->{'worker'}->{'min-bytes'} if defined $config->{'worker'}->{'min-bytes'}; - $self->_set_min_bytes( $min_bytes ); - - my $cull_enable = $self->cull_enable; - $cull_enable = $config->{'worker'}->{'cull-enable'} if defined $config->{'worker'}->{'cull-enable'}; - $self->_set_cull_enable( $cull_enable ); - - my $cull_ttl = $self->cull_ttl; - $cull_ttl = $config->{'worker'}->{'cull-ttl'} if defined $config->{'worker'}->{'cull-ttl'}; - $self->_set_cull_ttl( $cull_ttl ); - - # create JSON object - my $json = JSON::XS->new(); - - $self->_set_json( $json ); - - $self->_read_cache(); - - return $self; -} - -### public methods ### - -sub _run_netflow_import { - - my ( $self ) = @_; - - # get flow data - my $success = $self->_get_flow_data(); - - # publish flow data - return $self->_publish_flows(); - -} - -sub _get_params { - my ( $self, $collection ) = @_; - my %params = (); - my $config = $self->config; - - my $path = $collection->{'flow-path'} || $self->flow_path; - my $sensor = $collection->{'sensor'} || $self->sensor_id; - my $instance = $collection->{'instance'} || $self->instance_id || ''; - my $flow_type = $collection->{'flow-type'} || $self->flow_type || 'netflow'; - - - %params = ( - path => $path, - sensor => $sensor, - instance => $instance, - flow_type => $flow_type - ); - - - return \%params; -} - -sub _get_flow_data { - my ( $self ) = @_; - - my $flow_batch_size = $self->flow_batch_size; - my $status = $self->status_cache; - - my $collections = $self->config->{'collection'}; - - - if ( ref($collections) ne "ARRAY" ) { - $collections = [ $collections ]; - } - - foreach my $collection ( @$collections ) { - - my $path = $collection->{'flow-path'}; # || $self->flow_path; - # if path doesn't end with an /, add one. Required for finding @paths_to_check. - if ( $path !~ /.+\/$/) { - $path = $path."/"; - } - - my $sensor = &getSensorValue($collection->{'sensor'} || ''); - $self->logger->info( " Doing collection $sensor "); - - my %params = %{ $self->_get_params( $collection ) }; - $params{'flow-path'} = $path; - $params{'sensor'} = $sensor; - - my $min_bytes = $self->min_bytes; - - $self->logger->debug("path: $path"); - $self->logger->debug("min_file_age: " . $self->min_file_age ); - - $self->_cull_flow_files( $path ); - - # We need to compare files to the contents of the cache file to see if they have been imported already. - # --- If files are not being culled, we don't want to compare every file ever saved, so - # --- first, narrow down the list of dirs to look through to only those with dates more recent than N months ago. 
- my $collection_dir = $path; - my @paths_to_check; - if ( $self->cull_enable < 1 ) { - my $now = DateTime->today; # UTC (at 00:00:00) - my $now_yr = $now->year(); - my $now_mo = $now->month(); - my $now_day = $now->day(); - my $too_old_date = $now->subtract( months => 2 ); # HARDCODED THRESHOLD N (must be less than the cache file culling threshold!) - my $too_old_yr = $too_old_date->year(); - my $too_old_mo = $too_old_date->month(); - my $too_old_day = $too_old_date->day(); - - for (my $yr = $too_old_yr; $yr <= $now_yr ; $yr++) { - for (my $mo = 1; $mo <= 12; $mo++) { - # don't need to continue beyond current month - last if ( $yr == $now_yr and $mo == $now_mo + 1); - # If first and last day of month are not too old, we want to look at all files in that month - my $first_day = DateTime->new( { year=>$yr, month=>$mo, day=>"01" } ); - my $last_day = DateTime->last_day_of_month( { year=>$yr, month=>$mo } ); - if ( $first_day >= $too_old_date and $last_day > $too_old_date ) { - # add dir to list - my $subdir = sprintf("%02d/%02d/", $yr, $mo); - push (@paths_to_check, $collection_dir.$subdir); - $self->logger->debug("will check ".$collection_dir.$subdir); - } - elsif ( $first_day <= $too_old_date and $too_old_date <= $last_day ) { - # if $too_old_date is in the middle of the month, go through the day dirs. - for (my $day = 1; $day <= $last_day->day(); $day++) { - my $day_date = DateTime->new( { year=>$yr, month=>$mo, day=>$day } ); - if ( $day_date >= $too_old_date ) { - my $subdir = sprintf("%02d/%02d/%02d/", $yr, $mo, $day); - push (@paths_to_check, $collection_dir.$subdir); - $self->logger->debug("will check ".$collection_dir.$subdir); - } - } - } - } - } - } else { - # if culling is enabled, it's shouldn't be a big deal to just examine all existing files - @paths_to_check = ( $collection_dir ); - } - - - # Get list of files to compare to cache file contents, exclude files that are too new (< min_file_age) - try { - @files = (); - find({ wanted => sub { find_nfcapd($self, \%params) }, follow => 1 }, @paths_to_check ); - - } catch { - $self->logger->error( "Error retrieving nfcapd file listing: " . Dumper($_) ); - sleep(10); - return; - }; - - # Get list of files to actually import by comparing to cache file record of what's been done before - my @filepaths = (); - for(my $i=0; $i<@files; $i++) { - my $file = $files[$i]; - #$self->logger->debug("file: $file"); - my $file_path = dir( $path, $file ) . ""; - my $stats = stat($file_path); - my $abs = file( $file_path ); - # TODO: changed rel to abs; need a way to figure out a way to convert - # the old rel paths to abs - - - # skip empty files (header and/or footer only). They can cause problems. - if( ! $stats or ! $stats->size ) { - $self->logger->info("*** For $path $file, there are no stats!? skipping."); - next; - } elsif( $stats->size <= 420 ) { - $self->logger->debug("skipping $path $file because size is <= 420"); - next; - } - - my $rel = $abs->relative( $path ) . ""; - if ( exists ( $status->{ $rel } ) ) { - $status->{ $abs } = $status->{ $rel }; - delete $status->{ $rel }; - #warn "$rel being changed to $abs in file cache ..."; - } - if ( exists ( $status->{ $abs } ) ) { - my $entry = $status->{ $abs }; - if ( (!defined $stats) or (!defined $entry) ) { - next; - } - my $mtime_cache = $entry->{'mtime'}; - my $size_cache = $entry->{'size'}; - - # If file size and last-modified time are unchanged, skip it - if ( $mtime_cache == $stats->mtime - && $size_cache == $stats->size ) { - next; - } - } - push @filepaths, dir( $path, $file ) . 
""; - - } - @filepaths = sort @filepaths; - - # Read the nfcapd files to import - if ( @filepaths > 0 ) { - my $success = $self->_get_nfdump_data(\@filepaths, %params); - - # --- make cache file smaller. (sub will do nothing if nfcapd file culling is enabled) (if it is enabled, it will cull the cache file itself.) - if ($success) { - $self->logger->debug( "calling cull_cache_file for $sensor"); - $self->_cull_cache_file(); - $self->logger->debug( "done with cull_cache_file for $sensor"); - } - } - - - } # end loop over collections - - -} - -# Loop over files to import, using nfdump to read each. Write cache file after each file is read. -sub _get_nfdump_data { - my ( $self, $flowfiles, %params ) = @_; - - my $sensor = $params{'sensor'}; - my $instance = $params{'instance'}; - - my $path = $params{'path'}; # flow-path - - my $flow_type = $params{'flow_type'}; - - my $status = $self->status_cache; - - my $flow_batch_size = $self->flow_batch_size; - - my $min_bytes = $self->min_bytes; - - my $config_path = $self->nfdump_path; - my $nfdump = '/usr/bin/nfdump'; - # if configured nfdump path is a file and is executable, use it - if ( defined $config_path ) { - if ( -f $config_path && -x _ ) { - $nfdump = $config_path - } else { - $self->logger->error("Invalid nfdump path specified; quitting"); - $self->_set_is_running( 0 ); - return; - } - - } - - my $file_count = 0; - my $cull_count = $self->cull_count; - my @all_data = (); - foreach my $flowfile ( @$flowfiles ) { - - # quit if the process has been told to stop - if ( !$self->is_running ) { - $self->logger->debug("Quitting flowfile loop and returning from _get_nfdump_data()"); - return; - } - - $file_count++; - if ( $cull_count > 0 && $file_count > 0 && $file_count % $cull_count == 0 ) { - $self->_cull_flow_files( $path ); - } - - my $stats = stat($flowfile); - - # If file does not exist, skip this file - if ( !defined $stats ) { - next; - } - - $self->logger->info(" importing file: $flowfile"); - - my $command = "$nfdump -r '$flowfile'"; - $command .= " -a"; # perform aggregation based on 5 tuples - $command .= ' -o "fmt:%ts,%te,%td,%sa,%da,%sp,%dp,%pr,%flg,%fwd,%stos,%ipkt,%ibyt,%opkt,%obyt,%in,%out,%sas,%das,%smk,%dmk,%dtos,%dir,%nh,%nhb,%svln,%dvln,%ismc,%odmc,%idmc,%osmc,%mpls1,%mpls2,%mpls3,%mpls4,%mpls5,%mpls6,%mpls7,%mpls8,%mpls9,%mpls10,%ra,%eng,%bps,%pps,%bpp"'; - $command .= ' -6'; # to get full ipv6 addresses - $command .= ' -L +' . $min_bytes; - $command .= " -N -q"; - $command .= ' |'; - $self->logger->debug(" command:\n$command\n"); - - my $fh; - open($fh, $command); - - my $i = 0; - while ( my $line = <$fh> ) { - my ( $ts,$te,$td,$sa,$da,$sp,$dp,$pr,$flg,$fwd,$stos,$ipkt,$ibyt,$opkt,$obyt,$in,$out,$sas,$das,$smk,$dmk,$dtos,$dir,$nh,$nhb,$svln,$dvln,$ismc,$odmc,$idmc,$osmc,$mpls1,$mpls2,$mpls3,$mpls4,$mpls5,$mpls6,$mpls7,$mpls8,$mpls9,$mpls10,$ra,$eng,$bps,$pps,$bpp ) = split( /\s*,\s*/, $line); - - if ($ts =~ /^Byte/ ) { next; } - - my $start = str2time( $ts ); - my $end = str2time( $te ); - - if ( !defined $start || !defined $end ) { - $self->logger->error("Invalid line in $flowfile. $!. 
Start or End time is undefined."); - $self->logger->error("line: $line"); - $self->logger->error("ts: $ts start: $start"); - $self->logger->error("te: $te end: $end"); - next; - } - - my $sum_bytes = $ibyt + $obyt; - my $sum_packets = $ipkt + $opkt; - my $proto = ''; - if( $pr =~ /^\d+$/ ) { - $proto = getprotobynumber( $pr ); - } else { - $proto = lc($pr); - } - - my $row = {}; - $row->{'type'} = 'flow'; - $row->{'interval'} = 600; - $row->{'meta'} = {}; - $row->{'meta'}->{'flow_type'} = $flow_type || 'netflow'; - $row->{'meta'}->{'src_ip'} = $sa; - $row->{'meta'}->{'src_port'} = $sp; - $row->{'meta'}->{'dst_ip'} = $da; - $row->{'meta'}->{'dst_port'} = $dp; - $row->{'meta'}->{'protocol'} = $proto; - $row->{'meta'}->{'sensor_id'} = $sensor; - $row->{'meta'}->{'instance_id'} = $instance if $instance ne ''; - $row->{'meta'}->{'src_asn'} = $sas; - $row->{'meta'}->{'dst_asn'} = $das; - $row->{'meta'}->{'src_ifindex'} = $in if $in; - $row->{'meta'}->{'dst_ifindex'} = $out if $out; - $row->{'start'} = $start; - $row->{'end'} = $end; - - $row->{'values'} = {}; - $row->{'values'}->{'duration'} = $td; - $row->{'values'}->{'num_bits'} = $sum_bytes * 8; - $row->{'values'}->{'num_packets'} = $sum_packets; - $row->{'values'}->{'bits_per_second'} = $bps; - $row->{'values'}->{'packets_per_second'} = $pps; - - - push @all_data, $row; - if ( @all_data % $flow_batch_size == 0 ) { - $self->logger->debug("processed " . @all_data . " (up to $flow_batch_size) flows; publishing ... "); - $self->_set_json_data( \@all_data ); - $self->_publish_flows(); - @all_data = (); - } - } - # publish any remaining data - # TODO: improve performance here by waiting until we have full batches - $self->_set_json_data( \@all_data ); - $self->_publish_flows(); - @all_data = (); - - # TODO: changed rel to abs; need a way to figure out a way to convert - # the old rel paths to abs - my $abs = file( $flowfile ); - #my $rel = $abs->relative( $path ) . ""; - $status->{$abs} = { - mtime => $stats->mtime, - size => $stats->size - }; - $self->_set_status_cache( $status ); - $self->_write_cache(); - - } ## end loop over flow files - - - if ( $self->run_once ) { - $self->logger->debug("only running once, stopping"); - $self->_set_is_running( 0 ); - } - - if (!@all_data) { - # @all_data should be empty. success. - return 1; - } else { - # something went wrong - return; - } - - -}; - -### private methods ### - -sub _write_cache { - my ( $self ) = @_; - my $filename = $self->cache_file; - $self->logger->debug( "writing cache file $filename" ); - my $status = $self->status_cache; - store $status, $filename; - $self->logger->debug( "done writing cache file $filename" ); - -} - - -sub _read_cache { - my ( $self ) = @_; - my $filename = $self->cache_file; - $self->logger->debug( "reading cache file $filename" ); - my $status = $self->status_cache; - if ( not -f $filename ) { - open my $fh, '>', $filename - or die "Cache file $filename does not exist, and failed to created it: $!\n"; - close $fh; - store $status, $filename; - } - $status = retrieve $filename; - $self->_set_status_cache( $status ); - $self->logger->debug( "done reading cache file $filename" ); -} - -sub _publish_flows { - my $self = shift; - my $flows = $self->json_data; - if ( defined $flows ) { - $self->_publish_data( $flows ); - } - - $self->_set_json_data( [] ); -} - -sub _cull_flow_files { - my ( $self, $path ) = @_; - my $status = $self->status_cache; - #warn "status " . Dumper $status; - #$self->logger->debug( "cache status" . 
Dumper $status ); - - if ( $self->cull_enable < 1 ) { - $self->logger->debug("not culling files (disabled)"); - return; - } - - $self->logger->debug("CULLING files (enabled)"); - - - # see how old files should be (in days) - my $cull_ttl = $self->cull_ttl; - - my @cache_remove = (); - my %dirs_to_remove = (); - - - while( my ($filename, $attributes) = each %$status ) { - my $mtime = DateTime->from_epoch( epoch => $attributes->{'mtime'} ); - - my $dur = DateTime::Duration->new( - days => $cull_ttl - ); - - my $dt = DateTime->now; - - if ( DateTime->compare( $mtime, $dt->subtract_duration( $dur ) ) == -1 ) { - # Make sure that the file exists, AND that it is under our main - # flow directory. Just a sanity check to prevent deleting files - # outside the flow data directory tree. - - my $filepath = $filename; - my $realpath = ""; - - try { - $realpath = path( $filepath )->realpath; - - my $subsumes = path( $path )->subsumes( $realpath ); - - # if the flow path does not subsume the file we're asked to delete, - # refuse - if ( !$subsumes ) { - #$self->logger->debug("Tried to delete a file outside the flow path!: " . $realpath . "; path: " . $path); - #push @cache_remove, $filename; - #next; - } - } catch { - # an error here is not necessarily a problem, could just be the file - # doesn't exist - #push @cache_remove, $filename; - #next; - - }; - - #return; - # - - if ( -f $realpath ) { - my $parent = path( $realpath )->parent; - $self->logger->debug("deleting $filepath ..."); - unlink $filepath or $self->logger->error( "Could not unlink $realpath: $!" ); - $dirs_to_remove{ $parent } = 1; - } else { - #warn "file does not exist; would delete from cache"; - - } - push @cache_remove, $filename; - } - - } - foreach my $file ( @cache_remove ) { - delete $status->{$file}; - - } - - foreach my $dir ( keys %dirs_to_remove ) { - rmdir $dir; - - } - $self->_write_cache(); - -} - -# If culling of nfcapd files is not enabled, the cache file can become huge. This cuts it down to last X months. -sub _cull_cache_file { - my ( $self ) = @_; - - # If file culling is enabled, that will also cull the cache file, so just return. - if ( $self->cull_enable == 1 ) { - $self->logger->debug("not running cull_cache_file"); - return; - } - - # delete files older than X months (by filename) from the cache file. - my $cull_to = DateTime->now->subtract( months => 3 ); # UTC datetime HARDCODED THRESHOLD = 3 mo. - # Make sure this is > hardcoded threshold in _get_flow_data. - my $status = $self->status_cache; - - foreach my $key ( keys %$status ) { - # Key = full path and filename in cache file. 
Parse filename for date and time - my ($file_yr, $file_mo, $file_day, $file_hr, $file_min) = $key =~ /.*(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})$/; - # Make it into a date object - my $file_date = DateTime->new( year => $file_yr, month => $file_mo, day => $file_day, - hour => $file_hr, minute => $file_min, time_zone => "UTC" ); - # Delete if $file_date < $cull_to - if ( DateTime->compare($file_date, $cull_to) == -1 ) { - delete $status->{ $key }; - } - } - - $self->_set_status_cache( $status ); - $self->_write_cache(); -} - - -sub find_nfcapd { - my ( $self, $params ) = @_; - my $path = $params->{'path'}; # flow-path, base dir - my $filepath = $File::Find::name; # full path+filename - return if not defined $filepath; - if ( not -f $filepath ) { - return; - - } - return if $filepath =~ /nfcapd\.current/; - return if $filepath =~ /\.nfstat$/; - - my $name = 'nfcapd.*'; - my $relative = path( $filepath )->relative( $path ); - - # if min_file_age is '0' then we don't care about file age (this is default). - # if not, ignore files younger than min_file_age. - if ( $self->min_file_age ne '0' ) { - if ( ! $self->get_age( "older", $self->min_file_age, $filepath ) ) { - return; - } - } - - push @files, "$relative"; - -} - -sub get_age { - my ( $self, $criterion, $age, $filename ) = @_; - - my ( $interval, $unit ) = ( $age =~ /^(\d+)([DWMYhms])$/ ); - if ( ! $interval or ! $unit ) { - return; - } else { - my %mapping = ( - "D" => "days", - "W" => "weeks", - "M" => "months", - "Y" => "years", - "h" => "hours", - "m" => "minutes", - "s" => "seconds", ); - #exec( sub { - my $dt = DateTime->now; - $dt->subtract( $mapping{$unit} => $interval ); - my $compare_to = $dt->epoch; - my $mtime = stat( $filename )->mtime; - return $criterion eq "older" ? - $mtime < $compare_to : - $mtime > $compare_to; - # } ); - } -} - - -1; diff --git a/lib/GRNOC/NetSage/Deidentifier/Pipeline.pm b/lib/GRNOC/NetSage/Deidentifier/Pipeline.pm deleted file mode 100644 index b3570ba3..00000000 --- a/lib/GRNOC/NetSage/Deidentifier/Pipeline.pm +++ /dev/null @@ -1,641 +0,0 @@ -package GRNOC::NetSage::Deidentifier::Pipeline; - -use strict; -use warnings; - -use Moo; - -use GRNOC::Log; -use GRNOC::Config; - -use Net::AMQP::RabbitMQ; -use JSON::XS; -use Math::Round qw( nlowmult nhimult ); -use List::MoreUtils qw( natatime ); -use Try::Tiny; -use Data::Validate::IP; -use Net::IP; -use Hash::Merge qw( merge ); -use POSIX; - -use Data::Dumper; - -### constants ### - -use constant QUEUE_PREFETCH_COUNT => 20; -use constant QUEUE_PREFETCH_COUNT_NOACK => 0; -use constant QUEUE_FETCH_TIMEOUT => 10 * 1000; -use constant RECONNECT_TIMEOUT => 10; - -### required attributes ### - -has config_file => ( is => 'ro', - required => 1 ); - - -has logging_file => ( is => 'ro', - required => 1 ); - -has process_name => ( is => 'ro', - required => 1 ); - -# input queue, identified by name -#has input_queue_name => ( is => 'ro', -# required => 1 ); - -# output queue, identified by name -#has output_queue_name => ( is => 'ro', -# required => 1 ); - -has handler => ( is => 'rwp'); -# required => 1 ); - -### internal attributes ### - -has logger => ( is => 'rwp' ); - -has config => ( is => 'rwp' ); - -has config_obj => ( is => 'rwp' ); - -has is_running => ( is => 'rwp', - default => 0 ); - -has rabbit_config => ( is => 'rwp' ); - -has task_type => ( is => 'rwp' ); - -has shared_config_file => ( is => 'ro' ); - - -# ack_messages indicates whether to ack rabbit messages. normally, this should be 1 (enabled). 
-# if you disable this, we don't ack the rabbit messages and they go back in the queue. -# usually this is only desired for testing purposes. Don't touch this unless you -# know what you're doing. -has ack_messages => ( is => 'rwp', - default => 1 ); - -has run_once => ( is => 'rwp', - default => 0 ); - -has rabbit_input => ( is => 'rwp' ); - -has rabbit_output => ( is => 'rwp' ); - -has input_queue => ( is => 'rwp' ); - -has input_channel => ( is => 'rwp' ); - -has output_queue => ( is => 'rwp' ); - -has output_channel => ( is => 'rwp' ); - -has batch_size => ( is => 'rwp' ); - -has json => ( is => 'rwp' ); - -has num_published_messages => ( is => 'rwp', - default => 0 ); - -### constructor builder ### - -sub BUILD { - - my ( $self ) = @_; - - # create and store logger object - my $grnoc_log = GRNOC::Log->new( config => $self->logging_file ); - my $logger = GRNOC::Log->get_logger(); - - $self->_set_logger( $logger ); - - # create and store config object - my $config_obj = GRNOC::Config->new( config_file => $self->config_file, - force_array => 0 ); - - - # create and store shared config object - my $shared_config_obj; - my $shared_config = {}; - if ( defined ( $self->shared_config_file ) ) { - $shared_config_obj = GRNOC::Config->new( config_file => $self->shared_config_file, - force_array => 0 ); - my $new_shared_config = {}; - if ( !$shared_config_obj->{'error'} ) { - $new_shared_config = $shared_config_obj->get('/*'); - if ( $new_shared_config ) { - $shared_config = $new_shared_config; - } - } - } - - my $config_single = $config_obj->get('/*') or die "DEATH2!!"; - - # Merge the hashes; the "single" values should overrride those - # from the "shared" config. - my $config = merge( $config_single, $shared_config ); - - $self->_set_config( $config ); - - $self->_rabbit_config(); - - return $self; -} - -### public methods ### - -sub start { - - my ( $self, $task_type ) = @_; - $self->_set_task_type( $task_type ); - - $self->logger->info( "Starting." ); - - # flag that we're running - $self->_set_is_running( 1 ); - - # change our process name - $0 = $self->process_name . " [worker]"; - - # setup signal handlers - $SIG{'TERM'} = sub { - - $self->logger->info( "Received SIG TERM." ); - $self->stop(); - }; - - $SIG{'HUP'} = sub { - - $self->logger->info( "Received SIG HUP." ); - }; - - # create JSON object - my $json = JSON::XS->new(); - - $self->_set_json( $json ); - - # connect to rabbit queues - $self->_rabbit_connect(); - - if ( $self->task_type && $self->task_type eq "no_input_queue" ) { - $self->start_noinput(); - - } else { - # continually consume messages from rabbit queue, making sure we have to acknowledge them - return $self->_consume_loop(); - } - -} - -sub start_noinput { - my ( $self ) = @_; - - return $self->_consume_noinput(); -} - - -sub stop { - - my ( $self ) = @_; - - $self->logger->debug( 'Stopping.' ); - - # this will cause the consume loop to exit - $self->_set_is_running( 0 ); -} - -### private methods ### - -sub _consume_noinput { - # for no input queue - - my ( $self ) = @_; - - $self->logger->debug( 'Starting consume_noinput loop.' ); - while( 1 ) { - # have we been told to stop? - if ( !$self->is_running ) { - $self->logger->debug( 'Exiting consume_noinput loop.' 
); - return 0; - } - my $handler = $self->handler; - $self->handler->( $self ); - sleep RECONNECT_TIMEOUT; - - } - -} - -sub _consume_loop { - # if there is an input queue - - my ( $self ) = @_; - - - my $input_queue = $self->rabbit_config->{'input'}->{'queue'}; - my $input_channel = $self->rabbit_config->{'input'}->{'channel'}; - my $rabbit = $self->rabbit_input; - - $self->logger->debug( 'Starting consume_loop.' ); - while ( 1 ) { - - # have we been told to stop? - if ( !$self->is_running ) { - - $self->logger->debug( 'Exiting consume loop.' ); - return 0; - } - - # receive the next rabbit message - my $rabbit_message; - - my $delivery_tag; - - try { - - $rabbit_message = $rabbit->recv( QUEUE_FETCH_TIMEOUT ); - - - } - - - catch { - - $self->logger->error( "Error receiving rabbit message: $_" ); - - # reconnect to rabbit since we had a failure - $self->_rabbit_connect(); - }; - - # didn't get a message? (eg, no more to retrieve) - if ( !$rabbit_message ) { - - #$self->logger->debug( 'No message received.' ); - - # re-enter loop to retrieve the next message - next; - } - - # try to JSON decode the messages - my $messages; - - $delivery_tag = $rabbit_message->{'delivery_tag'}; - - try { - - $messages = $self->json->decode( $rabbit_message->{'body'} ); - } - - catch { - - $self->logger->error( "Unable to JSON decode message: $_" ); - }; - - if ( !$messages ) { - - try { - - # reject the message and do NOT requeue it since its malformed JSON - $rabbit->reject( $input_channel, $delivery_tag, 0 ); - } - - catch { - - $self->logger->error( "Unable to reject rabbit message: $_" ); - - # reconnect to rabbit since we had a failure - $self->_rabbit_connect(); - }; - } - - # retrieve the next message from rabbit if we couldn't decode this one - next if ( !$messages ); - - # make sure its an array (ref) of messages - if ( ref( $messages ) ne 'ARRAY' ) { - - # make it into a one-element array (needed for rabbit msgs written by logstash) - $messages = [$messages] - - } - - my $num_messages = @$messages; - - my $t1 = time(); - - my $success = $self->_consume_messages( $messages ); - - my $t2 = time(); - my $delta = $t2 - $t1; - - $self->logger->debug( "Consumed $num_messages updates in $delta seconds." ); - - # didn't successfully consume the messages, so reject but requeue the entire message to try again - if ( !$success ) { - - $self->logger->debug( "Rejecting rabbit message, requeueing." ); - - try { - - $rabbit->reject( $input_channel, $rabbit_message->{'delivery_tag'}, 1 ); - } - - catch { - - $self->logger->error( "Unable to reject rabbit message: $_" ); - - # reconnect to rabbit since we had a failure - $self->_rabbit_connect(); - }; - } - - # successfully consumed message, acknowledge it to rabbit - else { - if ( $self->ack_messages ) { - - #$self->logger->debug( "Acknowledging successful message." 
); - - try { - - $rabbit->ack( $input_channel, $rabbit_message->{'delivery_tag'} ); - } - - catch { - - $self->logger->error( "Unable to acknowledge rabbit message: $_" ); - - # reconnect to rabbit since we had a failure - $self->_rabbit_connect(); - }; - } else { - # do nothing - $self->logger->warn("Not acking message"); - } - } - } -} - -sub _consume_messages { - - my ( $self, $messages ) = @_; - - my $num_messages = @$messages; - #$self->logger->debug( "---consuming $num_messages messages" ); - - # gather all messages to process - my $flows_to_process = []; - - # handle every message that came within the rabbit message - foreach my $message ( @$messages ) { - - # make sure message is an object/hash (ref) - if ( ref( $message ) ne 'HASH' ) { - - $self->logger->error( "Messages must be an object/hash of data, skipping." ); - next; - } - - # include this to our list of messages to process if it was valid - push( @$flows_to_process, $message ) if $message; - - } - - # process all of the data across all messages - my $success = 1; - - - try { - - $flows_to_process = $self->_process_messages( $flows_to_process ) if ( @$flows_to_process > 0 ); - } - - catch { - - $self->logger->error( "Error processing messages: $_" ); - $success = 0; - }; - # if there's no output queue, eg, we're caching in memory, we don't need to push to rabbit - just return success - if ( $self->task_type && $self->task_type eq "no_output_queue" ) { - return $success; - } - - try { - - $self->_publish_data( $flows_to_process ) if ( @$flows_to_process > 0 ); - } - - catch { - - $self->logger->error( "Error publishing messages: $_" ); - $success = 0; - }; - - return $success; -} - -sub _publish_data { - my ( $self, $messages ) = @_; - my $batch_size = $self->rabbit_config->{'output'}->{'batch_size'}; - if ( ! @$messages ) { - $self->logger->debug("No data found to publish"); - return; - } - - # send a max of $batch_size messages at a time to rabbit - my $it = natatime( $batch_size, @$messages ); - - my $num = $self->num_published_messages; - $num += @$messages; - $self->_set_num_published_messages( $num ); - $self->logger->debug("Publishing up to " . $batch_size . " messages per batch ( this batch " . @$messages . " ); total: " . 
$num ); - - my $queue = $self->rabbit_config->{'output'}->{'queue'}; - my $channel = $self->rabbit_config->{'output'}->{'channel'}; - my $exchange = $self->rabbit_config->{'output'}->{'exchange'} || ""; - - $self->_rabbit_connect(); - while ( my @finished_messages = $it->() ) { - - $self->rabbit_output->publish( $channel, $queue, $self->json->encode( \@finished_messages ), {'exchange' => $exchange} ); - } - return $messages; - -} - - - -# _process_messages takes an argument of an arrayref of data to process -# and then it calls the specified handler function on it -sub _process_messages { - my ( $self, $flows_to_process ) = @_; - - my $handler = $self->handler; - $flows_to_process = $self->handler->( $self, $flows_to_process ); - - return $flows_to_process; - -} - -sub _rabbit_config { - my ( $self ) = @_ ; - - my $rabbit_config = {}; - my @directions = ('input', 'output'); - - my $config = $self->config; - - foreach my $direction ( @directions ) { - $rabbit_config->{$direction} = {}; - - my $rabbit_host = $config->{ "rabbit_$direction" }->{ "host"}; - $rabbit_config->{$direction}->{'host'} = $rabbit_host; - - my $rabbit_port = $config->{ "rabbit_$direction" }->{ "port" }; - $rabbit_config->{$direction}->{'port'} = $rabbit_port; - - my $rabbit_username = $config->{ "rabbit_$direction" }->{ "username" }; - $rabbit_config->{$direction}->{'username'} = $rabbit_username; - - my $rabbit_password = $config->{ "rabbit_$direction" }->{ "password" }; - $rabbit_config->{$direction}->{'password'} = $rabbit_password; - - my $rabbit_vhost = $config->{ "rabbit_$direction" }->{ "vhost" }; - $rabbit_config->{$direction}->{'vhost'} = $rabbit_vhost if defined $rabbit_vhost; - - my $rabbit_ssl = $config->{ "rabbit_$direction" }->{ "ssl" } || 0; - $rabbit_config->{$direction}->{'ssl'} = $rabbit_ssl if defined $rabbit_ssl; - - my $rabbit_ca_cert = $config->{ "rabbit_$direction" }->{ "cacert" }; - $rabbit_config->{$direction}->{'ca_cert'} = $rabbit_ca_cert if defined $rabbit_ca_cert; - - my $batch_size = $config->{"rabbit_$direction" }->{ "batch_size"} || 100; - $rabbit_config->{$direction}->{'batch_size'} = $batch_size if defined $batch_size; - - my $queue = $config->{"rabbit_$direction" }->{ "queue" }; - $rabbit_config->{$direction}->{'queue'} = $queue; - - my $exchange = $config->{"rabbit_$direction" }->{ "exchange" }; - $rabbit_config->{$direction}->{'exchange'} = $exchange; - - my $channel = $config->{"rabbit_$direction" }->{ "channel" }; - $rabbit_config->{$direction}->{'channel'} = $channel; - - my $durable = $config->{"rabbit_$direction" }->{ "durable" }; - $rabbit_config->{$direction}->{'durable'} = $durable; - - - } - $self->_set_rabbit_config($rabbit_config); - -} - -sub _rabbit_connect { - my ( $self ) = @_; - - my $rabbit_config = $self->rabbit_config; - - my %connected = (); - $connected{'input'} = 0; - $connected{'output'} = 0; - - while ( 1 ) { - - my @directions = ('input', 'output'); - - foreach my $direction ( @directions ) { - - my $rabbit_host = $rabbit_config->{ $direction }->{'host'}; - my $rabbit_port = $rabbit_config->{ $direction }->{'port'}; - my $rabbit_username = $rabbit_config->{ $direction }->{'username'}; - my $rabbit_password = $rabbit_config->{ $direction }->{'password'}; - my $rabbit_ssl = $rabbit_config->{ $direction }->{'ssl'}; - my $rabbit_ca_cert = $rabbit_config->{ $direction }->{'ca_cert'}; - my $rabbit_vhost = $rabbit_config->{ $direction }->{'vhost'}; - my $rabbit_channel = $rabbit_config->{ $direction }->{'channel'}; - my $rabbit_queue = $rabbit_config->{ $direction 
}->{'queue'}; - my $rabbit_exchange = $rabbit_config->{ $direction }->{'exchange'}; - my $rabbit_durable = $rabbit_config->{ $direction }->{'durable'}; - if ( !defined $rabbit_durable ) { - $rabbit_durable = 1; #default to durable - } - - # $self->logger->debug( "Connecting to $direction RabbitMQ $rabbit_host:$rabbit_port." ); - - $connected{ $direction } = 0; - - try { - - - my $rabbit = Net::AMQP::RabbitMQ->new(); - my $params = {}; - $params->{'port'} = $rabbit_port; - $params->{'user'} = $rabbit_username if $rabbit_username; - $params->{'password'} = $rabbit_password if $rabbit_password; - if ( $rabbit_ssl ) { - $params->{'ssl'} = $rabbit_ssl; - $params->{'ssl_verify_host'} = 0; - $params->{'ssl_cacert'} = $rabbit_ca_cert; - } - if ( $rabbit_vhost ) { - $params->{'vhost'} = $rabbit_vhost; - } - - if ( $rabbit_exchange ) { - $params->{'exchange'} = $rabbit_exchange; - } - - $rabbit->connect( $rabbit_host, $params ); - - if ( $direction eq 'input' ) { - # open channel to the pending queue we'll read from - $rabbit->channel_open( $rabbit_channel ); - $rabbit->queue_declare( $rabbit_channel, $rabbit_queue, {'auto_delete' => 0, durable => $rabbit_durable } ); - if ( $self->ack_messages ) { - $rabbit->basic_qos( $rabbit_channel, { prefetch_count => QUEUE_PREFETCH_COUNT } ); - } else { - #$rabbit->basic_qos( $rabbit_channel ); - $rabbit->basic_qos( $rabbit_channel, { prefetch_count => QUEUE_PREFETCH_COUNT_NOACK } ); - } - $rabbit->consume( $rabbit_channel, $rabbit_queue, {'no_ack' => 0} ); - - } else { - #open channel to the finished queue we'll send to - # - $rabbit->channel_open( $rabbit_channel ); - $rabbit->queue_declare( $rabbit_channel, $rabbit_queue, {'auto_delete' => 0, durable => $rabbit_durable} ); -# -# - - } - - my $setter = "_set_rabbit_$direction"; - $self->$setter( $rabbit ); - -# -# $self->_set_rabbit( $rabbit ); -# - $connected{ $direction } = 1; - } - - catch { - - $self->logger->error( "Error connecting to $direction RabbitMQ: $_" ); - }; - - if ( $connected{'input'} && $connected{'output'}) { - return; - }; - - next if $connected{ $direction }; - - - $self->logger->info( " Reconnecting $direction after " . RECONNECT_TIMEOUT . " seconds..." 
); - sleep( RECONNECT_TIMEOUT ); - - } # end foreach directoin - - }# end while 1 - -} - -1; diff --git a/lib/GRNOC/NetSage/Deidentifier/WorkerManager.pm b/lib/GRNOC/NetSage/Deidentifier/WorkerManager.pm deleted file mode 100644 index a734a5f3..00000000 --- a/lib/GRNOC/NetSage/Deidentifier/WorkerManager.pm +++ /dev/null @@ -1,260 +0,0 @@ -package GRNOC::NetSage::Deidentifier::WorkerManager; - -use Moo; -use Types::Standard qw( Str Bool ); - -# this one needs to change -#use GRNOC::NetSage::Deidentifier::WorkerManager::Worker; -#use GRNOC::NetSage::Deidentifier::Pipeline; -## got rid of FlowTagger use GRNOC::NetSage::Deidentifier::FlowTagger; - -use GRNOC::Config; -use GRNOC::Log; - -use Parallel::ForkManager; -use Proc::Daemon; - -use Data::Dumper; - -### required attributes ### - -has config_file => ( is => 'ro', - isa => Str, - required => 1 ); - -has logging_file => ( is => 'ro', - isa => Str, - required => 1 ); - -has worker => ( is => 'ro', - required => 1 ); - -has process_name => ( is => 'ro', - required => 1 ); - -### optional attributes ### - -has daemonize => ( is => 'ro', - isa => Bool, - default => 1 ); - -has task_type => ( is => 'rwp' ); - -### private attributes ### - -has config => ( is => 'rwp' ); - -has logger => ( is => 'rwp' ); - -has children => ( is => 'rwp', - default => sub { [] } ); - -has flow_cache => ( is => 'rwp', - default => sub { {} } ); - -has knot => ( is => 'rwp' ); - -### constructor builder ### - -sub BUILD { - - my ( $self ) = @_; - - # create and store logger object - my $grnoc_log = GRNOC::Log->new( config => $self->logging_file ); - my $logger = GRNOC::Log->get_logger(); - - $self->_set_logger( $logger ); - - # create and store config object - my $config = GRNOC::Config->new( config_file => $self->config_file, - force_array => 0 ); - - $self->_set_config( $config ); - - return $self; -} - -sub _init_cache { - my $self = shift; - - my %flow_cache = (); # $self->flow_cache; - #$flow_cache{'test'} = 'value'; - - my $glue = 'flow'; - - #IPC::Shareable->clean_up_all; - my %options = ( - create => 0, - exclusive => 0, - mode => 0644, - destroy => 0 - ); - - #IPC::Shareable->clean_up; - #IPC::Shareable->clean_up_all; - - #my $knot = tie %flow_cache, 'IPC::Shareable', $glue, { %options } or die ("failed to tie cache"); - - #warn "getting cache ..." . Dumper %flow_cache; - #(tied %flow_cache)->shlock; - #$flow_cache{'locked_adding'} = 'w00t!'; - #%flow_cache = ( - # 'test2' => 'wow!' - #); - #(tied %flow_cache)->shunlock; - #warn "getting cache ..." . Dumper %flow_cache; - - #$self->_set_flow_cache( \%flow_cache ); - #$self->_set_knot( $knot ); - -} - -### public methods ### - -sub start { - - my ( $self, $task_type ) = @_; - - $self->_set_task_type( $task_type ); - - $self->logger->info( 'Starting.' ); - - $self->logger->debug( 'In WorkerManager->start()' ); - $self->logger->debug( 'Setting up signal handlers.' ); - - # setup signal handlers - $SIG{'TERM'} = sub { - - $self->logger->info( 'Received SIG TERM. Calling stop()' ); - $self->stop(); - }; - - $SIG{'HUP'} = sub { - - $self->logger->info( 'Received SIG HUP.' ); - }; - - # need to daemonize - if ( $self->daemonize ) { - - $self->logger->debug( 'Daemonizing.' ); - - my $daemon = Proc::Daemon->new( pid_file => $self->config->get( '/config/master/pid-file' ) ); - my $pid = $daemon->Init(); - - # Orig. process "splits into" orig. and child/daemon. Child/daemon has $pid=0, orig has $pid = pid of the child/daemon. - # Both continue from here. Original writes pid file then exits. Child/daemon keeps running. 
(???) - $self->logger->debug(" pid from daemon->init = $pid"); - - # if in child/daemon process - if ( !$pid ) { - - $self->logger->debug( 'Created daemon process.' ); - - # change process name of the child/daemon - $0 = $self->process_name."-pipeline-daemon"; - - $self->_create_workers(); - } - } - - # dont need to daemonize - else { - - $self->logger->debug( 'Running in foreground.' ); - - $self->_create_workers(); - } - - return 1; -} - -sub stop { - - my ( $self ) = @_; - - $self->logger->info( 'Stopping.' ); - - my @pids = @{$self->children}; - - $self->logger->debug( 'Stopping child worker processes ' . join( ' ', @pids ) . '.' ); - - return kill( 'TERM', @pids ); -} - -### helper methods ### - -sub _build_config { - - my ( $self ) = @_; - - $self->logger->debug( 'Building GRNOC::Config with config file ' . $self->config_file . '.' ); - - return GRNOC::Config->new( config_file => $self->config_file, - force_array => 0 ); -} - -sub _create_workers { - - my ( $self ) = @_; - - my $num_processes = $self->config->get( '/config/worker/num-processes' ); - - $self->logger->info( "Creating $num_processes child worker processes." ); - - $self->_init_cache(); - - my %flow_cache = %{ $self->flow_cache }; - - my $forker = Parallel::ForkManager->new( $num_processes ); - - # keep track of children pids - $forker->run_on_start( sub { - - my ( $pid ) = @_; - - $self->logger->debug( "Child worker process $pid created." ); - - push( @{$self->children}, $pid ); - } ); - - $forker->run_on_finish( sub { - $self->logger->debug("child process has finished"); - } ); - - for ( 1 .. $num_processes ) { - - $forker->start() and next; - - - #die "done"; - - # create worker in this process - #my $worker = GRNOC::NetSage::Deidentifier::FlowTagger->new( config => $self->config, - # logger => $self->logger, - # config_file => $self->config_file, - # logging_file => $self->logging_file ); - my $worker = $self->worker; - - # this should only return if we tell it to stop via TERM signal etc. -$self->logger->debug(" doing worker->start"); - $worker->start( $self->task_type ); - - # exit child process - $forker->finish(); - } - - $self->logger->debug( 'Waiting for all child worker processes to exit.' ); - - # wait for all children to return - $forker->wait_all_children(); - - $self->_set_children( [] ); - - #(tied %flow_cache)->remove; - - $self->logger->debug( 'All child workers have exited.' 
); -} - -1; diff --git a/old_stitcher/FlowStitcher.pm b/old_stitcher/FlowStitcher.pm deleted file mode 100644 index 07c08625..00000000 --- a/old_stitcher/FlowStitcher.pm +++ /dev/null @@ -1,311 +0,0 @@ -package GRNOC::NetSage::Deidentifier::FlowStitcher; - -use strict; -use warnings; - -use Moo; - -extends 'GRNOC::NetSage::Deidentifier::Pipeline'; - -use GRNOC::Log; -use GRNOC::Config; - -#use Data::Validate::IP; -#use Net::IP; -#use Digest::SHA; - -#use JSON::XS; -use Clone qw(clone); -use IPC::ShareLite qw( :lock ); -use Storable qw(freeze thaw); -use Try::Tiny; -use Number::Bytes::Human qw(format_bytes); -use Time::Duration; -use Time::HiRes; -use Data::Dumper; - -### internal attributes ### - -has handler => ( is => 'rwp'); - -#has input_data => ( is => 'rwp', default => [] ); - -has flow_cache => ( is => 'rwp' ); - -has ipc_key => ( is => 'rwp', default => 'flow' ); - -has stats => ( is => 'rw', default => sub { {} } ); - -has acceptable_offset => ( is => 'rwp', default => 5 ); - -has finished_flows => ( is => 'rwp', default => sub { [] } ); - -has latest_timestamp => ( is => 'rwp', default => 0 ); - -### constructor builder ### - -sub BUILD { - - my ( $self ) = @_; - - my $config = $self->config; - - my $ipc_key = $config->{'worker'}->{'ipc-key'}; - $self->_set_ipc_key( $ipc_key ) if defined $ipc_key; - #warn "BUILD ipc_key: $ipc_key"; - - $self->_set_handler( sub { $self->_run_flow_stitching(@_) } ); - - #$self->_run_flow_stitching(); - - return $self; -} - -### private methods ### -sub _init_cache { - my $self = shift; - my %options = ( - create => 0, - exclusive => 0, - mode => 0644, - destroy => 0, - ); - my %cache; - - $self->_set_flow_cache( \%cache ); - - $self->stats( { - stitched_flow_count => 0, - }); - -} - -# expects an array of data for it to stitch -# returns a stitched array? TODO: update this -sub _run_flow_stitching { - my ( $self, $caller, $messages ) = @_; - - $self->_stitch_flows( ); - - - # Flow stitching is a special case in the pipeline in that it doesn't simply - # return values to be stitched and then exit. It explicitly publishes them itself - # and returns an empty array when it's done. 
This is because it's a long-running process - # that looks at flows over time - $self->_publish_flows( ); - -} - -sub _publish_flows { - my $self = shift; - my $flows = $self->finished_flows; - - $self->_publish_data( $flows ); - $self->_set_finished_flows( [] ); -} - -sub _set_values_strings { - my $obj = shift; - foreach my $key ( keys %$obj ) { - my $val = $obj->{$key}; - next if not defined $val; - if ( ref($val) eq 'HASH' ) { - $val = _set_values_strings( $val ); - } else { - $obj->{$key} = "$val"; - } - } - - return $obj; -} - -sub _stitch_flows { - my ( $self ) = @_; - - my $ipc_key = $self->ipc_key; - #warn "_stitch_flow, ipc_key: $ipc_key"; - - my $cache_all; - my $share = IPC::ShareLite->new( - -key => $ipc_key, - -create => 0, - -destroy => 0, - ) or die $!; - - $share->lock( LOCK_SH ); - if ( not defined $share ) { - $cache_all = {}; - } else { - #warn "thawing cache ..."; - my $fetch = $share->fetch; - if ( $share->fetch ) { - $cache_all = thaw( $share->fetch ); - } else { - $cache_all = {}; - } - } - $self->_set_flow_cache( $cache_all ); - $share->unlock(); - - my $finished_flows = $self->finished_flows; - - my $overlaps = 0; - my $stitchable_flows = 0; - my $stitched_flow_count = 0; - - my $latest_timestamp = $self->latest_timestamp; - - while( my ( $sensor, $cache ) = each %$cache_all ) { - - - while( my ( $five_tuple, $flow_container ) = each %$cache ) { - my $flows = $flow_container->{'flows'}; - if ( @$flows > 0 ) { - my $previous_flow; - my $i = 0; - my %flows_to_remove = (); - foreach my $flow (@$flows ) { - $flow->{'stitching_finished'} = 0; - $flow->{'no_previous'} = 0 if not $flow->{'no_previous'}; - my $start = $flow->{'start'}; - my $end = $flow->{'end'}; - $flow->{'flow_num'} = $i; - $latest_timestamp = $end if $end > $latest_timestamp; - # If there is a previous flow - if ( $previous_flow ) { - # If this flow and the previous flow go together, merge them - # and remove previous flow - if ( $self->_can_stitch_flow( $previous_flow->{'end'}, $start ) ) { - $flow = $self->_stitch_flow( $previous_flow, $flow ); - $flows_to_remove{$i-1} = 1; - $stitched_flow_count++; - $stitchable_flows++; - } else { - # If can't stitch flows, that means that flow has ended and can be output and removed from the cache - $flow->{'stitching_finished'} = 1; - push @$finished_flows, \%{ clone ( $flow )}; - $flows_to_remove{$i} = 1; - } - - } else { - $flow->{'no_previous'}++; - if ( $flow->{'no_previous'} <= 1 ) { - #warn "no previous flow #1; caching"; - } else { - #warn "no previous flow #2; finished"; - $flow->{'stitching_finished'} = 1; - push @$finished_flows, \%{ clone ( $flow )}; - $flows_to_remove{$i} = 1; - } - - } - $previous_flow = $flow; - $i++; - } - - for (my $i=@$flows-1; $i>=0; $i--) { - if ( ( $self->acceptable_offset + $flows->[$i]->{'end'} < $latest_timestamp ) && ( not $flows_to_remove{$i} ) ) { - $flows_to_remove{$i} = 1; - push @$finished_flows, \%{ clone ( $flows->[$i] )}; - } - if ( $flows_to_remove{$i} ) { - splice @$flows, $i, 1; - } - - } - - if ( @$flows < 1 ) { - # no flows for this five tuple; remove it - delete $cache->{$five_tuple}; - - } - - } else { - # no flows for this five tuple; remove it - delete $cache->{$five_tuple}; - - } - - } - - if ( keys %{ $cache_all->{ $sensor } } < 1 ) { - delete $cache_all->{ $sensor }; - - } - - $self->_set_latest_timestamp( $latest_timestamp ); - - $self->_set_finished_flows( $finished_flows ); - - my $stats = $self->stats; - $stats->{'stitched_flow_count'} += $stitched_flow_count; - - # find stats on the final, stitched 
flows for this run - my $max_stitched_duration = 0; - my $max_stitched_bytes = 0; - my $min_stitched_duration; - while( my ( $five_tuple, $flow_container ) = each %$cache ) { - foreach my $row ( @{$flow_container->{'flows'}} ) { - my $bytes = $row->{'values'}->{'num_bits'} / 8; - my $duration = $row->{'values'}->{'duration'}; - if ( $duration > $max_stitched_duration ) { - $max_stitched_duration = $duration; - } - if ( $bytes > $max_stitched_bytes ) { - $max_stitched_bytes = $bytes; - } - } - - } - - $self->stats( $stats ); - - - } # end while sensors - - - # save updated cache - - $self->_set_flow_cache( $cache_all ); - $share->lock( LOCK_EX ); - $share->store( freeze( $cache_all ) ); - $share->unlock(); - -} - -# stitches an individual flow -sub _stitch_flow { - my ($self, $flowA, $flowB) = @_; - - my $flow1; - my $flow2; - - # make sure flow1 comes before flow2; - if ( $flowA->{'start'} < $flowB->{'start'} ) { - $flow1 = $flowA; - $flow2 = $flowB; - } else { - $flow1 = $flowB; - $flow2 = $flowA; - } - - $flow1->{'end'} = $flow2->{'end'}; - $flow1->{'values'}->{'duration'} += $flow2->{'values'}->{'duration'}; - $flow1->{'values'}->{'num_bits'} += $flow2->{'values'}->{'num_bits'}; - $flow1->{'values'}->{'num_packets'} += $flow2->{'values'}->{'num_packets'}; - $flow1->{'stitched'} = 1; - - return $flow1; - -} - -sub _can_stitch_flow { - my ($self, $time1, $time2) = @_; - if ( abs ( $time1 - $time2 ) < $self->acceptable_offset ) { - return 1; - } else { - return 0; - } -} - -1; diff --git a/old_stitcher/netsage-flow-stitcher-daemon b/old_stitcher/netsage-flow-stitcher-daemon deleted file mode 100644 index 236a3dbe..00000000 --- a/old_stitcher/netsage-flow-stitcher-daemon +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/sh -# -# netsage-flow-stitcher-daemon init file for starting up the NetSage Flow Stitcher daemon -# -# chkconfig: 2345 20 80 -# description: Starts and stops the NetSage Flow Stitcher daemon - -# Source function library. -. /etc/rc.d/init.d/functions - -name="netsage-flow-stitcher-daemon" -exec="/usr/bin/$name" -pidfile="/var/run/$name.pid" -CONFIG="/etc/grnoc/netsage/deidentifier/netsage_flow_stitcher.xml" - -start() { - [ -f $CONFIG ] || exit 6 - [ -x $exec ] || exit 5 - echo -n $"Starting $name: " - daemon "$exec --config $CONFIG" - retval=$? - echo - return $retval -} - -stop() { - echo -n $"Stopping $name: " - if [ -f $pidfile ] - then - # shutdown haven't work, try old way - killproc -p $pidfile $name - retval=$? - else - success "$name shutdown" - fi - echo - return $retval -} - -restart() { - stop - start -} - -rh_status() { - status -p $pidfile $name -} - -rh_status_q() { - rh_status >/dev/null 2>&1 -} - - -case "$1" in - start) - rh_status_q && exit 0 - $1 - ;; - stop) - rh_status_q || exit 0 - $1 - ;; - restart) - $1 - ;; - status) - rh_status - ;; - *) - echo $"Usage: $0 {start|stop|status|restart}" - exit 2 -esac -exit $? 
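For reference, the stitching rule the retired Perl daemon above applies (and which the logstash aggregation filter reintroduces later in this series) reduces to: merge two records with the same five-tuple when the gap between them is smaller than the acceptable offset, extending the end time and summing the counters. Below is a minimal sketch of that rule in Ruby (the language used inside the logstash filters later in this series); the hash layout mirrors the Perl structures above and is illustrative only, not part of the daemon.

    # Sketch of the old stitcher's merge rule (illustrative; field names follow the Perl code above).
    def can_stitch?(prev_end, next_start, acceptable_offset)
      (prev_end - next_start).abs < acceptable_offset
    end

    def stitch(flow1, flow2)
      # flow1 is the earlier record; its end time is extended and its counters are summed
      flow1['end'] = flow2['end']
      %w[duration num_bits num_packets].each do |k|
        flow1['values'][k] += flow2['values'][k]
      end
      flow1['stitched'] = 1
      flow1
    end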
diff --git a/old_stitcher/netsage_flow_stitcher.xml b/old_stitcher/netsage_flow_stitcher.xml deleted file mode 100644 index e390ccc3..00000000 --- a/old_stitcher/netsage_flow_stitcher.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - - 127.0.0.1 - 5672 - xxxxx - xxxxx - 100 - / - netsage_deidentifier_raw2 - 2 - 0 - /path/to/cert.crt - 1 - - - 127.0.0.1 - 5672 - xxxxx - xxxxx - 100 - / - netsage_deidentifier_stitched - 3 - 0 - 1 - /path/to/cert.crt - - - - - 1 - - - - /var/run/netsage-flow-stitcher-daemon.pid - - diff --git a/systemd/netsage-flow-filter.service b/systemd/netsage-flow-filter.service deleted file mode 100644 index e034b30b..00000000 --- a/systemd/netsage-flow-filter.service +++ /dev/null @@ -1,19 +0,0 @@ -[Unit] -Description=NetSage Pipeline Flow Filter -After=rabbitmq-server.service -Requires=rabbitmq-server.service - -[Service] -User=root -Group=root -Environment=CONFIG=/etc/grnoc/netsage/deidentifier/netsage_flow_filter A.xml -Environment=SHAREDCONFIG=/etc/grnoc/netsage/deidentifier/netsage_shared .xml -ExecStart=/usr/bin/netsage-flow-filter-daemon --config ${CONFIG} --sharedconfig ${SHAREDCONFIG} --nofork - -# We don't have HUP capability yet -# We might want to try restarting automatically, but not now -# Restart=on-failure -# RestartSec=30s - -[Install] -WantedBy=multi-user.target diff --git a/systemd/netsage-netflow-importer.service b/systemd/netsage-netflow-importer.service deleted file mode 100644 index f509039b..00000000 --- a/systemd/netsage-netflow-importer.service +++ /dev/null @@ -1,21 +0,0 @@ -[Unit] -Description=Netsage Pipeline Importer -After=rabbitmq-server.service -Requires=rabbitmq-server.service - -[Service] -User=root -Group=root -Environment=CONFIG=/etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml -Environment=SHAREDCONFIG=/etc/grnoc/netsage/deidentifier/netsage_shared.xml -ExecStart=/usr/bin/netsage-netflow-importer-daemon --config ${CONFIG} --sharedconfig ${SHAREDCONFIG} --nofork -ExecStopPost=/bin/echo "Use ps to be sure the daemon and worker both stopped" - -# PID file location is set in importer config file. Probably /var/run/. 
-# We don't have HUP capability yet
-# We might want to try restarting automatically, but not now
-# Restart=on-failure
-# RestartSec=30s
-
-[Install]
-WantedBy=multi-user.target

From 40ef852711f06cd726cbaa7705a724dacfeb04c9 Mon Sep 17 00:00:00 2001
From: Lisa Ensman
Date: Thu, 14 Apr 2022 21:46:27 +0000
Subject: [PATCH 059/126] removing pmacct requirement from spec file for now

---
 .gitignore                  | 7 +++++++
 grnoc-netsage-pipeline.spec | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index c2336b48..f9b3cd42 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,3 +36,10 @@ replayData
 userConfig
 docker-compose.override.yml
 node_modules
+
+Makefile
+blib
+blib/*
+grnoc-netsage-pipeline-2.0.0.tar.gz
+pm_to_blib
+
diff --git a/grnoc-netsage-pipeline.spec b/grnoc-netsage-pipeline.spec
index af4f1943..b71045f1 100644
--- a/grnoc-netsage-pipeline.spec
+++ b/grnoc-netsage-pipeline.spec
@@ -46,7 +46,7 @@ BuildArch: noarch
 Requires: wget
 Requires: logstash >= 7.16.2
 Requires: rubygem-ipaddress
-Requires: pmaccct >= 1.7.7
+#Requires: pmacct >= 1.7.7

 %description
 GRNOC NetSage Flow-Processing Pipeline

From 0135d6cf3db92861d8c3d5ed83de65478c45d887 Mon Sep 17 00:00:00 2001
From: Lisa Ensman
Date: Tue, 19 Apr 2022 17:29:52 +0000
Subject: [PATCH 060/126] Added a check to the spec file to see if pmacct has been installed (manually)

---
 grnoc-netsage-pipeline.spec | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/grnoc-netsage-pipeline.spec b/grnoc-netsage-pipeline.spec
index b71045f1..fad14b83 100644
--- a/grnoc-netsage-pipeline.spec
+++ b/grnoc-netsage-pipeline.spec
@@ -46,7 +46,7 @@ BuildArch: noarch
 Requires: wget
 Requires: logstash >= 7.16.2
 Requires: rubygem-ipaddress
-#Requires: pmacct >= 1.7.7
+#Requires: pmacct = 1.7.7 (Not installed by rpm; see post section below for a check. Update ver num there!)

 %description
 GRNOC NetSage Flow-Processing Pipeline
@@ -160,6 +160,21 @@ rm -rf $RPM_BUILD_ROOT
 #/var/cache/netsage/

 %post
+# make sure pmacct is installed (no rpm so can't just require it)
+echo " "
+echo "-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*"
+if [ -f /usr/local/sbin/nfacctd ]; then
+ echo "PLEASE CHECK: "
+ echo "It looks like pmacct has been installed."
+ echo "Check the version with sfacctd -V and nfacctd -V."
+ echo "The Netsage Pipeline has been tested with version 1.7.7."
+else
+ echo "WARNING: "
+ echo "Required package pmacct does not appear to have been installed. "
+ echo "See the NDCA doc or pmacct on github for instructions."
+ echo "The Netsage Pipeline has been tested with version 1.7.7."
+fi
+
 echo " "
 echo "-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*"
 echo "AFTER UPGRADING..."
From 286be3bbda083fd0f625984977a489240f7b63f5 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Tue, 19 Apr 2022 21:27:09 +0000 Subject: [PATCH 061/126] Added systemd files, conf files, pretag.map file, spec file changes for sfacctd and nfacctd --- MANIFEST | 5 +++ conf-pmacct/nfacctd.conf | 74 ++++++++++++++++++++++++++++++++++++ conf-pmacct/pretag.map | 5 +++ conf-pmacct/sfacctd.conf | 75 +++++++++++++++++++++++++++++++++++++ grnoc-netsage-pipeline.spec | 14 ++++++- systemd/nfacctd.service | 11 ++++++ systemd/sfacctd.service | 11 ++++++ 7 files changed, 194 insertions(+), 1 deletion(-) create mode 100644 conf-pmacct/nfacctd.conf create mode 100644 conf-pmacct/pretag.map create mode 100644 conf-pmacct/sfacctd.conf create mode 100644 systemd/nfacctd.service create mode 100644 systemd/sfacctd.service diff --git a/MANIFEST b/MANIFEST index ac9f6cf0..24becee5 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,6 +1,9 @@ grnoc-netsage-pipeline.spec CHANGES.md bin/restart-logstash.sh +conf-pmacct/sfacctd.conf +conf-pmacct/nfacctd.conf +conf-pmacct/pretag.map conf-logstash/01-input-jsonfile.conf.disabled conf-logstash/01-input-multiline-json-file.conf.disabled conf-logstash/01-input-rabbit.conf @@ -35,6 +38,8 @@ cron.d/netsage-scireg-update.cron cron.d/netsage-logstash-restart.cron cron.d/netsage-memberlists-update.cron systemd/logstash.service +systemd/sfacctd.service +systemd/nfacctd.service Makefile.PL MANIFEST README.md diff --git a/conf-pmacct/nfacctd.conf b/conf-pmacct/nfacctd.conf new file mode 100644 index 00000000..3f158f5d --- /dev/null +++ b/conf-pmacct/nfacctd.conf @@ -0,0 +1,74 @@ +! PMACCT CONFIG FOR NETFLOW + +!# debug: true + +! Port nfacctd should listen to +nfacctd_port: 9999 + +! Get a value for 'label' from the pre_tag_map file. +! We use this to encode the sensor name for each port. + pre_tag_map: /etc/pmacct/pretag.map + +! FOR PRINTING FILES +! Default is tab-separated output. If 'label' or any variable length field is in the aggregation list, you have to use csv format. +!# plugins: print +!# print_output_file: /tmp/nfacct/%Y/%m/%d/nfacct.%Y%m%d-%H%M +!# print_output: csv +! How often to write to file (default is 60 sec) +!# print_refresh_time: 10 + +! FOR SENDING FLOWS TO THE LOCAL RABBIT QUEUE + plugins: amqp + amqp_host: localhost + amqp_user: guest + amqp_passwd: guest + amqp_exchange_type: direct + amqp_exchange: amq.direct + amqp_persistent_msg: true + amqp_vhost: / + amqp_routing_key: netsage_deidentifier_raw + +! To aggregate flows over time (x sec)- +! 1. Set print_refresh_time or amqp_refresh_time to x sec. This is the period over which it will aggregate / how often to output. +! 2. Do not include times (timestamp_start and timestamp_end) in aggregate list. +! 3. Set nfacctd_stitching: true +! BUT FOR NETSAGE, DO NOT AGGREGATE NETFLOW OVER TIME WITH PMACCT! + +! How often to write to the rabbit queue (default is 60 sec) + amqp_refresh_time: 10 + +! Fields to aggregate on - include all fields we want in the ouput. See pmacct docs for field meanings. +! For netflow, the available timestamps are timestamp_start and timestamp_end (since the netflow exporter itself aggregates over time). +!! Note: Sampling_rate is just a flag to say whether it is known and applied. +!! Before a netflow template arrives, sampling_rate = 0 and no correction is applied! +!! There may be a logstash conf that drops flows with sampling_rate = 0. This should be removed if there really IS no sampling! 
+ aggregate: src_host, dst_host, src_port, dst_port, proto, src_as, dst_as, in_iface, out_iface, sampling_rate, label, timestamp_start, timestamp_end + +! Stitching - determine and add timestamp_min and timestamp_max fields + nfacctd_stitching: false + +! Output timestamps as epochs rather than strings that we need to parse + timestamps_since_epoch: true +! Don't round off to whole seconds + timestamps_secs: false + +! Gets the sampling rate from flow packet and automatically applies it +! Example: If sample_rate is 1000 (meaning 1/1000) then it multiplies +! packets and bytes by 1000. + nfacctd_renormalize: true + +! Get AS info from the netflow datagrams + nfacctd_as: netflow + +! write to /var/log/messages + syslog: local0 + +! save template file for netflow to use on next startup +!## nfacctd_templates_file: /path/file + +! Increase buffer size for larger numbers of flows. +! I'M NOT SURE WHAT THIS SHOULD BE! + plugin_buffer_size: 10240 + plugin_pipe_size: 10240000 + +! additional aggregate_primitives fields for netflow ? diff --git a/conf-pmacct/pretag.map b/conf-pmacct/pretag.map new file mode 100644 index 00000000..008c3f97 --- /dev/null +++ b/conf-pmacct/pretag.map @@ -0,0 +1,5 @@ +! This file is used to set sensor names according to the ports the flows came in on. +! Label should be "sfacct--" or "nfacct--" (whichever is appropriate) followed by the sensor name with spaces replaced by #'s + +set_label=sfacct--Sflow#Sensor filter='(port 9998)' +set_label=nfacct--Netflow#Sensor filter='(port 9999)' diff --git a/conf-pmacct/sfacctd.conf b/conf-pmacct/sfacctd.conf new file mode 100644 index 00000000..ea2bfc29 --- /dev/null +++ b/conf-pmacct/sfacctd.conf @@ -0,0 +1,75 @@ +! PMACCT CONFIG FOR SFLOW + +!# debug: true + +! Port sfacctd should listen to +sfacctd_port: 9998 + +! Get a value for 'label' from the pre_tag_map file. +! We use this to encode the sensor name for each port. + pre_tag_map: /etc/pmacct/pretag.map + +! FOR PRINTING TO FILES +! Default format is tab-separated. If 'label' or any variable length field is in the aggregation list, you have to use csv format. +!## plugins: print +!## print_output_file: /tmp/sfacct/%Y/%m/%d/sfacct.%Y%m%d-%H%M +!## print_output: csv +! How often to write to file (default = 60 sec) +!## print_refresh_time: 300 + +! FOR SENDING FLOWS TO THE LOCAL RABBIT QUEUE + plugins: amqp + amqp_host: localhost + amqp_user: guest + amqp_passwd: guest + amqp_exchange_type: direct + amqp_exchange: amq.direct + amqp_persistent_msg: true + amqp_vhost: / + amqp_routing_key: netsage_deidentifier_raw + +! FOR NETSAGE, AGGREGATE SFLOW OVER TIME (5 minutes) WITH PMACCT +! To aggregate flows over time (x sec)- +! 1. Set print_refresh_time or amqp_refresh_time to x sec. +! 2. Do not include timestamp_arrival in aggregate list. +! Timestamp_min and timestamp_max will be added to output automatically, do not include in aggregate list. +! 3. Set sfacctd_stitching: true + +! How often to write to rabbit queue = time to aggregate over (default = 60 sec) + amqp_refresh_time: 300 + +! Fields to aggregate on - include all fields we want in the ouput. See pmacct docs for field meanings +! For sflow, the only available timestamp is timestamp_arrival (arrival at collector). Do not include it to aggregate over time. +!! Note: Sampling_rate is just a flag to say whether it is known and applied (could be a pmacct bug). +!! There may be a logstash conf that drops flows with Sampling_rate = 0, which needs to be removed if there really is no sampling. +! 
FOR SFLOW AGGREGATION: + aggregate: src_host, dst_host, src_port, dst_port, proto, src_as, dst_as, in_iface, out_iface, sampling_rate, label + +! Stitching - determine and add timestamp_min and timestamp_max fields + sfacctd_stitching: true + +! Output timestamps as epochs rather than strings that we need to parse + timestamps_since_epoch: true +! Don't round off to whole seconds + timestamps_secs: false + +! Gets the sampling rate from flow packet and automatically applies it +! Example: If sample_rate is 1000 (meaning 1/1000) then it multiplies +! packets and bytes by 1000. + sfacctd_renormalize: true + +! Get AS info from the sflow datagrams + sfacctd_as: sflow + +! write to /var/log/messages + syslog: local0 + +! Increase buffer size for larger numbers of flows. +! I'M NOT SURE WHAT THIS SHOULD BE! + plugin_buffer_size: 10240 + plugin_pipe_size: 10240000 + +! there's no template file for sflow +! no additional aggregate_primitives fields for sflow ? + + diff --git a/grnoc-netsage-pipeline.spec b/grnoc-netsage-pipeline.spec index fad14b83..1ae83ece 100644 --- a/grnoc-netsage-pipeline.spec +++ b/grnoc-netsage-pipeline.spec @@ -71,6 +71,7 @@ make pure_install %{__install} -d -p %{buildroot}/usr/bin/ %{__install} -d -p %{buildroot}/etc/cron.d/ %{__install} -d -p %{buildroot}/etc/systemd/system/ +%{__install} -d -p %{buildroot}/etc/pmacct/ %{__install} -d -p %{buildroot}/etc/logstash/conf.d/ %{__install} -d -p %{buildroot}/etc/logstash/conf.d/ruby/ %{__install} -d -p %{buildroot}/etc/logstash/conf.d/support/ @@ -86,6 +87,10 @@ make pure_install %{__install} cron.d/netsage-logstash-restart.cron %{buildroot}/etc/cron.d/netsage-logstash-restart.cron %{__install} systemd/logstash.service %{buildroot}/etc/systemd/system/logstash.service +%{__install} systemd/sfacctd.service %{buildroot}/etc/systemd/system/sfacctd.service +%{__install} systemd/nfacctd.service %{buildroot}/etc/systemd/system/nfacctd.service + +%{__install} conf-pmacct/* %{buildroot}/etc/pmacct/ %{__install} conf-logstash/*.conf %{buildroot}/etc/logstash/conf.d/ %{__install} conf-logstash/*.conf.disabled %{buildroot}/etc/logstash/conf.d/ @@ -116,6 +121,9 @@ rm -rf $RPM_BUILD_ROOT %config(noreplace) /etc/cron.d/netsage-logstash-restart.cron # Don't overwrite these .confs. Create .rpmnew files if needed. +%config(noreplace) /etc/pmacct/sfacctd.conf +%config(noreplace) /etc/pmacct/nfacctd.conf +%config(noreplace) /etc/pmacct/pretag.map %config(noreplace) /etc/logstash/conf.d/01-input-rabbit.conf %config(noreplace) /etc/logstash/conf.d/15-sensor-specific-changes.conf %config(noreplace) /etc/logstash/conf.d/40-aggregation.conf @@ -154,6 +162,8 @@ rm -rf $RPM_BUILD_ROOT %defattr(644, root, root, -) /etc/systemd/system/logstash.service +/etc/systemd/system/sfacctd.service +/etc/systemd/system/nfacctd.service %defattr(-, root, root, 755) /var/lib/grnoc/netsage/ @@ -180,10 +190,12 @@ echo "-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*" echo "AFTER UPGRADING..." echo " " echo " * Check config and cron files with .rpmnew and .rpmsave versions to see if any need manual updates." -echo " * Logstash configs 01, 15, 40, and 99 are not replaced by updated versions, so check to see if there are changes. " +echo " * Pmacct configs: /etc/pmacct/. Logstash configs: /etc/logstash/conf.d/." +echo " * Pmacct configs and Logstash configs 01, 15, 40, and 99 are not replaced by updated versions, so check for changes. " echo " * If using 55-member-orgs.conf, make sure you have the required files in support/. See comments in the conf file. 
" echo " " echo " * Note that this rpm puts logstash config files in /etc/logstash/conf.d/ and doesn't manage multiple pipelines in pipelines.yml." +echo " * Nor does it manage multiple pmacct processes." echo " " echo " * IMPORTANT: Be sure the number of logstash pipeline workers is 1, or flow stitching (aggregation) won't work right. **" echo " * and be sure logstash configs are specified by *.conf in the right directory." diff --git a/systemd/nfacctd.service b/systemd/nfacctd.service new file mode 100644 index 00000000..a2341c5c --- /dev/null +++ b/systemd/nfacctd.service @@ -0,0 +1,11 @@ +[Unit] +Description=nfacctd daemon providing Netflow collection service +Wants=network.target +After=network.target +ConditionPathExists=/etc/pmacct/nfacctd.conf + +[Service] +ExecStart=/usr/local/sbin/nfacctd -f /etc/pmacct/nfacctd.conf + +[Install] +WantedBy=multi-user.target diff --git a/systemd/sfacctd.service b/systemd/sfacctd.service new file mode 100644 index 00000000..bedc0426 --- /dev/null +++ b/systemd/sfacctd.service @@ -0,0 +1,11 @@ +[Unit] +Description=sfacctd daemon providing Sflow collection service +Wants=network.target +After=network.target +ConditionPathExists=/etc/pmacct/sfacctd.conf + +[Service] +ExecStart=/usr/local/sbin/sfacctd -f /etc/pmacct/sfacctd.conf + +[Install] +WantedBy=multi-user.target From 3897439bf28bd5b89407de2f958fd2783051aa63 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 21 Apr 2022 17:49:20 +0000 Subject: [PATCH 062/126] Made separate default pre_tag_map files for sflow and netflow. --- MANIFEST | 3 ++- conf-pmacct/nfacct-pretag.map | 5 +++++ conf-pmacct/nfacctd.conf | 9 +++++---- conf-pmacct/pretag.map | 5 ----- conf-pmacct/sfacct-pretag.map | 5 +++++ conf-pmacct/sfacctd.conf | 3 ++- grnoc-netsage-pipeline.spec | 3 ++- 7 files changed, 21 insertions(+), 12 deletions(-) create mode 100644 conf-pmacct/nfacct-pretag.map delete mode 100644 conf-pmacct/pretag.map create mode 100644 conf-pmacct/sfacct-pretag.map diff --git a/MANIFEST b/MANIFEST index 24becee5..ba143547 100644 --- a/MANIFEST +++ b/MANIFEST @@ -3,7 +3,8 @@ CHANGES.md bin/restart-logstash.sh conf-pmacct/sfacctd.conf conf-pmacct/nfacctd.conf -conf-pmacct/pretag.map +conf-pmacct/sfacct-pretag.map +conf-pmacct/nfacct-pretag.map conf-logstash/01-input-jsonfile.conf.disabled conf-logstash/01-input-multiline-json-file.conf.disabled conf-logstash/01-input-rabbit.conf diff --git a/conf-pmacct/nfacct-pretag.map b/conf-pmacct/nfacct-pretag.map new file mode 100644 index 00000000..acc896a3 --- /dev/null +++ b/conf-pmacct/nfacct-pretag.map @@ -0,0 +1,5 @@ +! This file is referenced in a config file and used to set the "label" field to a sensor name. +! Label should be "sfacct--" or "nfacct--" (for sflow or netflow, respectively) +! followed by the sensor name with spaces replaced by #'s + +set_label=nfacct--Netflow#Sensor diff --git a/conf-pmacct/nfacctd.conf b/conf-pmacct/nfacctd.conf index 3f158f5d..74050249 100644 --- a/conf-pmacct/nfacctd.conf +++ b/conf-pmacct/nfacctd.conf @@ -1,4 +1,5 @@ ! PMACCT CONFIG FOR NETFLOW +! Settings most likely to need changes: NFACCTD_PORT, PRE_TAG_MAP, and AMQP_ROUTING_KEY !# debug: true @@ -7,7 +8,7 @@ nfacctd_port: 9999 ! Get a value for 'label' from the pre_tag_map file. ! We use this to encode the sensor name for each port. - pre_tag_map: /etc/pmacct/pretag.map +pre_tag_map: /etc/pmacct/nfacct-pretag.map ! FOR PRINTING FILES ! Default is tab-separated output. If 'label' or any variable length field is in the aggregation list, you have to use csv format. 
@@ -15,7 +16,7 @@ nfacctd_port: 9999 !# print_output_file: /tmp/nfacct/%Y/%m/%d/nfacct.%Y%m%d-%H%M !# print_output: csv ! How often to write to file (default is 60 sec) -!# print_refresh_time: 10 +!# print_refresh_time: 60 ! FOR SENDING FLOWS TO THE LOCAL RABBIT QUEUE plugins: amqp @@ -35,14 +36,14 @@ nfacctd_port: 9999 ! BUT FOR NETSAGE, DO NOT AGGREGATE NETFLOW OVER TIME WITH PMACCT! ! How often to write to the rabbit queue (default is 60 sec) - amqp_refresh_time: 10 + amqp_refresh_time: 60 ! Fields to aggregate on - include all fields we want in the ouput. See pmacct docs for field meanings. ! For netflow, the available timestamps are timestamp_start and timestamp_end (since the netflow exporter itself aggregates over time). !! Note: Sampling_rate is just a flag to say whether it is known and applied. !! Before a netflow template arrives, sampling_rate = 0 and no correction is applied! !! There may be a logstash conf that drops flows with sampling_rate = 0. This should be removed if there really IS no sampling! - aggregate: src_host, dst_host, src_port, dst_port, proto, src_as, dst_as, in_iface, out_iface, sampling_rate, label, timestamp_start, timestamp_end + aggregate: src_host, dst_host, src_port, dst_port, proto, src_as, dst_as, in_iface, out_iface, sampling_rate, timestamp_start, timestamp_end, label ! Stitching - determine and add timestamp_min and timestamp_max fields nfacctd_stitching: false diff --git a/conf-pmacct/pretag.map b/conf-pmacct/pretag.map deleted file mode 100644 index 008c3f97..00000000 --- a/conf-pmacct/pretag.map +++ /dev/null @@ -1,5 +0,0 @@ -! This file is used to set sensor names according to the ports the flows came in on. -! Label should be "sfacct--" or "nfacct--" (whichever is appropriate) followed by the sensor name with spaces replaced by #'s - -set_label=sfacct--Sflow#Sensor filter='(port 9998)' -set_label=nfacct--Netflow#Sensor filter='(port 9999)' diff --git a/conf-pmacct/sfacct-pretag.map b/conf-pmacct/sfacct-pretag.map new file mode 100644 index 00000000..2944d245 --- /dev/null +++ b/conf-pmacct/sfacct-pretag.map @@ -0,0 +1,5 @@ +! This file is referenced in a config file and used to set the "label" field to a sensor name. +! Label should be "sfacct--" or "nfacct--" (for sflow or netflow, respectively) +! followed by the sensor name with spaces replaced by #'s + +set_label=sfacct--Sflow#Sensor diff --git a/conf-pmacct/sfacctd.conf b/conf-pmacct/sfacctd.conf index ea2bfc29..fda6df10 100644 --- a/conf-pmacct/sfacctd.conf +++ b/conf-pmacct/sfacctd.conf @@ -1,4 +1,5 @@ ! PMACCT CONFIG FOR SFLOW +! Settings most likely to need changes: SFACCTD_PORT, PRE_TAG_MAP, and AMQP_ROUTING_KEY !# debug: true @@ -7,7 +8,7 @@ sfacctd_port: 9998 ! Get a value for 'label' from the pre_tag_map file. ! We use this to encode the sensor name for each port. - pre_tag_map: /etc/pmacct/pretag.map + pre_tag_map: /etc/pmacct/sfacct-pretag.map ! FOR PRINTING TO FILES ! Default format is tab-separated. If 'label' or any variable length field is in the aggregation list, you have to use csv format. diff --git a/grnoc-netsage-pipeline.spec b/grnoc-netsage-pipeline.spec index 1ae83ece..221c17fb 100644 --- a/grnoc-netsage-pipeline.spec +++ b/grnoc-netsage-pipeline.spec @@ -123,7 +123,8 @@ rm -rf $RPM_BUILD_ROOT # Don't overwrite these .confs. Create .rpmnew files if needed. 
%config(noreplace) /etc/pmacct/sfacctd.conf %config(noreplace) /etc/pmacct/nfacctd.conf -%config(noreplace) /etc/pmacct/pretag.map +%config(noreplace) /etc/pmacct/sfacct-pretag.map +%config(noreplace) /etc/pmacct/nfacct-pretag.map %config(noreplace) /etc/logstash/conf.d/01-input-rabbit.conf %config(noreplace) /etc/logstash/conf.d/15-sensor-specific-changes.conf %config(noreplace) /etc/logstash/conf.d/40-aggregation.conf From 86da1f7656db6ae6ed78a90c7729318d9a6baf25 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Tue, 26 Apr 2022 15:47:57 +0000 Subject: [PATCH 063/126] Added logstash conf for translating pmacct fields --- conf-logstash/05-translate-pmacct.conf | 90 ++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 conf-logstash/05-translate-pmacct.conf diff --git a/conf-logstash/05-translate-pmacct.conf b/conf-logstash/05-translate-pmacct.conf new file mode 100644 index 00000000..9c01e09c --- /dev/null +++ b/conf-logstash/05-translate-pmacct.conf @@ -0,0 +1,90 @@ +# Translate pmacct fields to those the rest of the pipeline uses + # NOTE: pmacct (nfacctd and sfacctd) must be run with + # pretag files wherein 'label' must be set to 'sfacct--' or 'nfacct--' + # followed by the sensor name with spaces replaced by #s. + +filter { + + # For sflow - + if [label] =~ /^sfacct--/ { + mutate { + add_field => { "[meta][flow_type]" => "sflow" } + # (Assuming some aggregation over time by sfacctd) + rename => {'timestamp_min' => 'start'} + rename => {'timestamp_max' => 'end'} + id => "05-1" + } + } + + # For netflow - + if [label] =~ /^nfacct--/ { + # On startup, before a template is sent and read, sampling_rate will be 0 and no corrections applied; drop these! + # COMMENT OUT IF THERE REALLY IS NO SAMPLING AT THE ROUTER! + if [sampling_rate] == 0 { + drop { id => "05-0" } + } + mutate { + add_field => { "[meta][flow_type]" => "netflow" } + # (Assuming no aggregation over time by nfacctd) + rename => {'timestamp_start' => 'start'} + rename => {'timestamp_end' => 'end'} + id => "05-2" + } + } + + # For all flows from pmacct processes - + # Get sensor name + # Note: In the pmacct pretag file, label must be set to sfacct-- or nfacct-- + # followed by the real sensor name with spaces replaced by #s. + ruby { + code => ' + sensor = event.get("label") + sensor = sensor.gsub("sfacct--", "") + sensor = sensor.gsub("nfacct--", "") + sensor = sensor.gsub("#", " ") + event.set( "[meta][sensor_id]", sensor ) + ' + tag_on_exception => '_rubyexception getting sensor from label in 05-translate-pmacct. 
' + id => "05-3" + } + # Do field name translations + mutate { + rename => {'ip_src' => '[meta][src_ip]'} + rename => {'ip_dst' => '[meta][dst_ip]'} + rename => {'port_src' => '[meta][src_port]'} + rename => {'port_dst' => '[meta][dst_port]'} + rename => {'ip_proto' => '[meta][protocol]'} + rename => {'iface_in' => '[meta][src_ifindex]'} + rename => {'iface_out' => '[meta][dst_ifindex]'} + rename => {'as_src' => '[meta][src_asn]'} + rename => {'as_dst' => '[meta][dst_asn]'} + rename => {'packets' => '[values][num_packets]'} + convert => {'start' => 'float'} + convert => {'end' => 'float'} + id => "05-4" + } + ruby { + code => ' + event.set( "[values][num_bits]", event.get("bytes") * 8 ) + event.set( "[values][duration]", event.get("end") - event.get("start") ) + if event.get("[values][duration]") <= 0.001 ## == 0 to within roundoff error + event.set( "[values][bits_per_second]", 0 ) + event.set( "[values][packets_per_second]", 0 ) + else + bps = event.get("[values][num_bits]") / event.get("[values][duration]") + pps = event.get("[values][num_packets]") / event.get("[values][duration]") + event.set( "[values][bits_per_second]" , bps.to_i ) + event.set( "[values][packets_per_second]", pps.to_i ) + end + ' + tag_on_exception => '_rubyexception in 05-translate-pmacct. ' + id => "05-5" + } + # Remove unneeded fields + mutate { + remove_field => [ 'sampling_rate', 'event_type', 'writer_id' ] + remove_field => [ 'label', 'bytes' ] + id => "05-6" + } + +} From ae0a71160975aa1d101ecba95cba94ff7024ed5e Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Tue, 26 Apr 2022 16:00:54 +0000 Subject: [PATCH 064/126] mods for new conf file, cosmetic changes --- MANIFEST | 1 + conf-pmacct/nfacctd.conf | 11 +++++------ conf-pmacct/sfacctd.conf | 11 +++++------ grnoc-netsage-pipeline.spec | 1 + 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/MANIFEST b/MANIFEST index ba143547..0ad377b3 100644 --- a/MANIFEST +++ b/MANIFEST @@ -8,6 +8,7 @@ conf-pmacct/nfacct-pretag.map conf-logstash/01-input-jsonfile.conf.disabled conf-logstash/01-input-multiline-json-file.conf.disabled conf-logstash/01-input-rabbit.conf +conf-logstash/05-translate-pmacct.conf conf-logstash/10-preliminaries.conf conf-logstash/15-sensor-specific-changes.conf conf-logstash/20-add-id.conf diff --git a/conf-pmacct/nfacctd.conf b/conf-pmacct/nfacctd.conf index 74050249..a3c06425 100644 --- a/conf-pmacct/nfacctd.conf +++ b/conf-pmacct/nfacctd.conf @@ -10,7 +10,7 @@ nfacctd_port: 9999 ! We use this to encode the sensor name for each port. pre_tag_map: /etc/pmacct/nfacct-pretag.map -! FOR PRINTING FILES +! FOR PRINTING TO FILES instead of writing to rabbit queue (comment out amqp_* lines) ! Default is tab-separated output. If 'label' or any variable length field is in the aggregation list, you have to use csv format. !# plugins: print !# print_output_file: /tmp/nfacct/%Y/%m/%d/nfacct.%Y%m%d-%H%M @@ -26,17 +26,16 @@ pre_tag_map: /etc/pmacct/nfacct-pretag.map amqp_exchange_type: direct amqp_exchange: amq.direct amqp_persistent_msg: true - amqp_vhost: / amqp_routing_key: netsage_deidentifier_raw + amqp_vhost: / +! How often to write to the rabbit queue (default is 60 sec) + amqp_refresh_time: 60 ! To aggregate flows over time (x sec)- ! 1. Set print_refresh_time or amqp_refresh_time to x sec. This is the period over which it will aggregate / how often to output. ! 2. Do not include times (timestamp_start and timestamp_end) in aggregate list. ! 3. Set nfacctd_stitching: true -! BUT FOR NETSAGE, DO NOT AGGREGATE NETFLOW OVER TIME WITH PMACCT! 
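The label decoding and derived metrics performed by 05-translate-pmacct.conf (PATCH 063, above) can be illustrated standalone. The Ruby sketch below uses an invented sample record; the input field names (label, bytes, timestamp_min, timestamp_max) are the pmacct output fields the filter renames, and the values are made up for the example.

    # Standalone sketch of the 05-translate-pmacct.conf logic; sample values are invented.
    record = { 'label' => 'sfacct--Sflow#Sensor', 'bytes' => 1_250_000,
               'timestamp_min' => 1650000000.0, 'timestamp_max' => 1650000040.0 }

    sensor   = record['label'].sub(/^(sf|nf)acct--/, '').gsub('#', ' ')   # => "Sflow Sensor"
    num_bits = record['bytes'] * 8                                        # => 10000000
    duration = record['timestamp_max'] - record['timestamp_min']          # => 40.0
    # rates are zeroed when the duration is 0 within roundoff error
    bps      = duration > 0.001 ? (num_bits / duration).to_i : 0          # => 250000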
- -! How often to write to the rabbit queue (default is 60 sec) - amqp_refresh_time: 60 +! FOR NETSAGE, DO NOT AGGREGATE NETFLOW OVER TIME WITH PMACCT! ! Fields to aggregate on - include all fields we want in the ouput. See pmacct docs for field meanings. ! For netflow, the available timestamps are timestamp_start and timestamp_end (since the netflow exporter itself aggregates over time). diff --git a/conf-pmacct/sfacctd.conf b/conf-pmacct/sfacctd.conf index fda6df10..4e1770f1 100644 --- a/conf-pmacct/sfacctd.conf +++ b/conf-pmacct/sfacctd.conf @@ -10,7 +10,7 @@ sfacctd_port: 9998 ! We use this to encode the sensor name for each port. pre_tag_map: /etc/pmacct/sfacct-pretag.map -! FOR PRINTING TO FILES +! FOR PRINTING TO FILES instead of writing to rabbit queue (comment out amqp_* lines) ! Default format is tab-separated. If 'label' or any variable length field is in the aggregation list, you have to use csv format. !## plugins: print !## print_output_file: /tmp/sfacct/%Y/%m/%d/sfacct.%Y%m%d-%H%M @@ -26,18 +26,17 @@ sfacctd_port: 9998 amqp_exchange_type: direct amqp_exchange: amq.direct amqp_persistent_msg: true - amqp_vhost: / amqp_routing_key: netsage_deidentifier_raw + amqp_vhost: / +! How often to write to rabbit queue = time to aggregate over (default = 60 sec) + amqp_refresh_time: 300 -! FOR NETSAGE, AGGREGATE SFLOW OVER TIME (5 minutes) WITH PMACCT ! To aggregate flows over time (x sec)- ! 1. Set print_refresh_time or amqp_refresh_time to x sec. ! 2. Do not include timestamp_arrival in aggregate list. ! Timestamp_min and timestamp_max will be added to output automatically, do not include in aggregate list. ! 3. Set sfacctd_stitching: true - -! How often to write to rabbit queue = time to aggregate over (default = 60 sec) - amqp_refresh_time: 300 +! FOR NETSAGE, AGGREGATE SFLOW OVER TIME (5 minutes) WITH PMACCT ! Fields to aggregate on - include all fields we want in the ouput. See pmacct docs for field meanings ! For sflow, the only available timestamp is timestamp_arrival (arrival at collector). Do not include it to aggregate over time. diff --git a/grnoc-netsage-pipeline.spec b/grnoc-netsage-pipeline.spec index 221c17fb..6baef024 100644 --- a/grnoc-netsage-pipeline.spec +++ b/grnoc-netsage-pipeline.spec @@ -135,6 +135,7 @@ rm -rf $RPM_BUILD_ROOT %config(noreplace) /etc/logstash/conf.d/99-output-jsonlog.conf.disabled %config(noreplace) /etc/logstash/conf.d/99-output-multiline-json.conf.disabled %config(noreplace) /etc/logstash/conf.d/99-output-elastic.conf.disabled +%config /etc/logstash/conf.d/05-translate-pmacct.conf %config /etc/logstash/conf.d/10-preliminaries.conf %config /etc/logstash/conf.d/20-add-id.conf %config /etc/logstash/conf.d/45-geoip-tagging.conf From 9c33261eaec6f3166a8c073a1f6b7e895ed9b16c Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Tue, 26 Apr 2022 20:50:30 +0000 Subject: [PATCH 065/126] Added new 40-aggregation.conf for pmacct - default timeouts of 5 and 60 min, for netflow it adjusts start times by cutting off whole hours --- conf-logstash/40-aggregation.conf | 238 +++++++++++++++++++++--------- 1 file changed, 167 insertions(+), 71 deletions(-) diff --git a/conf-logstash/40-aggregation.conf b/conf-logstash/40-aggregation.conf index c5588c84..b514a3b7 100644 --- a/conf-logstash/40-aggregation.conf +++ b/conf-logstash/40-aggregation.conf @@ -1,72 +1,82 @@ -##### COPY ANY CHANGES TO YOUR EXISTING VERSION AFTER AN UPGRADE ##### - +# This filter stitches together incoming flows that go together. 
+# ## Fields most likely to be specific to a logstash pipeline: -## These may be set via environment variables. -## aggregate_maps_path - must be unique for each pipeline. Aggregation info is written here if logstash exits. Default is /tmp/logstash-aggregation-maps. -## inactivity_timeout - value depends on timespan of nfcapd files. Default is 630 sec. -## timeout - the maximum length of a flow. Default is 1 day. -## (trial - this may be useful for testing. Commented out by default.) - -# This filter stitches together flows from different nfcapd files, each (usually) spanning a 5 min. period. -# Note: netflow keeps the start time the same for all flows with the same fingerprint, even across different nfcapd files; -# duration is cumulative but counts are not. Sflow just sends samples. -# If only 1 packet is seen, end time will = start time and duration will be 0. - -# NOTE: tags added to events before this point in the pipeline aren't kept. +## You may set these via environment variables +## aggregate_maps_path - must be unique for each logstash pipeline. Default is /tmp/logstash-aggregation-maps. +## inactivity_timeout - default is 5 min. +## timeout - the maximum length of a flow. Default is 1 hr. +## NOTE THAT THERE ARE SEPARATE SECTIONS FOR SFLOW AND NETFLOW, +## EDIT BOTH !!!! filter { - # TSTAT - tstat only reports complete flows, so no stitching is needed! + # === TSTAT === + # Tstat only reports complete flows, so no stitching is needed! # Just add stitched_flows=0 (means no stitching attempted) if [meta][flow_type] == 'tstat' { mutate { id => "40-1" add_field => { 'stitched_flows' => 0 } } - - } - - # SFLOW AND NETFLOW - aggregate flows spanning more than 1 nfcapd file + } else { - # We need the 'start' time as a date, as well as as a timestamp + # for aggregation, we need the 'start' or 'end' date, as well as as timestamp date { id => "40-2" match => [ '[start]', 'UNIX' ] target => '[start_date]' } - - aggregate { + date { id => "40-3" - # unique ID used to aggregate events + match => [ '[end]', 'UNIX' ] + target => '[end_date]' + } + } + + # === SFLOW === + # Aggregate on hash of 5-tuple + sensor + # Incoming events may be single samples or results from partial aggregation/stitching by sfacctd. + if [meta][flow_type] == "sflow" { + aggregate { + id => "40-4" + # Events that have matching task_id's will be aggregated. task_id => '%{[flow_fingerprint]}' - # save the fingerprint value on timeout + # Save the task_id value to this field in the aggregated event on timeout timeout_task_id_field => "[flow_fingerprint]" - # use event's start time rather than system time to determine whether a timeout has occured (must be type 'date') + # Use this field when determining if timeouts have occurred, in case we are processing historical data. + # It'll actually look at values of this field AND the clock times at which events come in. (Must be type 'date') timeout_timestamp_field => '[start_date]' - # If more than inactivity_timeout seconds have passed between the 'start' of this event and the 'start' - # of the LAST matching event, OR if no matching flow has coming in for inactivity_timeout seconds - # on the clock, assume the flow has ended. - ## Use 630 sec = 10.5 min for 5-min files, 960 sec = 16 min for 15-min files. - ## (For 5-min files, this allows one 5 min gap or period during which the no. of bits transferred don't meet the cutoff) - inactivity_timeout => "${inactivity_timeout:630}" - - # Maximum possible flow length. 
Stop aggregating even if we're still seeing matching events coming in. - ## Use 86400 sec = 1 day - timeout => "${max_flow_timeout:86400}" - - # send the aggregation map as a new event upon timeout + # Inactive timeout + # A flow is assumed to have ended if more than inactivity_timeout seconds have passed since the last matching event. + # (Aggregator compares timeout_timestamp_field of the current matching event and of the last matching event. If the diff is + # greater than inactivity_timeout, it ends the current flow and starts a new one. + # ALSO, every 5 sec, it compares the ingest clock time of the last matching event to NOW. + # If more than inactivity_timeout seconds have passed, it declares the flow finished.) + ## default 300 sec = 5 min + inactivity_timeout => "${inactivity_timeout:300}" + + # Active timeout + # = maximum possible flow duration + # (Aggregator compares timeout_timestamp_field of the current event to that of the FIRST event in the map. If the + # diff is greater than timeout, it ends the current flow and starts a new one, even if matching events are still coming in. + # ALSO, every 5 sec, it compares the ingest clock time of the first event in the map to NOW. + # If more than timeout seconds have passed, it declares the flow finished, even if matching events are still coming in.) + ## default 3600 sec = 1 hour + timeout => "${max_flow_timeout:3600}" + + # Save the aggregation map as a new event upon timeout push_map_as_event_on_timeout => true - - # save the aggregation maps here when logstash shuts down + + # Save all the in-progress aggregation maps when logstash shuts down, to be read back in when it restarts. ## (use a different file for each logstash pipeline!) aggregate_maps_path => '${aggregation_maps_path:/tmp/logstash-aggregation-maps}' - - # ruby code to run each time we see an event - # (||= assigns the value only if the variable does not yet exist. 'map' values are included in the final event.) + # Ruby code to run for each event. + # (The event will be added to the correct map (hash) according to its task_id. + # ||= assigns the value only if the variable does not yet exist. Only map values are included in the final event.) code => " # keep track of how many events we aggregate map['stitched_flows'] ||= 0 @@ -76,58 +86,144 @@ filter { map['start'] ||= event.get('start') map['end'] ||= event.get('end') - # save meta and values info from the first event - # values will be updated as we stitch on other flows + # Save meta, values, and tags info from the FIRST event. + # Only 'values' will be updated as we stitch events or at the very end. map['meta'] ||= event.get('meta') map['values'] ||= event.get('values') + map['tags'] ||= event.get('tags') - # essentially the time the flow entered the pipeline + # Essentially the time the flow entered the pipeline map['@ingest_time'] ||= Time.now # Saving @timestamp caused problems when aggregate map was saved to a file then read. # but this works. # An @timestamp will be added when the map is finally pushed as an event. 
#### FOR TESTING - # map['trial'] = 1 - # map['values']['durations_sum'] ||= 0; - # map['values']['durations_sum'] += event.get('[values][duration]') - # map['values']['durations'] ||= ' ' - # map['values']['durations'] += event.get('[values][duration]').to_s - # map['values']['durations'] += '; ' + #map['trial'] = 1 + #map['values']['indivDurations'] ||= ' ' + #map['values']['indivDurations'] += event.get('[values][duration]').to_f.round(3).to_s + #map['values']['indivDurations'] += '; ' #### - # if we are seeing a subsequent flow event + # If we are seeing a subsequent flow event... (assumes all events are in order!) if map['stitched_flows'] > 1 - - # be very sure we are getting the correct start and end times, even if events are out of order - map['start'] = [ map['start'], event.get('start') ].min - map['end'] = [ map['end'], event.get('end') ].max - + map['end'] = event.get('end') # sum the packet and bit counters map['values']['num_packets'] += event.get('[values][num_packets]') map['values']['num_bits'] += event.get('[values][num_bits]') + end - # recalculate total duration - map['values']['duration'] = map['end'] - map['start'] + # Discard the original events. We only care about the aggregation. + event.cancel() + " + + # Code to run on the new aggregated event before it's pushed out + timeout_code => " + # recalculate total duration + duration = event.get('end') - event.get('start') + event.set( '[values][duration]', duration.round(3) ) - # recalculate average pps and bps - if map['values']['duration'] > 0 - map['values']['packets_per_second'] = (map['values']['num_packets'] / map['values']['duration']).to_i; - map['values']['bits_per_second'] = (map['values']['num_bits'] / map['values']['duration']).to_i; + # recalculate average pps and bps (say duration < .001 is 0 within roundoff error) + if duration >= 0.001 + event.set( '[values][packets_per_second]', event.get('[values][num_packets]') / duration ) + event.set( '[values][bits_per_second]', event.get('[values][num_bits]') / duration ) else - # can't calculate so set to 0 # - map['values']['packets_per_second'] = 0; - map['values']['bits_per_second'] = 0; + # can't calculate (accurate) rates so set to 0 + event.set( '[values][duration]', 0 ) + event.set( '[values][packets_per_second]', 0 ) + event.set( '[values][bits_per_second]', 0 ) end + " + } + } + # === NETFLOW === + # Aggregate on hash of 5-tuple + sensor + start time + # We have to do special things due to the fact that netflow sensors send "updates" about active flows, + # all with the same start time, but bytes and packets are not cumulative. + # The following will aggregate the updates up to 1 hr; and it will adjust start times when long flows are split up into 1 hr chunks. + # Note that when there's a timeout at the router (default inactive timeout is usually 15 sec), the flows will stay separate + # and not be stitched, even though they have the same 5-tuple, since the start time will change. + else if [meta][flow_type] == "netflow" { + ruby { + # if duration is > timeout (1 hr), adjust start time to cut off n*timeout (whole hours). + # That part of the flow should have already been processed and pushed out. 
+ id => "40-5" + code => " + start = event.get( 'start' ) + duration = event.get( '[values][duration]' ).to_f + cuts = 0 # how many times the start time got cut + while duration > 3600.0 + start = start + 3600.0 # move start forward + duration -= 3600.0 + cuts += 1 + end + if cuts > 0 + event.set( 'start', start ) + event.set( '[values][duration]', duration ) + event.set( '@dur_cuts', cuts ) #### FOR TESTING + end + " + } + aggregate { + id => "40-6" + # unique ID used to aggregate events ## A second agg filter must have different task_id "pattern" + # For Netflow, include start time so only "updates" with the same start time are aggregated, not + # continuations after short gaps that the router considers timeouts. + task_id => '%{[flow_fingerprint]}-%{[start]}' - # round off after calculations - map['values']['duration'] = (map['values']['duration']).round(3) + # see comments above. MAKE SURE THE VALUES/DEFAULTS ARE THE SAME HERE. + timeout_timestamp_field => '[start_date]' + inactivity_timeout => "${inactivity_timeout:300}" + timeout => "${max_flow_timeout:3600}" + push_map_as_event_on_timeout => true + ## can only set this in 1 agg. filter and it is set above! + ## aggregate_maps_path => '${aggregation_maps_path:/tmp/logstash-aggregation-maps}' + + # Ruby code to run for each event. + code => " + # we have to save flow_fingerprint explicitly for netflow + map['flow_fingerprint'] ||= event.get('flow_fingerprint') + + map['stitched_flows'] ||= 0 + map['stitched_flows'] += 1 + map['start'] ||= event.get('start') + map['end'] ||= event.get('end') + map['meta'] ||= event.get('meta') + map['values'] ||= event.get('values') + map['tags'] ||= event.get('tags') + map['@ingest_time'] ||= Time.now + + #### FOR TESTING + #map['trial'] = 1 + #map['values']['indivDurations'] ||= ' ' + #map['values']['indivDurations'] += event.get('[values][duration]').to_f.round(3).to_s + #map['values']['indivDurations'] += '; ' + #### + + if map['stitched_flows'] > 1 + map['end'] = event.get('end') + map['values']['num_packets'] += event.get('[values][num_packets]') + map['values']['num_bits'] += event.get('[values][num_bits]') + map['@dur_cuts'] = event.get('@dur_cuts') #### FOR TESTING end - # discard the original event. we only care about the aggregation. event.cancel() " - } - } + + timeout_code => " + duration = event.get('end') - event.get('start') + event.set( '[values][duration]', duration.round(3) ) + + if duration >= 0.001 + event.set( '[values][packets_per_second]', event.get('[values][num_packets]') / duration ) + event.set( '[values][bits_per_second]', event.get('[values][num_bits]') / duration ) + else + event.set( '[values][duration]', 0 ) + event.set( '[values][packets_per_second]', 0 ) + event.set( '[values][bits_per_second]', 0 ) + end + " + } + } # end if netflow } From 0520a82b94e6fdd2668fbd0cbc15e3b61ebeeb9e Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Tue, 26 Apr 2022 21:12:09 +0000 Subject: [PATCH 066/126] Added 41-thresholds.conf with flow size threshold of 10MB and duration threshold of .1 sec. Mods to 10-preliminaries. 
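The start-time adjustment for long netflow flows added in 40-aggregation.conf (PATCH 065, above) can be checked with a short worked example; the numbers below are invented.

    # Worked example of the whole-hour cut in 40-aggregation.conf; values are invented.
    start    = 1_650_000_000.0
    duration = 9000.0        # a 2.5-hour running total reported for the flow
    cuts     = 0
    while duration > 3600.0
      start    += 3600.0     # drop an hour that has already been pushed out as its own event
      duration -= 3600.0
      cuts     += 1
    end
    # start has advanced by 7200 s, duration is now 1800.0, and cuts is 2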
--- conf-logstash/10-preliminaries.conf | 34 ++++++++++------------------- conf-logstash/41-thresholds.conf | 22 +++++++++++++++++++ 2 files changed, 33 insertions(+), 23 deletions(-) create mode 100644 conf-logstash/41-thresholds.conf diff --git a/conf-logstash/10-preliminaries.conf b/conf-logstash/10-preliminaries.conf index 296ec2f9..7c9c0446 100644 --- a/conf-logstash/10-preliminaries.conf +++ b/conf-logstash/10-preliminaries.conf @@ -6,35 +6,35 @@ filter { if ![meta][src_ip] { mutate{ id => "10-01" - add_tag => [ "src_ip was missing in orig data!?" ] + add_tag => [ "src_ip was missing in flow header!?" ] add_field => { "[meta][src_ip]" => "0.0.0.0" } } } if ![meta][dst_ip] { mutate{ id => "10-02" - add_tag => [ "dst_ip was missing in orig data!?" ] + add_tag => [ "dst_ip was missing in flow header!?" ] add_field => { "[meta][dst_ip]" => "0.0.0.0" } } } if ![meta][src_ifindex] { mutate{ id => "10-03" - add_tag => [ "src_ifindex was missing!?" ] + add_tag => [ "src_ifindex was missing in flow header!?" ] add_field => { "[meta][src_ifindex]" => -10 } } } if ![meta][dst_ifindex] { mutate{ id => "10-04" - add_tag => [ "dst_ifindex was missing in orig data!?" ] + add_tag => [ "dst_ifindex was missing in flow header!?" ] add_field => { "[meta][dst_ifindex]" => -10 } } } if ![values][duration] { mutate{ id => "10-05" - add_tag => [ "duration was missing in orig data!?" ] + add_tag => [ "duration was missing!?" ] add_field => { "[values][duration]" => 0 } } } @@ -57,16 +57,16 @@ filter { } # drop if [@private_src] == "yes" or [@private_dst] == "yes" { - drop { } + drop { id => "10-3" } } # 3. Add @ingest_time field (useful for debugging) mutate { - id => "10-3" + id => "10-4" add_field => { '@ingest_time' => "%{@timestamp}" } } date { - id => "10-4" + id => "10-5" match => [ "@ingest_time", "ISO8601" ] target => "@ingest_time" } @@ -74,7 +74,7 @@ filter { # 4. Convert strings to numeric types where appropriate. We need to use these in calculations later. # Start and end are timestamps at this point. Make sure they are floats. mutate { - id => "10-5" + id => "10-6" convert => { 'start' => 'float' 'end' => 'float' @@ -86,25 +86,13 @@ filter { } } - # 5. If duration (eg from aggregation by nfdump in importer) is <= .002, set it to 0. - # When duration is too small, bps calculation is highly inaccurate. - if [values][duration] <= 0.002 { - mutate { - id => "10-6" - replace => {"[values][duration]" => 0} - replace => {"[values][bits_per_second]" => 0} - replace => {"[values][packets_per_second]" => 0} - } - } - - ruby { id => "10-7" code => " flow_ts = event.get('start').to_f flow_te = event.get('end').to_f - # 6. Convert any timestamps in ms to s + # 5. Convert any timestamps in ms to s if flow_ts > 9999999999.0 flow_ts = flow_ts / 1000.0 event.set('start', flow_ts) @@ -114,7 +102,7 @@ filter { event.set('end', flow_te) end - # 7. DROP any event with a strange start or end time + # 6. DROP any event with a strange start or end time # > 10 sec in the future or > 1 year in the past, or end < start current_t = Time.now.to_f age_s = current_t - flow_ts diff --git a/conf-logstash/41-thresholds.conf b/conf-logstash/41-thresholds.conf new file mode 100644 index 00000000..be7e16d7 --- /dev/null +++ b/conf-logstash/41-thresholds.conf @@ -0,0 +1,22 @@ +# Apply various thresholds after aggregating/stitching (if any) + +filter { + + # The minimum flow size threshold is 10 MB = 80,000,000 bits. + # Drop any flows still smaller than that. 
+ if [values][num_bits] < 80000000 { + drop { id => "41-1" } + } + + # If duration is too small, it's almost certainly inaccurate and it will make rates inaccurate. + # For durations under the threshold, set duration and rates to 0. Default is 0.1 sec. + if [values][duration] < 0.1 { + mutate { + id => "41-2" + replace => {"[values][duration]" => 0} + replace => {"[values][bits_per_second]" => 0} + replace => {"[values][packets_per_second]" => 0} + } + } + +} From 9309692b1c40c5b8bedd8b357c43237a4132902e Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Tue, 26 Apr 2022 21:16:55 +0000 Subject: [PATCH 067/126] updates to spec and manifest files --- MANIFEST | 1 + grnoc-netsage-pipeline.spec | 1 + 2 files changed, 2 insertions(+) diff --git a/MANIFEST b/MANIFEST index 0ad377b3..409eda2a 100644 --- a/MANIFEST +++ b/MANIFEST @@ -13,6 +13,7 @@ conf-logstash/10-preliminaries.conf conf-logstash/15-sensor-specific-changes.conf conf-logstash/20-add-id.conf conf-logstash/40-aggregation.conf +conf-lonstash/41-thresholds.conf conf-logstash/45-geoip-tagging.conf conf-logstash/50-asn.conf conf-logstash/53-caida-org.conf diff --git a/grnoc-netsage-pipeline.spec b/grnoc-netsage-pipeline.spec index 6baef024..46831fe4 100644 --- a/grnoc-netsage-pipeline.spec +++ b/grnoc-netsage-pipeline.spec @@ -138,6 +138,7 @@ rm -rf $RPM_BUILD_ROOT %config /etc/logstash/conf.d/05-translate-pmacct.conf %config /etc/logstash/conf.d/10-preliminaries.conf %config /etc/logstash/conf.d/20-add-id.conf +%config /etc/logstash/conf.d/41-thresholds.conf %config /etc/logstash/conf.d/45-geoip-tagging.conf %config /etc/logstash/conf.d/50-asn.conf %config /etc/logstash/conf.d/53-caida-org.conf From 1997857f0286dca910e98e5b0fbafcf9cc4f4f7d Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Tue, 26 Apr 2022 21:23:46 +0000 Subject: [PATCH 068/126] typo in manifest --- MANIFEST | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MANIFEST b/MANIFEST index 409eda2a..76a1093c 100644 --- a/MANIFEST +++ b/MANIFEST @@ -13,7 +13,7 @@ conf-logstash/10-preliminaries.conf conf-logstash/15-sensor-specific-changes.conf conf-logstash/20-add-id.conf conf-logstash/40-aggregation.conf -conf-lonstash/41-thresholds.conf +conf-logstash/41-thresholds.conf conf-logstash/45-geoip-tagging.conf conf-logstash/50-asn.conf conf-logstash/53-caida-org.conf From bfbbd4f1ce9b87d8a761311cc4d5b4888ae90633 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 27 Apr 2022 15:24:38 +0000 Subject: [PATCH 069/126] 05-translate-pmacct.conf does not apply to tstat, tag flows instead of dropping them if no sampling correction has been done, save @sampling_corrected field --- conf-logstash/05-translate-pmacct.conf | 36 +++++++++++-------- conf-logstash/15-sensor-specific-changes.conf | 4 +-- conf-logstash/40-aggregation.conf | 3 +- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/conf-logstash/05-translate-pmacct.conf b/conf-logstash/05-translate-pmacct.conf index 9c01e09c..dfaa3304 100644 --- a/conf-logstash/05-translate-pmacct.conf +++ b/conf-logstash/05-translate-pmacct.conf @@ -4,35 +4,42 @@ # followed by the sensor name with spaces replaced by #s. filter { + # skip all this for tstat! 
+ if [label] { - # For sflow - + # FOF SFLOW - if [label] =~ /^sfacct--/ { mutate { add_field => { "[meta][flow_type]" => "sflow" } - # (Assuming some aggregation over time by sfacctd) + # Assuming some aggregation over time by sfacctd: rename => {'timestamp_min' => 'start'} rename => {'timestamp_max' => 'end'} id => "05-1" } } - # For netflow - + # FOR NETFLOW - if [label] =~ /^nfacct--/ { - # On startup, before a template is sent and read, sampling_rate will be 0 and no corrections applied; drop these! - # COMMENT OUT IF THERE REALLY IS NO SAMPLING AT THE ROUTER! - if [sampling_rate] == 0 { - drop { id => "05-0" } - } mutate { add_field => { "[meta][flow_type]" => "netflow" } - # (Assuming no aggregation over time by nfacctd) + # Assuming no aggregation over time by nfacctd: rename => {'timestamp_start' => 'start'} rename => {'timestamp_end' => 'end'} id => "05-2" } } - # For all flows from pmacct processes - + # FOR ALL PMACCT FLOWS - + # Tag flows without a sampling rate + # (router may not be sending it, template may not have arrived yet, there may be no sampling) + # Here sampling_rate is just a flag, 0 or 1. Save the flag to @sampling_corrected. + if [sampling_rate] == 0 { + mutate { + id => "05-3" + add_tag => ["No sampling rate found on ingest"] + add_field => { "@sampling_corrected" => [sampling_rate] } + } + } # Get sensor name # Note: In the pmacct pretag file, label must be set to sfacct-- or nfacct-- # followed by the real sensor name with spaces replaced by #s. @@ -45,7 +52,7 @@ filter { event.set( "[meta][sensor_id]", sensor ) ' tag_on_exception => '_rubyexception getting sensor from label in 05-translate-pmacct. ' - id => "05-3" + id => "05-4" } # Do field name translations mutate { @@ -61,7 +68,7 @@ filter { rename => {'packets' => '[values][num_packets]'} convert => {'start' => 'float'} convert => {'end' => 'float'} - id => "05-4" + id => "05-5" } ruby { code => ' @@ -78,13 +85,14 @@ filter { end ' tag_on_exception => '_rubyexception in 05-translate-pmacct. ' - id => "05-5" + id => "05-6" } # Remove unneeded fields mutate { remove_field => [ 'sampling_rate', 'event_type', 'writer_id' ] remove_field => [ 'label', 'bytes' ] - id => "05-6" + id => "05-7" } + } } diff --git a/conf-logstash/15-sensor-specific-changes.conf b/conf-logstash/15-sensor-specific-changes.conf index 02a2500a..262ed5c5 100644 --- a/conf-logstash/15-sensor-specific-changes.conf +++ b/conf-logstash/15-sensor-specific-changes.conf @@ -78,9 +78,7 @@ filter { # SAMPLING RATE CORRECTIONS - #---- Manually apply a sampling correction to listed sensors. Use only in special cases when the flow exporter or collector is providing corrections. - # For netflow, a sampling rate correction can be done here or in the nfsen config or nfcapd command using the -s option. - # For sflow, there is no such option, so it must be done here. + #---- Manually apply a sampling correction to listed sensors. mutate { add_field => { "[@metadata][sampling_correction_flag]" => "${sampling_correction_flag:False}" } diff --git a/conf-logstash/40-aggregation.conf b/conf-logstash/40-aggregation.conf index b514a3b7..fce17917 100644 --- a/conf-logstash/40-aggregation.conf +++ b/conf-logstash/40-aggregation.conf @@ -86,11 +86,12 @@ filter { map['start'] ||= event.get('start') map['end'] ||= event.get('end') - # Save meta, values, and tags info from the FIRST event. + # Save these fields from the FIRST event. # Only 'values' will be updated as we stitch events or at the very end. 
map['meta'] ||= event.get('meta') map['values'] ||= event.get('values') map['tags'] ||= event.get('tags') + map['@sampling_corrected'] ||= event.get('@sampling_corrected') # Essentially the time the flow entered the pipeline map['@ingest_time'] ||= Time.now # Saving @timestamp caused problems when aggregate map was saved to a file then read. From 71c3513f6280d62f57211924cf11ca9719fc4a9f Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 27 Apr 2022 16:26:57 +0000 Subject: [PATCH 070/126] forgot to edit netflow section of 40-aggregation.conf also. --- conf-logstash/40-aggregation.conf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/conf-logstash/40-aggregation.conf b/conf-logstash/40-aggregation.conf index fce17917..a27125ed 100644 --- a/conf-logstash/40-aggregation.conf +++ b/conf-logstash/40-aggregation.conf @@ -98,7 +98,7 @@ filter { # but this works. # An @timestamp will be added when the map is finally pushed as an event. - #### FOR TESTING + #### FOR TESTING (EDIT IN BOTH SFLOW AND NETFLOW SECTIONS !!!) #map['trial'] = 1 #map['values']['indivDurations'] ||= ' ' #map['values']['indivDurations'] += event.get('[values][duration]').to_f.round(3).to_s @@ -192,9 +192,10 @@ filter { map['meta'] ||= event.get('meta') map['values'] ||= event.get('values') map['tags'] ||= event.get('tags') + map['@sampling_corrected'] ||= event.get('@sampling_corrected') map['@ingest_time'] ||= Time.now - #### FOR TESTING + #### FOR TESTING (EDIT IN BOTH SFLOW AND NETFLOW SECTIONS !!!) #map['trial'] = 1 #map['values']['indivDurations'] ||= ' ' #map['values']['indivDurations'] += event.get('[values][duration]').to_f.round(3).to_s From 0d047779eb98e36addbb1b0632dae75debe3688a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 28 Apr 2022 22:20:09 +0000 Subject: [PATCH 071/126] Bump cross-fetch from 3.1.3 to 3.1.5 in /website Bumps [cross-fetch](https://github.com/lquixada/cross-fetch) from 3.1.3 to 3.1.5. - [Release notes](https://github.com/lquixada/cross-fetch/releases) - [Commits](https://github.com/lquixada/cross-fetch/compare/v3.1.3...v3.1.5) --- updated-dependencies: - dependency-name: cross-fetch dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- website/yarn.lock | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index 07aeee5f..b2a68a13 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -3305,11 +3305,11 @@ create-hmac@^1.1.0, create-hmac@^1.1.4, create-hmac@^1.1.7: sha.js "^2.4.8" cross-fetch@^3.0.4: - version "3.1.3" - resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-3.1.3.tgz#b8e7d5f19161c4a0ca916f707978848786043afb" - integrity sha512-2i6v88DTqVBNODyjD9U6Ycn/uSZNvyHe25cIbo2fFnAACAsaLTJsd23miRWiR5NuiGXR9wpJ9d40/9WAhjDIrw== + version "3.1.5" + resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-3.1.5.tgz#e1389f44d9e7ba767907f7af8454787952ab534f" + integrity sha512-lvb1SBsI0Z7GDwmuid+mU3kWVBwTVUbe7S0H52yaaAdQOXq2YktTCZdlAcNKFzE6QtRz0snpw9bNiPeOIkkQvw== dependencies: - node-fetch "2.6.1" + node-fetch "2.6.7" cross-spawn@7.0.3, cross-spawn@^7.0.3: version "7.0.3" @@ -6481,10 +6481,12 @@ node-emoji@^1.10.0: dependencies: lodash.toarray "^4.4.0" -node-fetch@2.6.1: - version "2.6.1" - resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.1.tgz#045bd323631f76ed2e2b55573394416b639a0052" - integrity sha512-V4aYg89jEoVRxRb2fJdAg8FHvI7cEyYdVAh94HH0UIK8oJxUfkjlDQN9RbMx+bEjP7+ggMiFRprSti032Oipxw== +node-fetch@2.6.7: + version "2.6.7" + resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.7.tgz#24de9fba827e3b4ae44dc8b20256a379160052ad" + integrity sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ== + dependencies: + whatwg-url "^5.0.0" node-fetch@^3.1.1: version "3.1.1" @@ -9417,6 +9419,11 @@ totalist@^1.0.0: resolved "https://registry.yarnpkg.com/totalist/-/totalist-1.1.0.tgz#a4d65a3e546517701e3e5c37a47a70ac97fe56df" integrity sha512-gduQwd1rOdDMGxFG1gEvhV88Oirdo2p+KjoYFU7k2g+i7n6AFFbDQ5kMPUsW0pNbfQsB/cwXvT1i4Bue0s9g5g== +tr46@~0.0.3: + version "0.0.3" + resolved "https://registry.yarnpkg.com/tr46/-/tr46-0.0.3.tgz#8184fd347dac9cdc185992f3a6622e14b9d9ab6a" + integrity sha1-gYT9NH2snNwYWZLzpmIuFLnZq2o= + trim-trailing-lines@^1.0.0: version "1.1.4" resolved "https://registry.yarnpkg.com/trim-trailing-lines/-/trim-trailing-lines-1.1.4.tgz#bd4abbec7cc880462f10b2c8b5ce1d8d1ec7c2c0" @@ -9926,6 +9933,11 @@ web-streams-polyfill@^3.0.3: resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-3.2.0.tgz#a6b74026b38e4885869fb5c589e90b95ccfc7965" integrity sha512-EqPmREeOzttaLRm5HS7io98goBgZ7IVz79aDvqjD0kYXLtFZTc0T/U6wHTPKyIjb+MdN7DFIIX6hgdBEpWmfPA== +webidl-conversions@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-3.0.1.tgz#24534275e2a7bc6be7bc86611cc16ae0a5654871" + integrity sha1-JFNCdeKnvGvnvIZhHMFq4KVlSHE= + webpack-bundle-analyzer@^4.4.0: version "4.4.0" resolved "https://registry.yarnpkg.com/webpack-bundle-analyzer/-/webpack-bundle-analyzer-4.4.0.tgz#74013106e7e2b07cbd64f3a5ae847f7e814802c7" @@ -10071,6 +10083,14 @@ websocket-extensions@>=0.1.1: resolved "https://registry.yarnpkg.com/websocket-extensions/-/websocket-extensions-0.1.4.tgz#7f8473bc839dfd87608adb95d7eb075211578a42" integrity sha512-OqedPIGOfsDlo31UNwYbCFMSaO9m9G/0faIHj5/dZFDMFqPTcx6UwqyOy3COEaEOg/9VsGIpdqn62W5KhoKSpg== +whatwg-url@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-5.0.0.tgz#966454e8765462e37644d3626f6742ce8b70965d" + integrity sha1-lmRU6HZUYuN2RNNib2dCzotwll0= + dependencies: + tr46 "~0.0.3" + webidl-conversions 
"^3.0.0" + which-boxed-primitive@^1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/which-boxed-primitive/-/which-boxed-primitive-1.0.2.tgz#13757bc89b209b049fe5d86430e21cf40a89a8e6" From 4d23a03cfc649bfe70862ac00d1b31e315378397 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 29 Apr 2022 15:01:39 +0000 Subject: [PATCH 072/126] Bump async from 2.6.3 to 2.6.4 in /website Bumps [async](https://github.com/caolan/async) from 2.6.3 to 2.6.4. - [Release notes](https://github.com/caolan/async/releases) - [Changelog](https://github.com/caolan/async/blob/v2.6.4/CHANGELOG.md) - [Commits](https://github.com/caolan/async/compare/v2.6.3...v2.6.4) --- updated-dependencies: - dependency-name: async dependency-type: indirect ... Signed-off-by: dependabot[bot] --- website/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index 07aeee5f..00c8d758 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -2250,9 +2250,9 @@ async-limiter@~1.0.0: integrity sha512-csOlWGAcRFJaI6m+F2WKdnMKr4HhdhFVBk0H/QbJFMCr+uO2kwohwXQPxw/9OCxp05r5ghVBFSyioixx3gfkNQ== async@^2.6.2: - version "2.6.3" - resolved "https://registry.yarnpkg.com/async/-/async-2.6.3.tgz#d72625e2344a3656e3a3ad4fa749fa83299d82ff" - integrity sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg== + version "2.6.4" + resolved "https://registry.yarnpkg.com/async/-/async-2.6.4.tgz#706b7ff6084664cd7eae713f6f965433b5504221" + integrity sha512-mzo5dfJYwAn29PeiJ0zvwTo04zj8HDJj0Mn8TD7sno7q12prdbnasKJHhkm2c1LgrhlJ0teaea8860oxi51mGA== dependencies: lodash "^4.17.14" From 9631661c6250fea65a04c12150c288c3301f2b55 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 4 May 2022 19:52:29 +0000 Subject: [PATCH 073/126] Fixing ifindex filtering and adding ALL options --- conf-logstash/15-sensor-specific-changes.conf | 68 +++++++++++-------- env.example | 28 ++++---- 2 files changed, 53 insertions(+), 43 deletions(-) diff --git a/conf-logstash/15-sensor-specific-changes.conf b/conf-logstash/15-sensor-specific-changes.conf index 262ed5c5..778fac51 100644 --- a/conf-logstash/15-sensor-specific-changes.conf +++ b/conf-logstash/15-sensor-specific-changes.conf @@ -1,18 +1,21 @@ # Make changes required for specific sensors -# ${variable-name:default-value} are obtained from an environment file (the .env file for Docker installations; for bare-metal installations, -# the default is /etc/logstash/logstash-env-vars - see the logstash systemd file) -# If values are not provided (eg, there is no env file), the defaults following the :'s are used. (Flags will be False, so nothing will happen). +# ${variable-name:default-value} are obtained from an environment file (the .env file for Docker installations; +# for bare-metal installations, the default is /etc/logstash/logstash-env-vars - see the logstash systemd file) # With a bare-metal installation, you may also just edit this file and fill in the values you want. +# If values are not provided (eg, there is no env file), the defaults following the :'s are used. +# (Flags will be False, so nothing will happen). -# Using env vars in conditionals has been an open issue for logstash since 2016! Workaround is to add a field. +# Using env vars in conditionals has been an open issue for logstash since 2016! Workaround is to add a "flag" field. 
filter { # IFINDEX FILTERING #---- Drop flows that do not have src or dst ifindex in a specified list of ifindexes # Specifying a sensor name is optional. If not given, the ifindex list will apply to all sensors. - # Example settings in env file: ifindex_filter_keep="500; Sensor 1: 123,456; Sensor 2 : 789, 123" - # (If specified, the sensor name must be exact, otherwise spaces don't matter. Separate lists with semicolons.) + # ALL can refer to all sensors or all interfaces. + # If a sensor is not referred to, keep all its flows. + # Example settings in env file: ifindex_filter_keep="500; Sensor 1: 123,456; Sensor 2 :ALL" + # (If specified, the sensor name must be exact, otherwise spaces don't matter. Separate lists with semicolons.) mutate { add_field => { "[@metadata][ifindex_filter_flag]" => "${ifindex_filter_flag:False}" } } @@ -22,35 +25,40 @@ filter { id => "15-1" } mutate { - # Split the string on ';' into an array of the same name (in a separate mutate, since in mutate, split happens before all add_fields) + # Split the string on ';' into an array of the same name (in a separate mutate, since in mutate, + # split happens before all add_fields) # Add a dummy array element to force it to be an array, in case there is just 1 value in the env file. split => { "[@metadata][ifindex_filter_keep]" => ";" } add_field => { "[@metadata][ifindex_filter_keep]" => "dummy" } id => "15-2" } # Each (non-dummy) array element should have 'sensor-name: list-of-approved-ifindexes' ('sensor-name:' optional) - # Use Ruby to loop ruby { - code => ' - action = "drop" - filters = event.get("[@metadata][ifindex_filter_keep]") + id => "15-3" + code => " + # keep any flows that the filter list does not mention + action = 'keep' + # loop over the array of ifindex filters + filters = event.get('[@metadata][ifindex_filter_keep]') filters.each do |f| - next if f == "dummy" - # if f specifies a sensor that is not the current sensor, we can skip it. - # Otherwise, parse out the ifindex array and see if it includes the current ifindex. - if (! f.include? ":") or (f =~ /#{event.get("[meta][sensor_id]")}\s*:/) - f.sub!(/.*:/, "") - f.gsub!(/\s/, "") - indexes = f.split(",") - if indexes.include? event.get("[meta][src_ifindex]").to_s or indexes.include? event.get("[meta][dst_ifindex]").to_s - action = "keep" + next if f == 'dummy' + # if filter-sensor=flow-sensor or the filter apples to all sensors, check the ifindex list + # Once 'keep' is determined, quit loop and move on to next flow. + if ( (f =~ /^\s*ALL\s*:/) or (f =~ /^\s*#{event.get('[meta][sensor_id]')}\s*:/) or (! f.include? ':') + f.sub!(/.*:/, '') # remove : and everything before it + f.gsub!(/\s/, '') # get rid of spaces in ifindex list + indexes = f.split(',') # split on commas into an array + # only if the ifindex-list is ALL or includes the current ifindex, keep this flow. + action = 'drop' + if (indexes.include? 'ALL') or (indexes.include? event.get('[meta][src_ifindex]').to_s or indexes.include? 
event.get('[meta][dst_ifindex]').to_s) + action = 'keep' break end end end - event.cancel if action == "drop" - ' - } + event.cancel if action == 'drop' + " + } } @@ -58,20 +66,20 @@ filter { #---- Change the sensor name for flows from a certain interface (ifindex) mutate { add_field => { "[@metadata][ifindex_sensor_rename_flag]" => "${ifindex_sensor_rename_flag:False}" } - id => "15-3" + id => "15-4" } if [@metadata][ifindex_sensor_rename_flag] == "True" { mutate { add_field => { "[@metadata][ifindex_sensor_rename_old_name]" => "${ifindex_sensor_rename_old_name:oldname}" } add_field => { "[@metadata][ifindex_sensor_rename_new_name]" => "${ifindex_sensor_rename_new_name:newname}" } add_field => { "[@metadata][ifindex_sensor_rename_ifindex]" => "${ifindex_sensor_rename_ifindex:1}" } - id => "15-4" + id => "15-5" } if [meta][sensor_id] == [@metadata][ifindex_sensor_rename_old_name] and ( [meta][src_ifindex] == [@metadata][ifindex_sensor_rename_ifindex] or [meta][dst_ifindex] == [@metadata][ifindex_sensor_rename_ifindex] ) { mutate { replace => { "[meta][sensor_id]" => "%{[@metadata][ifindex_sensor_rename_new_name]}" } - id => "15-5" + id => "15-6" } } } @@ -82,19 +90,19 @@ filter { mutate { add_field => { "[@metadata][sampling_correction_flag]" => "${sampling_correction_flag:False}" } - id => "15-6" + id => "15-7" } if [@metadata][sampling_correction_flag] == "True" { mutate { add_field => { "[@metadata][sampling_correction_sensors]" => "${sampling_correction_sensors:sensor1,sensor2}" } add_field => { "[@metadata][sampling_correction_factor]" => "${sampling_correction_factor:1}" } - id => "15-7" + id => "15-8" } mutate { # make the field into an array (see comments about split above) split => { "[@metadata][sampling_correction_sensors]" => "," } add_field => { "[@metadata][sampling_correction_sensors]" => "dummy" } - id => "15-8" + id => "15-9" } if [meta][sensor_id] in [@metadata][sampling_correction_sensors] { ruby { @@ -105,7 +113,7 @@ filter { event.set('[values][bits_per_second]', correction_factor * event.get('[values][bits_per_second]').to_i) event.set('[values][packets_per_second]', correction_factor * event.get('[values][packets_per_second]').to_i) " - id => "15-9" + id => "15-10" } } } diff --git a/env.example b/env.example index bd4c8b30..fe8f0b87 100644 --- a/env.example +++ b/env.example @@ -1,33 +1,35 @@ -# Importer settings +# Sensor name to assign # == EXAMPLE VALUES MUST BE REPLACED == sflowSensorName=The Sflow Sensor Name netflowSensorName=The Netflow Sensor Name -# Logstash output rabbit queue -# default is to use the local rabbitmq server -# === FOR SENDING TO GlobalNOC, ASK FOR THE PROPER SETTINGS === +# Final Logstash output is to a rabbit queue, +# the default is to write to the local rabbitmq server. +# === FOR SENDING PROCESSED FLOWS TO GlobalNOC, ASK FOR THE PROPER SETTINGS === rabbitmq_output_host=rabbit rabbitmq_output_username=guest rabbitmq_output_pw=guest rabbitmq_output_key=netsage_archive_input -# To drop all flows except those using the specfied interfaces +# To filter flows by sensor(s) and interface(s) # (see the Docker Advanced documentation) +# "ALL" can refer to all sensors or all interfaces of a sensor. +# If a sensor is not references, all flows will be kept. 
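The setting shown just below is split on ';' into per-sensor entries and on ',' within each entry; a condensed Ruby sketch of how one flow might be evaluated (keep_flow? is an illustrative helper and the numbers are made up, not pipeline code):

    # Evaluate an ifindex_filter_keep string for one flow (sketch only).
    def keep_flow?(filter, sensor, src_ifindex, dst_ifindex)
      keep = true                                   # unmentioned sensors keep all their flows
      filter.split(";").each do |entry|
        name, list = entry.include?(":") ? entry.split(":", 2) : ["ALL", entry]
        next unless [sensor, "ALL"].include?(name.strip)
        indexes = list.split(",").map(&:strip)
        keep = indexes.include?("ALL") ||
               indexes.include?(src_ifindex.to_s) || indexes.include?(dst_ifindex.to_s)
        break
      end
      keep
    end

    puts keep_flow?("Sensor 1: 456,789; Sensor 2: ALL", "Sensor 1", 456, 999)   # true
    puts keep_flow?("Sensor 1: 456,789; Sensor 2: ALL", "Sensor 1", 111, 222)   # false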
ifindex_filter_flag=False -# ifindex_filter_keep=111; Sensor 1: 456; Sensor 2: 789,123 +#example# ifindex_filter_keep= Sensor-1: 456,789; Sensor 2: ALL -# To change the sensor name for flows using a certain interface +# To change the sensor name for flows coming from a given sensor and using a certain interface. # (See the Docker Advanced documentation) ifindex_sensor_rename_flag=False -# ifindex_sensor_rename_old_name=oldname -# ifindex_sensor_rename_new_name=newname -# ifindex_sensor_rename_ifindex=0 +#example# ifindex_sensor_rename_ifindex=123 +#example# ifindex_sensor_rename_old_name=old name +#example# ifindex_sensor_rename_new_name=new name -# To "manually" correct flow sizes and rates for sampling for specified sensors +# To manually correct flow sizes and rates for sampling, specified sensor(s) only. # (See the Docker Advanced documentation. This is uncommon.) sampling_correction_flag=False -# sampling_correction_sensors=sensor1,sensor2 -# sampling_correction_factor=1 +#example# sampling_correction_sensors=sensor1,sensor2 +#example# sampling_correction_factor=100 # Logstash Aggregation Filter settings # default inactivity_timeout is 630 sec for 5-minute nfcapd files; for 15-minute files, use 960 sec. From 4adb39914a47f5938437680b5d423f6d9e6f3fd8 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 4 May 2022 22:12:54 +0000 Subject: [PATCH 074/126] Couple fixes to last commit. Changed timeouts to 5 min and 1 hr in env.example. --- conf-logstash/15-sensor-specific-changes.conf | 4 ++-- env.example | 20 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/conf-logstash/15-sensor-specific-changes.conf b/conf-logstash/15-sensor-specific-changes.conf index 778fac51..1a1e1cbc 100644 --- a/conf-logstash/15-sensor-specific-changes.conf +++ b/conf-logstash/15-sensor-specific-changes.conf @@ -44,13 +44,13 @@ filter { next if f == 'dummy' # if filter-sensor=flow-sensor or the filter apples to all sensors, check the ifindex list # Once 'keep' is determined, quit loop and move on to next flow. - if ( (f =~ /^\s*ALL\s*:/) or (f =~ /^\s*#{event.get('[meta][sensor_id]')}\s*:/) or (! f.include? ':') + if (f =~ /^\s*ALL\s*:/) or (f =~ /^\s*#{event.get('[meta][sensor_id]')}\s*:/) or (! f.include? ':') f.sub!(/.*:/, '') # remove : and everything before it f.gsub!(/\s/, '') # get rid of spaces in ifindex list indexes = f.split(',') # split on commas into an array # only if the ifindex-list is ALL or includes the current ifindex, keep this flow. action = 'drop' - if (indexes.include? 'ALL') or (indexes.include? event.get('[meta][src_ifindex]').to_s or indexes.include? event.get('[meta][dst_ifindex]').to_s) + if (indexes.include? 'ALL') or (indexes.include? event.get('[meta][src_ifindex]').to_s) or (indexes.include? event.get('[meta][dst_ifindex]').to_s) action = 'keep' break end diff --git a/env.example b/env.example index fe8f0b87..07627323 100644 --- a/env.example +++ b/env.example @@ -16,26 +16,26 @@ rabbitmq_output_key=netsage_archive_input # "ALL" can refer to all sensors or all interfaces of a sensor. # If a sensor is not references, all flows will be kept. ifindex_filter_flag=False -#example# ifindex_filter_keep= Sensor-1: 456,789; Sensor 2: ALL +#example: ifindex_filter_keep= Sensor-1: 456,789; Sensor 2: ALL # To change the sensor name for flows coming from a given sensor and using a certain interface. 
# (See the Docker Advanced documentation) ifindex_sensor_rename_flag=False -#example# ifindex_sensor_rename_ifindex=123 -#example# ifindex_sensor_rename_old_name=old name -#example# ifindex_sensor_rename_new_name=new name +#example: ifindex_sensor_rename_ifindex=123 +#example: ifindex_sensor_rename_old_name=old name +#example: ifindex_sensor_rename_new_name=new name # To manually correct flow sizes and rates for sampling, specified sensor(s) only. # (See the Docker Advanced documentation. This is uncommon.) sampling_correction_flag=False -#example# sampling_correction_sensors=sensor1,sensor2 -#example# sampling_correction_factor=100 +#example: sampling_correction_sensors=sensor1,sensor2 +#example: sampling_correction_factor=100 # Logstash Aggregation Filter settings -# default inactivity_timeout is 630 sec for 5-minute nfcapd files; for 15-minute files, use 960 sec. -# max_flow_timeout is the maximum flow duration; longer flows will be broken up. -inactivity_timeout=630 -max_flow_timeout=86400 +# default inactivity_timeout is 5-minute +# default max_flow_timeout is 1 hour. This is the maximum allowed flow duration; longer flows will be broken up. +inactivity_timeout=300 +max_flow_timeout=3600 aggregation_maps_path=/data/logstash-aggregation-maps # Logstash settings From 061528d62a4bb80a27db4090ad3473ea81edca5d Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Tue, 24 May 2022 19:31:04 +0000 Subject: [PATCH 075/126] Updated deprecated config parameters in translate filters --- conf-logstash/53-caida-org.conf | 8 ++++---- conf-logstash/90-additional-fields.conf | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/conf-logstash/53-caida-org.conf b/conf-logstash/53-caida-org.conf index c88e08b3..e290b4e6 100644 --- a/conf-logstash/53-caida-org.conf +++ b/conf-logstash/53-caida-org.conf @@ -8,8 +8,8 @@ filter { if [meta][src_asn] != -1 { translate { id => "53-1" - field => "[meta][src_asn]" - destination => "[meta][src_organization]" + source => "[meta][src_asn]" + target => "[meta][src_organization]" dictionary_path => "/var/lib/grnoc/netsage/CAIDA-org-lookup.csv" fallback => "Unknown" override => true @@ -26,8 +26,8 @@ filter { if [meta][dst_asn] != -1 { translate { id => "53-3" - field => "[meta][dst_asn]" - destination => "[meta][dst_organization]" + source => "[meta][dst_asn]" + target => "[meta][dst_organization]" dictionary_path => "/var/lib/grnoc/netsage/CAIDA-org-lookup.csv" fallback => "Unknown" override => true diff --git a/conf-logstash/90-additional-fields.conf b/conf-logstash/90-additional-fields.conf index 2a3703f9..84b7cc49 100644 --- a/conf-logstash/90-additional-fields.conf +++ b/conf-logstash/90-additional-fields.conf @@ -5,8 +5,8 @@ filter { # sensor_group: Use dictionary to group together sensor IDs translate { id => "90-1" - field => "[meta][sensor_id]" - destination => "[meta][sensor_group]" + source => "[meta][sensor_id]" + target => "[meta][sensor_group]" dictionary_path => "/etc/logstash/conf.d/support/sensor_groups.json" regex => true } @@ -14,8 +14,8 @@ filter { # sensor_type: Use dictionary to set sensor_type such as Archive, Circuit, Exchange Point, etc. 
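Both of these lookups use regex-keyed dictionaries (the support/sensor_groups.json and support/sensor_types.json files appear later in this series); the matching behaves roughly like the Ruby sketch below, where stopping at the first matching pattern is an assumption rather than something this file states:

    # Regex-keyed dictionary lookup in the spirit of the translate filter (sketch only).
    SENSOR_TYPES = {
      /^NEAAR.*/   => "Circuit",
      /^.*cenic.*/ => "Regional Network",
      /^CERN.*/    => "Facility Edge",
    }

    def sensor_type(sensor_id)
      SENSOR_TYPES.each { |pattern, type| return type if sensor_id =~ pattern }
      nil               # no fallback value is configured for these two lookups
    end

    puts sensor_type("CERN flow sensor")    # Facility Edge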
translate { id => "90-2" - field => "[meta][sensor_id]" - destination => "[meta][sensor_type]" + source => "[meta][sensor_id]" + target => "[meta][sensor_type]" dictionary_path => "/etc/logstash/conf.d/support/sensor_types.json" regex => true } From 6983e3ed12e0b7c0c57df28045bfa691eb04f514 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Tue, 24 May 2022 21:57:59 +0000 Subject: [PATCH 076/126] Added and moved some type conversions --- conf-logstash/01-input-rabbit.conf | 1 - conf-logstash/05-translate-pmacct.conf | 36 ++++++++++++++----- conf-logstash/10-preliminaries.conf | 23 +++--------- conf-logstash/15-sensor-specific-changes.conf | 4 +-- conf-logstash/40-aggregation.conf | 12 +++---- conf-logstash/41-thresholds.conf | 6 ++-- 6 files changed, 43 insertions(+), 39 deletions(-) diff --git a/conf-logstash/01-input-rabbit.conf b/conf-logstash/01-input-rabbit.conf index b333b13c..9279c151 100644 --- a/conf-logstash/01-input-rabbit.conf +++ b/conf-logstash/01-input-rabbit.conf @@ -2,7 +2,6 @@ input { # Normally, input events are flows from the named rabbit queue on LOCALHOST - # (The 'netsage_deidentfier_raw' rabbit queue may contain flows from netsage-netflow-importer-daemon and/or tstat_send.) # "${env-var:default-value}" will be replaced by the env-var environment variable value, or default-value if that is not set. # Change the queue and key name, if needed. rabbitmq{ diff --git a/conf-logstash/05-translate-pmacct.conf b/conf-logstash/05-translate-pmacct.conf index dfaa3304..917317ef 100644 --- a/conf-logstash/05-translate-pmacct.conf +++ b/conf-logstash/05-translate-pmacct.conf @@ -66,32 +66,52 @@ filter { rename => {'as_src' => '[meta][src_asn]'} rename => {'as_dst' => '[meta][dst_asn]'} rename => {'packets' => '[values][num_packets]'} - convert => {'start' => 'float'} - convert => {'end' => 'float'} id => "05-5" } + # Start and end are timestamps at this point. Make sure they are floats. + mutate { + convert => { + 'start' => 'float' + 'end' => 'float' + } + id => "05-6" + } + # Calculations ruby { code => ' event.set( "[values][num_bits]", event.get("bytes") * 8 ) event.set( "[values][duration]", event.get("end") - event.get("start") ) if event.get("[values][duration]") <= 0.001 ## == 0 to within roundoff error - event.set( "[values][bits_per_second]", 0 ) - event.set( "[values][packets_per_second]", 0 ) + event.set( "[values][bits_per_second]", 0.0 ) + event.set( "[values][packets_per_second]", 0.0 ) else bps = event.get("[values][num_bits]") / event.get("[values][duration]") pps = event.get("[values][num_packets]") / event.get("[values][duration]") - event.set( "[values][bits_per_second]" , bps.to_i ) - event.set( "[values][packets_per_second]", pps.to_i ) + event.set( "[values][bits_per_second]" , bps ) + event.set( "[values][packets_per_second]", pps ) end ' tag_on_exception => '_rubyexception in 05-translate-pmacct. ' - id => "05-6" + id => "05-7" + } + # Make sure these are numeric types. We need to use them in calculations and comparisons later. 
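The ruby block above reduces to simple arithmetic; a standalone Ruby illustration with made-up numbers (1,500,000 bytes and 1,000 packets over 30 seconds):

    # The duration / rate arithmetic used above (illustration only).
    bytes       = 1_500_000
    num_packets = 1_000
    start_ts    = 1_651_000_000.0          # epoch seconds
    end_ts      = 1_651_000_030.0

    num_bits = bytes * 8                   # 12_000_000
    duration = end_ts - start_ts           # 30.0 s
    if duration <= 0.001                   # zero to within round-off error
      bps = pps = 0.0
    else
      bps = num_bits / duration            # 400_000.0 bits per second
      pps = num_packets / duration         # about 33.3 packets per second
    end
    puts "duration=#{duration}s bps=#{bps} pps=#{pps}"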
+ mutate { + convert => { + '[values][num_bits]' => 'integer' + '[values][num_packets]' => 'integer' + '[values][duration]' => 'float' + '[values][bits_per_second]' => 'float' + '[values][packets_per_second]' => 'float' + '[meta][src_port]' => 'integer' + '[meta][dst_port]' => 'integer' + } + id => "05-8" } # Remove unneeded fields mutate { remove_field => [ 'sampling_rate', 'event_type', 'writer_id' ] remove_field => [ 'label', 'bytes' ] - id => "05-7" + id => "05-9" } } diff --git a/conf-logstash/10-preliminaries.conf b/conf-logstash/10-preliminaries.conf index 7c9c0446..4b7dbb8a 100644 --- a/conf-logstash/10-preliminaries.conf +++ b/conf-logstash/10-preliminaries.conf @@ -35,7 +35,7 @@ filter { mutate{ id => "10-05" add_tag => [ "duration was missing!?" ] - add_field => { "[values][duration]" => 0 } + add_field => { "[values][duration]" => 0.0 } } } @@ -71,28 +71,13 @@ filter { target => "@ingest_time" } - # 4. Convert strings to numeric types where appropriate. We need to use these in calculations later. - # Start and end are timestamps at this point. Make sure they are floats. - mutate { - id => "10-6" - convert => { - 'start' => 'float' - 'end' => 'float' - '[values][duration]' => 'float' - '[values][num_bits]' => 'integer' - '[values][num_packets]' => 'integer' - '[values][bits_per_second]' => 'float' - '[values][packets_per_second]' => 'float' - } - } - + # 4. Convert any timestamps in ms to s ruby { - id => "10-7" + id => "10-6" code => " flow_ts = event.get('start').to_f flow_te = event.get('end').to_f - # 5. Convert any timestamps in ms to s if flow_ts > 9999999999.0 flow_ts = flow_ts / 1000.0 event.set('start', flow_ts) @@ -102,7 +87,7 @@ filter { event.set('end', flow_te) end - # 6. DROP any event with a strange start or end time + # 5. 
DROP any event with a strange start or end time # > 10 sec in the future or > 1 year in the past, or end < start current_t = Time.now.to_f age_s = current_t - flow_ts diff --git a/conf-logstash/15-sensor-specific-changes.conf b/conf-logstash/15-sensor-specific-changes.conf index 1a1e1cbc..3831e411 100644 --- a/conf-logstash/15-sensor-specific-changes.conf +++ b/conf-logstash/15-sensor-specific-changes.conf @@ -110,8 +110,8 @@ filter { correction_factor = event.get('[@metadata][sampling_correction_factor]').to_i event.set('[values][num_bits]', correction_factor * event.get('[values][num_bits]').to_i) event.set('[values][num_packets]', correction_factor * event.get('[values][num_packets]').to_i) - event.set('[values][bits_per_second]', correction_factor * event.get('[values][bits_per_second]').to_i) - event.set('[values][packets_per_second]', correction_factor * event.get('[values][packets_per_second]').to_i) + event.set('[values][bits_per_second]', correction_factor * event.get('[values][bits_per_second]').to_f) + event.set('[values][packets_per_second]', correction_factor * event.get('[values][packets_per_second]').to_f) " id => "15-10" } diff --git a/conf-logstash/40-aggregation.conf b/conf-logstash/40-aggregation.conf index a27125ed..30875367 100644 --- a/conf-logstash/40-aggregation.conf +++ b/conf-logstash/40-aggregation.conf @@ -129,9 +129,9 @@ filter { event.set( '[values][bits_per_second]', event.get('[values][num_bits]') / duration ) else # can't calculate (accurate) rates so set to 0 - event.set( '[values][duration]', 0 ) - event.set( '[values][packets_per_second]', 0 ) - event.set( '[values][bits_per_second]', 0 ) + event.set( '[values][duration]', 0.0 ) + event.set( '[values][packets_per_second]', 0.0 ) + event.set( '[values][bits_per_second]', 0.0 ) end " } @@ -220,9 +220,9 @@ filter { event.set( '[values][packets_per_second]', event.get('[values][num_packets]') / duration ) event.set( '[values][bits_per_second]', event.get('[values][num_bits]') / duration ) else - event.set( '[values][duration]', 0 ) - event.set( '[values][packets_per_second]', 0 ) - event.set( '[values][bits_per_second]', 0 ) + event.set( '[values][duration]', 0.0 ) + event.set( '[values][packets_per_second]', 0.0 ) + event.set( '[values][bits_per_second]', 0.0 ) end " } diff --git a/conf-logstash/41-thresholds.conf b/conf-logstash/41-thresholds.conf index be7e16d7..f7eff9f4 100644 --- a/conf-logstash/41-thresholds.conf +++ b/conf-logstash/41-thresholds.conf @@ -13,9 +13,9 @@ filter { if [values][duration] < 0.1 { mutate { id => "41-2" - replace => {"[values][duration]" => 0} - replace => {"[values][bits_per_second]" => 0} - replace => {"[values][packets_per_second]" => 0} + replace => {"[values][duration]" => 0.0} + replace => {"[values][bits_per_second]" => 0.0} + replace => {"[values][packets_per_second]" => 0.0} } } From 979dd4d57a126f767ce27f435cbdd01eef782835 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 1 Jun 2022 16:35:37 +0000 Subject: [PATCH 077/126] Manual sampling corrections will now be done only if not already done by pmacct; will save new @sampling_corrected=yes/no field; new tags for logstash sampling corrections, no corrections by pmacct; sensor list in env file is now semicolon-delimited. 
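In practice this means the logstash-side correction fires only when no pre-ingest correction was seen; a simplified Ruby sketch of that behaviour (apply_correction and the sample numbers are illustrative; the real logic is in the 15-sensor-specific-changes.conf hunk below):

    # Apply a manual sampling correction only when pmacct has not already done so (sketch).
    def apply_correction(values, factor, sampling_corrected, sensor, listed_sensors)
      return values if sampling_corrected == "yes"
      return values unless listed_sensors.include?(sensor)
      values["num_bits"]           = values["num_bits"].to_i    * factor.to_i
      values["num_packets"]        = values["num_packets"].to_i * factor.to_i
      values["bits_per_second"]    = values["bits_per_second"].to_f    * factor.to_f
      values["packets_per_second"] = values["packets_per_second"].to_f * factor.to_f
      values
    end

    flow = { "num_bits" => 8_000, "num_packets" => 10,
             "bits_per_second" => 800.0, "packets_per_second" => 1.0 }
    p apply_correction(flow, 100, "no", "sensor 1", ["sensor 1", "sensor 2"])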
--- conf-logstash/05-translate-pmacct.conf | 25 ++++++---- conf-logstash/15-sensor-specific-changes.conf | 48 +++++++++++-------- conf-logstash/40-aggregation.conf | 1 + .../60-scireg-tagging-fakegeoip.conf | 2 + conf-logstash/98-post-process.conf | 2 + env.example | 2 +- 6 files changed, 50 insertions(+), 30 deletions(-) diff --git a/conf-logstash/05-translate-pmacct.conf b/conf-logstash/05-translate-pmacct.conf index 917317ef..cf8fcb0b 100644 --- a/conf-logstash/05-translate-pmacct.conf +++ b/conf-logstash/05-translate-pmacct.conf @@ -30,14 +30,19 @@ filter { } # FOR ALL PMACCT FLOWS - - # Tag flows without a sampling rate + # Save sampling correction flag and Tag flows without a sampling rate. # (router may not be sending it, template may not have arrived yet, there may be no sampling) - # Here sampling_rate is just a flag, 0 or 1. Save the flag to @sampling_corrected. + # Here sampling_rate from pmacct is just a flag, 0 or 1. if [sampling_rate] == 0 { mutate { id => "05-3" - add_tag => ["No sampling rate found on ingest"] - add_field => { "@sampling_corrected" => [sampling_rate] } + add_tag => ["No pre-ingest sampling correction"] + add_field => { "@sampling_corrected" => "no" } + } + } else { + mutate { + id => "05-4" + add_field => { "@sampling_corrected" => "yes" } } } # Get sensor name @@ -52,7 +57,7 @@ filter { event.set( "[meta][sensor_id]", sensor ) ' tag_on_exception => '_rubyexception getting sensor from label in 05-translate-pmacct. ' - id => "05-4" + id => "05-5" } # Do field name translations mutate { @@ -66,7 +71,7 @@ filter { rename => {'as_src' => '[meta][src_asn]'} rename => {'as_dst' => '[meta][dst_asn]'} rename => {'packets' => '[values][num_packets]'} - id => "05-5" + id => "05-6" } # Start and end are timestamps at this point. Make sure they are floats. mutate { @@ -74,7 +79,7 @@ filter { 'start' => 'float' 'end' => 'float' } - id => "05-6" + id => "05-7" } # Calculations ruby { @@ -92,7 +97,7 @@ filter { end ' tag_on_exception => '_rubyexception in 05-translate-pmacct. ' - id => "05-7" + id => "05-8" } # Make sure these are numeric types. We need to use them in calculations and comparisons later. mutate { @@ -105,13 +110,13 @@ filter { '[meta][src_port]' => 'integer' '[meta][dst_port]' => 'integer' } - id => "05-8" + id => "05-9" } # Remove unneeded fields mutate { remove_field => [ 'sampling_rate', 'event_type', 'writer_id' ] remove_field => [ 'label', 'bytes' ] - id => "05-9" + id => "05-10" } } diff --git a/conf-logstash/15-sensor-specific-changes.conf b/conf-logstash/15-sensor-specific-changes.conf index 3831e411..f820fc82 100644 --- a/conf-logstash/15-sensor-specific-changes.conf +++ b/conf-logstash/15-sensor-specific-changes.conf @@ -32,9 +32,10 @@ filter { add_field => { "[@metadata][ifindex_filter_keep]" => "dummy" } id => "15-2" } - # Each (non-dummy) array element should have 'sensor-name: list-of-approved-ifindexes' ('sensor-name:' optional) + # Each (non-dummy) array element should have 'sensor-name: list-of-approved-ifindexes' ("sensor-name:" optional) ruby { id => "15-3" + tag_on_exception => "_rubyexception A in 15-sensor-specific-changes. " code => " # keep any flows that the filter list does not mention action = 'keep' @@ -86,35 +87,44 @@ filter { # SAMPLING RATE CORRECTIONS - #---- Manually apply a sampling correction to listed sensors. 
- + #---- Manually apply a sampling correction to listed sensors + # but ONLY IF there was no pre-logstash sampling correction applied by pmacct mutate { - add_field => { "[@metadata][sampling_correction_flag]" => "${sampling_correction_flag:False}" } + add_field => { "[@metadata][sampling_correction_flag]" => "${sampling_correction_flag:False}" } id => "15-7" } - if [@metadata][sampling_correction_flag] == "True" { + if [@metadata][sampling_correction_flag] == "True" and [@sampling_corrected] == "no" { mutate { - add_field => { "[@metadata][sampling_correction_sensors]" => "${sampling_correction_sensors:sensor1,sensor2}" } + add_field => { "[@metadata][sampling_correction_sensors]" => "${sampling_correction_sensors:sensor1;sensor2}" } add_field => { "[@metadata][sampling_correction_factor]" => "${sampling_correction_factor:1}" } id => "15-8" } + # make the field into an array (see comments about split above) mutate { - # make the field into an array (see comments about split above) - split => { "[@metadata][sampling_correction_sensors]" => "," } + split => { "[@metadata][sampling_correction_sensors]" => ";" } add_field => { "[@metadata][sampling_correction_sensors]" => "dummy" } id => "15-9" } - if [meta][sensor_id] in [@metadata][sampling_correction_sensors] { - ruby { - code => " - correction_factor = event.get('[@metadata][sampling_correction_factor]').to_i - event.set('[values][num_bits]', correction_factor * event.get('[values][num_bits]').to_i) - event.set('[values][num_packets]', correction_factor * event.get('[values][num_packets]').to_i) - event.set('[values][bits_per_second]', correction_factor * event.get('[values][bits_per_second]').to_f) - event.set('[values][packets_per_second]', correction_factor * event.get('[values][packets_per_second]').to_f) - " - id => "15-10" - } + ruby { + code => ' + # strip any leading or trailing spaces from sensor names + sensors = event.get("[@metadata][sampling_correction_sensors]").map! { |e| e.strip } + # if event sensor is in the list, apply corrections + if sensors.include? event.get("[meta][sensor_id]") + correction_factor = event.get("[@metadata][sampling_correction_factor]") + event.set("[values][num_bits]", correction_factor.to_i * event.get("[values][num_bits]").to_i) + event.set("[values][num_packets]", correction_factor.to_i * event.get("[values][num_packets]").to_i) + event.set("[values][bits_per_second]", correction_factor.to_f * event.get("[values][bits_per_second]").to_f) + event.set("[values][packets_per_second]", correction_factor.to_f * event.get("[values][packets_per_second]").to_f) + event.set("@sampling_corrected", "yes") + newtags = event.get("tags") + newtags ||= [] # if undefined, set to empty array + newtags.push( "Logstash sampling correction = #{correction_factor}" ) + event.set("[tags]", newtags ) + end + ' + id => "15-10" + tag_on_exception => "_rubyexception B in 15-sensor-specific-changes. " } } diff --git a/conf-logstash/40-aggregation.conf b/conf-logstash/40-aggregation.conf index 30875367..4aa583e2 100644 --- a/conf-logstash/40-aggregation.conf +++ b/conf-logstash/40-aggregation.conf @@ -148,6 +148,7 @@ filter { # if duration is > timeout (1 hr), adjust start time to cut off n*timeout (whole hours). # That part of the flow should have already been processed and pushed out. 
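      # (Illustration of the adjustment described above, assuming the 3600 s default timeout:
      #  for a stitched duration of 2.5 hours,
      #      n         = (duration / timeout).floor    # 2 whole hours already pushed out
      #      new_start = start + n * timeout           # keep only the trailing half hour
      #  The actual adjustment is done in the code block that follows.)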
id => "40-5" + tag_on_exception => '_rubyexception in 40-aggregation.conf' code => " start = event.get( 'start' ) duration = event.get( '[values][duration]' ).to_f diff --git a/conf-logstash/60-scireg-tagging-fakegeoip.conf b/conf-logstash/60-scireg-tagging-fakegeoip.conf index 3a1d7367..e1af7e5f 100644 --- a/conf-logstash/60-scireg-tagging-fakegeoip.conf +++ b/conf-logstash/60-scireg-tagging-fakegeoip.conf @@ -40,6 +40,7 @@ filter { if [meta][scireg][src][projects][0] { ruby { id => "60-5" + tag_on_exception => '_rubyexception A in 60-scireg-tagging-fakegeoip.conf' code => " event.set('[meta][scireg][src][project_names]', event.get('[meta][scireg][src][projects]').map{ |n| n['project_name'] }) " @@ -48,6 +49,7 @@ filter { if [meta][scireg][dst][projects][0] { ruby { id => "60-6" + tag_on_exception => '_rubyexception B in 60-scireg-tagging-fakegeoip.conf' code => " event.set('[meta][scireg][dst][project_names]', event.get('[meta][scireg][dst][projects]').map{ |n| n['project_name'] }) " diff --git a/conf-logstash/98-post-process.conf b/conf-logstash/98-post-process.conf index 89109bde..a6053906 100644 --- a/conf-logstash/98-post-process.conf +++ b/conf-logstash/98-post-process.conf @@ -1,5 +1,6 @@ # info useful for monitoring what logstash is doing filter { + ruby { id => '98-1' code => ' @@ -9,4 +10,5 @@ filter { ' tag_on_exception => '_rubyexception in 98-post-process.conf' } + } diff --git a/env.example b/env.example index 07627323..0ecdf022 100644 --- a/env.example +++ b/env.example @@ -28,7 +28,7 @@ ifindex_sensor_rename_flag=False # To manually correct flow sizes and rates for sampling, specified sensor(s) only. # (See the Docker Advanced documentation. This is uncommon.) sampling_correction_flag=False -#example: sampling_correction_sensors=sensor1,sensor2 +#example: sampling_correction_sensors=sensor 1;sensor 2 #example: sampling_correction_factor=100 # Logstash Aggregation Filter settings From 682fd25ef753a34650cdac14245aa8d2fa3f0185 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 1 Jun 2022 18:07:09 +0000 Subject: [PATCH 078/126] Added CERN regexes to sensor group and type files --- conf-logstash/support/sensor_groups.json | 1 + conf-logstash/support/sensor_types.json | 13 +++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/conf-logstash/support/sensor_groups.json b/conf-logstash/support/sensor_groups.json index 32007f75..0d503d42 100644 --- a/conf-logstash/support/sensor_groups.json +++ b/conf-logstash/support/sensor_groups.json @@ -1,5 +1,6 @@ { "^.*cenic.*": "CENIC", + "^CERN.*": "CERN", "^FRGP.*": "FRGP", "^GEANT.*": "GEANT", "^gpn-.*": "GPN", diff --git a/conf-logstash/support/sensor_types.json b/conf-logstash/support/sensor_types.json index a635e1b1..7ac568f3 100644 --- a/conf-logstash/support/sensor_types.json +++ b/conf-logstash/support/sensor_types.json @@ -1,21 +1,21 @@ { "^.*Tstat$": "Data Archive", "^.*nersc\\.gov$": "Data Archive", + "^GEANT.*$": "Circuit", "^Hawaii.*netflow$": "Circuit", "^NEAAR.*": "Circuit", "^NEA3R.*": "Circuit", - "^TransPAC.*": "Circuit", - "^GEANT.*$": "Circuit", "^NORDUnet.*$": "Circuit", - "^SingAREN.*$": "Exchange Point", + "^TransPAC.*": "Circuit", "^.*pacificwave\\.net$": "Exchange Point", "^.*pnw-gigapop\\.net$": "Exchange Point", + "^SingAREN.*$": "Exchange Point", "^.*cenic.*$": "Regional Network", + "^FRGP.*$": "Regional Network", + "^GigaPOP.*$": "Regional Network", "^gpn-.*$": "Regional Network", "^GPN-.*$": "Regional Network", "^GPN .*$": "Regional Network", - "^FRGP.*$": "Regional Network", - 
"^GigaPOP.*$": "Regional Network", "^i-Light.*$": "Regional Network", "^LEARN.*$": "Regional Network", "^PennREN.*$": "Regional Network", @@ -23,5 +23,6 @@ "^.*sox.*$": "Regional Network", "^.*SoX.*$": "Regional Network", "^Sun Corridor.*$": "Regional Network", - "^tacc_netflows$": "Regional Network" + "^tacc_netflows$": "Regional Network", + "^CERN.*$": "Facility Edge" } From e319871ce892989b61f0b13b799111334c381b23 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Jun 2022 20:38:42 +0000 Subject: [PATCH 079/126] Bump eventsource from 1.1.0 to 1.1.1 in /website Bumps [eventsource](https://github.com/EventSource/eventsource) from 1.1.0 to 1.1.1. - [Release notes](https://github.com/EventSource/eventsource/releases) - [Changelog](https://github.com/EventSource/eventsource/blob/master/HISTORY.md) - [Commits](https://github.com/EventSource/eventsource/compare/v1.1.0...v1.1.1) --- updated-dependencies: - dependency-name: eventsource dependency-type: indirect ... Signed-off-by: dependabot[bot] --- website/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index 07aeee5f..78e0737d 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -4103,9 +4103,9 @@ events@^3.0.0: integrity sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q== eventsource@^1.0.7: - version "1.1.0" - resolved "https://registry.yarnpkg.com/eventsource/-/eventsource-1.1.0.tgz#00e8ca7c92109e94b0ddf32dac677d841028cfaf" - integrity sha512-VSJjT5oCNrFvCS6igjzPAt5hBzQ2qPBFIbJ03zLI9SE0mxwZpMw6BfJrbFHm1a141AavMEB8JHmBhWAd66PfCg== + version "1.1.1" + resolved "https://registry.yarnpkg.com/eventsource/-/eventsource-1.1.1.tgz#4544a35a57d7120fba4fa4c86cb4023b2c09df2f" + integrity sha512-qV5ZC0h7jYIAOhArFJgSfdyz6rALJyb270714o7ZtNnw2WSJ+eexhKtE0O8LYPRsHZHf2osHKZBxGPvm3kPkCA== dependencies: original "^1.0.0" From 6634e76d0da96c14c8aa51c08cde222caa85ae82 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 1 Jun 2022 21:29:06 +0000 Subject: [PATCH 080/126] Added subnet filtering --- conf-logstash/15-sensor-specific-changes.conf | 66 ++++++++++++++++++- env.example | 20 ++++-- 2 files changed, 79 insertions(+), 7 deletions(-) diff --git a/conf-logstash/15-sensor-specific-changes.conf b/conf-logstash/15-sensor-specific-changes.conf index f820fc82..581e4f2e 100644 --- a/conf-logstash/15-sensor-specific-changes.conf +++ b/conf-logstash/15-sensor-specific-changes.conf @@ -106,6 +106,8 @@ filter { id => "15-9" } ruby { + id => "15-10" + tag_on_exception => "_rubyexception B in 15-sensor-specific-changes. " code => ' # strip any leading or trailing spaces from sensor names sensors = event.get("[@metadata][sampling_correction_sensors]").map! { |e| e.strip } @@ -123,8 +125,68 @@ filter { event.set("[tags]", newtags ) end ' - id => "15-10" - tag_on_exception => "_rubyexception B in 15-sensor-specific-changes. " + } + } + + # SUBNET FILTERING + #---- For named sensors, drop all flows except those that have src or dst IP in a specified list of subnets. + # But keep all flows if a sensor is not referenced at all in the list (by name or ALL)! + # Example setting in env file: subnet_filter_keep="ALL: 123.45.6.0/24; Sensor 1: 98.765.43.0/24, 100.222.33.0/24" + # "ALL:" or a sensor name must be specified before each subnet list. If specified, the sensor name must be exact. + # Separate subnets with commas and lists with semicolons. 
+ mutate { + add_field => { "[@metadata][subnet_filter_flag]" => "${subnet_filter_flag:False}" } + id => "15-11" + } + if [@metadata][subnet_filter_flag] == "True" { + mutate { + add_field => { "[@metadata][subnet_filter_keep]" => "${subnet_filter_keep:Some-Sensor:134.456.78.0/24}" } + id => "15-12" + } + mutate { + # Split the string on ';' into an array of the same name (in a separate mutate, since in mutate, split happens before all add_fields) + # Add a dummy array element to force it to be an array, in case there is just 1 value in the env file. + split => { "[@metadata][subnet_filter_keep]" => ";" } + add_field => { "[@metadata][subnet_filter_keep]" => "dummy" } + id => "15-13" + } + # Each (non-dummy) array element should have 'sensor-name: list-of-approved-subnets' + # Use Ruby to loop and test + ruby { + id => "15-14" + tag_on_exception => "_rubyexception C in 15-sensor-specific-changes. " + code => ' + require "ipaddr" + # Keep any flows that the filter list does not mention + action = "keep" + flow_sensor = event.get("[meta][sensor_id]") + flow_src = event.get("[meta][src_ip]") + flow_dst = event.get("[meta][dst_ip]") + filters = event.get("[@metadata][subnet_filter_keep]").map! { |e| e.strip } # already an array; strip leading and trailing spaces + # Loop over array of filters + filters.each do |f| + next if f == "dummy" + # If filter f specifies a sensor that is not the current sensor, we can skip it. + # Otherwise, parse f to remove the sensor name and get the subnet list. + if (f.include? "ALL:") or (f =~ /^#{flow_sensor}\s*:/) + f.sub!(/#{flow_sensor}\s*:/, "") + f.sub!(/ALL\s*:/, "") + f.gsub!(/\s/, "") + subnets = f.split(",") + # default is now to drop the flow + action = "drop" + # Loop over the subnets in the list + subnets.each do |net| + netobj = IPAddr.new(net) + if ( netobj.include? flow_src ) or ( netobj.include? flow_dst ) + action = "keep" + break + end + end + end # end if this filter list applies + end + event.cancel if action == "drop" + ' } } diff --git a/env.example b/env.example index 0ecdf022..18e256c6 100644 --- a/env.example +++ b/env.example @@ -11,26 +11,36 @@ rabbitmq_output_username=guest rabbitmq_output_pw=guest rabbitmq_output_key=netsage_archive_input -# To filter flows by sensor(s) and interface(s) +# To do ifindex (interface) filtering of flows from specified sensors: +# Flows will be dropped unless src or dst interface is in the list of ifindexes to keep. # (see the Docker Advanced documentation) # "ALL" can refer to all sensors or all interfaces of a sensor. -# If a sensor is not references, all flows will be kept. +# If a sensor is not referenced, all flows will be kept. ifindex_filter_flag=False #example: ifindex_filter_keep= Sensor-1: 456,789; Sensor 2: ALL -# To change the sensor name for flows coming from a given sensor and using a certain interface. +# To change the sensor name for flows from a given sensor and using a certain interface: # (See the Docker Advanced documentation) ifindex_sensor_rename_flag=False #example: ifindex_sensor_rename_ifindex=123 #example: ifindex_sensor_rename_old_name=old name #example: ifindex_sensor_rename_new_name=new name -# To manually correct flow sizes and rates for sampling, specified sensor(s) only. -# (See the Docker Advanced documentation. This is uncommon.) +# To account for sampling in the logstash pipeline, list affected sensors and the correction factor. +# Normally, corrections are applied before ingest into logstash, but in certain cases, this may be required. 
+# (See the Docker Advanced documentation) sampling_correction_flag=False #example: sampling_correction_sensors=sensor 1;sensor 2 #example: sampling_correction_factor=100 +# To do subnet filtering of flows from specified sensors: +# Flows will be dropped unless src or dst is in the list of subnets to keep. +# (see the Docker Advanced documentation) +# "ALL" can refer to all sensors +# If a sensor is not referenced, all its flows will be kept. +subnet_filter_flag=False +#example: subnet_filter_keep=Sensor 1: 123.45.6.0/16; Sensor 2: 123.33.33.0/24, 456.66.66.0/24 + # Logstash Aggregation Filter settings # default inactivity_timeout is 5-minute # default max_flow_timeout is 1 hour. This is the maximum allowed flow duration; longer flows will be broken up. From 7da2e441c48a7df6db3fc396d91236c76431dcea Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 2 Jun 2022 15:32:43 +0000 Subject: [PATCH 081/126] Added option to skip deidentification --- conf-logstash/70-deidentify.conf | 8 ++++ conf-logstash/95-cleanup.conf | 6 +-- env.example | 69 ++++++++++++++++++-------------- 3 files changed, 50 insertions(+), 33 deletions(-) diff --git a/conf-logstash/70-deidentify.conf b/conf-logstash/70-deidentify.conf index 8d8e1374..9112e843 100644 --- a/conf-logstash/70-deidentify.conf +++ b/conf-logstash/70-deidentify.conf @@ -11,6 +11,12 @@ filter { # For IPV6 addresses, # In anonymize_ipv6.rb script, expand to full format with 8 hextets, then replace the last 4 with x:x:x:x. +# By default, deidentfication will be done. Users may disable it here, or in the environment file for Docker installations. + mutate { + add_field => { "[@metadata][full_IPs_flag]" => "${full_IPs_flag:False}" } + } + if [@metadata][full_IPs_flag] == "False" { + # source ip's grok { id => "70-1" @@ -70,4 +76,6 @@ filter { mutate { update => { "[meta][dst_ip]" => "INVALID IP" } } } + } # end if deidentifying + } diff --git a/conf-logstash/95-cleanup.conf b/conf-logstash/95-cleanup.conf index 1abf9131..63f94a37 100644 --- a/conf-logstash/95-cleanup.conf +++ b/conf-logstash/95-cleanup.conf @@ -1,12 +1,12 @@ filter { # Tag flows with 2 missing IPs (0.0.0.0s). - # Check or edit the 99-outputs file for any action to be taken based on these tags. - if [meta][src_ip] == "0.0.0.x" and [meta][dst_ip] == "0.0.0.x" { + # Check/edit here or the 99-outputs file for any action to be taken based on this tag. + if [meta][src_ip] == "0.0.0.x" and [meta][dst_ip] == "0.0.0.x" + or ([meta][src_ip] == "0.0.0.0" and [meta][dst_ip] == "0.0.0.0") { mutate { id => "95-1" add_tag => ["Missing IPs"] - add_tag => ["DROP"] } } diff --git a/env.example b/env.example index 18e256c6..92776e5a 100644 --- a/env.example +++ b/env.example @@ -1,59 +1,68 @@ -# Sensor name to assign +# Sensor names that logstash will assign to flows # == EXAMPLE VALUES MUST BE REPLACED == sflowSensorName=The Sflow Sensor Name netflowSensorName=The Netflow Sensor Name -# Final Logstash output is to a rabbit queue, -# the default is to write to the local rabbitmq server. -# === FOR SENDING PROCESSED FLOWS TO GlobalNOC, ASK FOR THE PROPER SETTINGS === +# Processed flows are normally written to a rabbit queue. +# The default is to write to the local rabbitmq server "rabbit". +# === TO SEND PROCESSED FLOWS TO GlobalNOC, ASK FOR THE PROPER SETTINGS === rabbitmq_output_host=rabbit rabbitmq_output_username=guest rabbitmq_output_pw=guest rabbitmq_output_key=netsage_archive_input +# Logstash Aggregation Filter settings +# Default inactivity_timeout is 5-minute. 
If no matching flows have come in for 5 minutes, end the aggregated flow. +# Default max_flow_timeout is 1 hour. This is the maximum allowed flow duration; longer flows will be broken up. +# Aggregation_maps_path is where flows undergoing aggregation are saved if logstash shuts down. Default is for Docker installs. +inactivity_timeout=300 +max_flow_timeout=3600 +aggregation_maps_path=/data/logstash-aggregation-maps + +# PROCESSING OPTIONS - see the "Docker Advanced" documentation + # To do ifindex (interface) filtering of flows from specified sensors: -# Flows will be dropped unless src or dst interface is in the list of ifindexes to keep. -# (see the Docker Advanced documentation) +# Flows from listed sensors will be dropped unless src or dst interface is in the list of ifindexes to keep. # "ALL" can refer to all sensors or all interfaces of a sensor. -# If a sensor is not referenced, all flows will be kept. +# If a sensor is not referenced, all its flows will be kept. ifindex_filter_flag=False -#example: ifindex_filter_keep= Sensor-1: 456,789; Sensor 2: ALL +#ifindex_filter_keep= Sensor 1: 456,789; Sensor 2: ALL -# To change the sensor name for flows from a given sensor and using a certain interface: -# (See the Docker Advanced documentation) +# To change the sensor name for flows from a specified sensor and interface: +# Provide the ifindex, old and new sensor names. ifindex_sensor_rename_flag=False -#example: ifindex_sensor_rename_ifindex=123 -#example: ifindex_sensor_rename_old_name=old name -#example: ifindex_sensor_rename_new_name=new name +#ifindex_sensor_rename_ifindex=123 +#ifindex_sensor_rename_old_name=old name +#ifindex_sensor_rename_new_name=new name -# To account for sampling in the logstash pipeline, list affected sensors and the correction factor. -# Normally, corrections are applied before ingest into logstash, but in certain cases, this may be required. -# (See the Docker Advanced documentation) +# To correct for sampling in the logstash pipeline: +# Normally, sampling corrections are applied before ingest into logstash, but in certain cases, +# it may need to be done in logstash. +# List affected sensors and the correction factor. sampling_correction_flag=False -#example: sampling_correction_sensors=sensor 1;sensor 2 -#example: sampling_correction_factor=100 +#sampling_correction_sensors=sensor 1;sensor 2 +#sampling_correction_factor=100 -# To do subnet filtering of flows from specified sensors: -# Flows will be dropped unless src or dst is in the list of subnets to keep. -# (see the Docker Advanced documentation) -# "ALL" can refer to all sensors +# To do subnet filtering of flows: +# Flows from specified sensors will be dropped unless src or dst is in the list of subnets to keep. +# "ALL" can refer to all sensors. # If a sensor is not referenced, all its flows will be kept. subnet_filter_flag=False -#example: subnet_filter_keep=Sensor 1: 123.45.6.0/16; Sensor 2: 123.33.33.0/24, 456.66.66.0/24 +#subnet_filter_keep=Sensor 1: 123.45.6.0/16; Sensor 2: 123.33.33.0/24, 456.66.66.0/24 -# Logstash Aggregation Filter settings -# default inactivity_timeout is 5-minute -# default max_flow_timeout is 1 hour. This is the maximum allowed flow duration; longer flows will be broken up. -inactivity_timeout=300 -max_flow_timeout=3600 -aggregation_maps_path=/data/logstash-aggregation-maps +# To NOT deidentify flows: +# Deidentification of IP addresses is done by default. +# To keep full IP addresses, set this parameter to True. 
+full_IPs_flag=False + +# OTHER SETTINGS # Logstash settings # set this to false so we don't install elasticsearch locally XPACK_MONITORING_ENABLED=false # java heap size for logstash LS_JAVA_OPTS=-Xmx2g -Xms2g -# the logstash aggregation filter requires that only one logstash worker is running +# Do not change - the logstash aggregation filter requires that only one logstash worker is running! PIPELINE_WORKERS=1 # for debugging ## LOG_LEVEL=debug From 04c01444a2eb832c9c69c3d46864d0bfdcf1fd2f Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 8 Jun 2022 14:02:00 +0000 Subject: [PATCH 082/126] Changed agg timeout from 5 to 6 min; changed some comments; add Utah regexes to support/. --- conf-logstash/01-input-rabbit.conf | 8 +++---- conf-logstash/15-sensor-specific-changes.conf | 11 +++++---- conf-logstash/40-aggregation.conf | 24 ++++++++++++------- conf-logstash/70-deidentify.conf | 7 +++++- conf-logstash/99-output-rabbit.conf | 5 +++- .../support/networkA-members-list.rb.example | 10 ++++---- conf-logstash/support/sensor_groups.json | 3 ++- conf-logstash/support/sensor_types.json | 3 ++- env.example | 15 +++++------- 9 files changed, 51 insertions(+), 35 deletions(-) diff --git a/conf-logstash/01-input-rabbit.conf b/conf-logstash/01-input-rabbit.conf index 9279c151..7f575da5 100644 --- a/conf-logstash/01-input-rabbit.conf +++ b/conf-logstash/01-input-rabbit.conf @@ -1,9 +1,9 @@ -##### COPY NEEDED CHANGES TO YOUR EXISTING VERSION AFTER AN UPGRADE ##### +# Values for ${variable-name:default-value} are obtained from an environment file. For docker, from the .env file; +# for bare-metal installations, /etc/logstash/logstash-env-vars - specified in the logstash systemd file) +# If values are not provided in an env file, the defaults/examples following the :'s are used. +# With a bare-metal installation, you may also just edit this file and fill in the values you want. input { - # Normally, input events are flows from the named rabbit queue on LOCALHOST - # "${env-var:default-value}" will be replaced by the env-var environment variable value, or default-value if that is not set. - # Change the queue and key name, if needed. rabbitmq{ host => "${rabbitmq_input_host:localhost}" user => "${rabbitmq_input_username:guest}" diff --git a/conf-logstash/15-sensor-specific-changes.conf b/conf-logstash/15-sensor-specific-changes.conf index 581e4f2e..1504f3e0 100644 --- a/conf-logstash/15-sensor-specific-changes.conf +++ b/conf-logstash/15-sensor-specific-changes.conf @@ -1,11 +1,12 @@ -# Make changes required for specific sensors -# ${variable-name:default-value} are obtained from an environment file (the .env file for Docker installations; -# for bare-metal installations, the default is /etc/logstash/logstash-env-vars - see the logstash systemd file) +# Make any desired changes for flows from specific sensors + +# Values for ${variable-name:default-value} are obtained from an environment file. For docker, from the .env file; +# for bare-metal installations, /etc/logstash/logstash-env-vars - specified in the logstash systemd file) +# If values are not provided in an env file, the defaults/examples following the :'s are used. # With a bare-metal installation, you may also just edit this file and fill in the values you want. -# If values are not provided (eg, there is no env file), the defaults following the :'s are used. -# (Flags will be False, so nothing will happen). # Using env vars in conditionals has been an open issue for logstash since 2016! Workaround is to add a "flag" field. 
+# (@metadata fields are not saved to elasticsearch) filter { diff --git a/conf-logstash/40-aggregation.conf b/conf-logstash/40-aggregation.conf index 4aa583e2..a866be62 100644 --- a/conf-logstash/40-aggregation.conf +++ b/conf-logstash/40-aggregation.conf @@ -1,10 +1,15 @@ # This filter stitches together incoming flows that go together. -# + +# Values for ${variable-name:default-value} are obtained from an environment file. For docker, from the .env file; +# for bare-metal installations, /etc/logstash/logstash-env-vars - specified in the logstash systemd file) +# If values are not provided in an env file, the defaults/examples following the :'s are used. +# With a bare-metal installation, you may also just edit this file and fill in the values you want. + ## Fields most likely to be specific to a logstash pipeline: -## You may set these via environment variables ## aggregate_maps_path - must be unique for each logstash pipeline. Default is /tmp/logstash-aggregation-maps. -## inactivity_timeout - default is 5 min. -## timeout - the maximum length of a flow. Default is 1 hr. +## inactivity_timeout - when to declare a flow ended. +## timeout - the maximum length of a flow. + ## NOTE THAT THERE ARE SEPARATE SECTIONS FOR SFLOW AND NETFLOW, ## EDIT BOTH !!!! @@ -19,6 +24,7 @@ filter { add_field => { 'stitched_flows' => 0 } } } + else { # for aggregation, we need the 'start' or 'end' date, as well as as timestamp date { @@ -55,8 +61,8 @@ filter { # greater than inactivity_timeout, it ends the current flow and starts a new one. # ALSO, every 5 sec, it compares the ingest clock time of the last matching event to NOW. # If more than inactivity_timeout seconds have passed, it declares the flow finished.) - ## default 300 sec = 5 min - inactivity_timeout => "${inactivity_timeout:300}" + ## default 360 sec = 6 min + inactivity_timeout => "${inactivity_timeout:360}" # Active timeout # = maximum possible flow duration @@ -174,7 +180,7 @@ filter { # see comments above. MAKE SURE THE VALUES/DEFAULTS ARE THE SAME HERE. timeout_timestamp_field => '[start_date]' - inactivity_timeout => "${inactivity_timeout:300}" + inactivity_timeout => "${inactivity_timeout:360}" timeout => "${max_flow_timeout:3600}" push_map_as_event_on_timeout => true @@ -188,8 +194,8 @@ filter { map['stitched_flows'] ||= 0 map['stitched_flows'] += 1 - map['start'] ||= event.get('start') - map['end'] ||= event.get('end') + map['start'] ||= event.get('start') + map['end'] ||= event.get('end') map['meta'] ||= event.get('meta') map['values'] ||= event.get('values') map['tags'] ||= event.get('tags') diff --git a/conf-logstash/70-deidentify.conf b/conf-logstash/70-deidentify.conf index 9112e843..40d273f0 100644 --- a/conf-logstash/70-deidentify.conf +++ b/conf-logstash/70-deidentify.conf @@ -11,7 +11,12 @@ filter { # For IPV6 addresses, # In anonymize_ipv6.rb script, expand to full format with 8 hextets, then replace the last 4 with x:x:x:x. -# By default, deidentfication will be done. Users may disable it here, or in the environment file for Docker installations. +# Values for ${variable-name:default-value} are obtained from an environment file. For docker, from the .env file; +# for bare-metal installations, /etc/logstash/logstash-env-vars - specified in the logstash systemd file) +# If values are not provided in an env file, the defaults/examples following the :'s are used. +# With a bare-metal installation, you may also just edit this file and fill in the values you want. 
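As a standalone illustration of the deidentification rule described in these comments, the sketch below uses Ruby's ipaddr library: keep the IPv4 address but replace the last octet with "x", or expand an IPv6 address to its full 8-hextet form and replace the last 4 hextets with x:x:x:x. It is not the code in anonymize_ipv6.rb, and the function name is made up.

```ruby
require 'ipaddr'

# Sketch of the deidentification rule only; the pipeline does this in logstash.
def deidentify(ip)
  addr = IPAddr.new(ip)
  if addr.ipv4?
    ip.sub(/\.\d+\z/, '.x')                      # 171.64.68.17 -> 171.64.68.x
  else
    full = addr.to_string                        # full 8-hextet form
    full.split(':')[0, 4].join(':') + ':x:x:x:x'
  end
end

puts deidentify('171.64.68.17')   # => "171.64.68.x"
puts deidentify('2001:db8::1')    # => "2001:0db8:0000:0000:x:x:x:x"
```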
+ +# By default, IP addresses are deidentified mutate { add_field => { "[@metadata][full_IPs_flag]" => "${full_IPs_flag:False}" } } diff --git a/conf-logstash/99-output-rabbit.conf b/conf-logstash/99-output-rabbit.conf index 28bd0fb1..caa32455 100644 --- a/conf-logstash/99-output-rabbit.conf +++ b/conf-logstash/99-output-rabbit.conf @@ -1,4 +1,7 @@ -##### COPY ANY CHANGES TO YOUR EXISTING VERSION AFTER AN UPGRADE ##### +# Values for ${variable-name:default-value} are obtained from an environment file. For docker, from the .env file; +# for bare-metal installations, /etc/logstash/logstash-env-vars - specified in the logstash systemd file) +# If values are not provided in an env file, the defaults/examples following the :'s are used. +# With a bare-metal installation, you may also just edit this file and fill in the values you want. output { diff --git a/conf-logstash/support/networkA-members-list.rb.example b/conf-logstash/support/networkA-members-list.rb.example index 8a6caebb..63fbf80a 100644 --- a/conf-logstash/support/networkA-members-list.rb.example +++ b/conf-logstash/support/networkA-members-list.rb.example @@ -1,12 +1,14 @@ -# This is an example of how to set up member or customer netblock mappings -# The name of the file must be networkA-members-list.rb -# (replace networkA with the name of the network in the filename and below) +# This is an example of how to set up member or customer netblock mappings. +# The name of the file must be networkA-members-list.rb. +# (replace "networkA" with the name of the actual network in the filename and 2 places below) # List of ASNs that include all the member netblocks (integers) +# An IP will be looked up only if the ASN is in this list. @asn_list['networkA'] = [1234, 4567] # List of netblocks and the desired organization name for each -# Best to put the biggest blocks/those with most flows at top +# The first match is returned, so it'll be fastest if you put the biggest blocks/those with most flows at top, +# but it is vital to put the most specific netblocks first. Put any "catch alls" at the bottom. @members['networkA'] = { "111.22.33.0/24" => "Member A", "444.55.66.0/32" => "Customer B", diff --git a/conf-logstash/support/sensor_groups.json b/conf-logstash/support/sensor_groups.json index 0d503d42..3976d12a 100644 --- a/conf-logstash/support/sensor_groups.json +++ b/conf-logstash/support/sensor_groups.json @@ -25,5 +25,6 @@ "^TACC.*": "TACC", "^tacc.*": "TACC", "^TransPAC.*": "TransPAC", - "^.*UCAR.*": "UCAR" + "^.*UCAR.*": "UCAR", + "^Utah.*": "Utah" } diff --git a/conf-logstash/support/sensor_types.json b/conf-logstash/support/sensor_types.json index 7ac568f3..355d1a40 100644 --- a/conf-logstash/support/sensor_types.json +++ b/conf-logstash/support/sensor_types.json @@ -24,5 +24,6 @@ "^.*SoX.*$": "Regional Network", "^Sun Corridor.*$": "Regional Network", "^tacc_netflows$": "Regional Network", - "^CERN.*$": "Facility Edge" + "^CERN.*$": "Facility Edge", + "^Utah.*$": "Campus" } diff --git a/env.example b/env.example index 92776e5a..f31f0673 100644 --- a/env.example +++ b/env.example @@ -1,5 +1,5 @@ # Sensor names that logstash will assign to flows -# == EXAMPLE VALUES MUST BE REPLACED == +# === EXAMPLE VALUES MUST BE REPLACED === sflowSensorName=The Sflow Sensor Name netflowSensorName=The Netflow Sensor Name @@ -12,10 +12,10 @@ rabbitmq_output_pw=guest rabbitmq_output_key=netsage_archive_input # Logstash Aggregation Filter settings -# Default inactivity_timeout is 5-minute. 
If no matching flows have come in for 5 minutes, end the aggregated flow. +# Default inactivity_timeout is 6-minute. If no matching flows have come in for 6 minutes, end the aggregated flow. # Default max_flow_timeout is 1 hour. This is the maximum allowed flow duration; longer flows will be broken up. -# Aggregation_maps_path is where flows undergoing aggregation are saved if logstash shuts down. Default is for Docker installs. -inactivity_timeout=300 +# Aggregation_maps_path is where flows undergoing aggregation are saved if logstash shuts down. The default is for Docker installs. +inactivity_timeout=360 max_flow_timeout=3600 aggregation_maps_path=/data/logstash-aggregation-maps @@ -62,20 +62,17 @@ full_IPs_flag=False XPACK_MONITORING_ENABLED=false # java heap size for logstash LS_JAVA_OPTS=-Xmx2g -Xms2g -# Do not change - the logstash aggregation filter requires that only one logstash worker is running! +# Do not change unless you are not using logstash aggregation! The aggregation filter requires one logstash worker only! PIPELINE_WORKERS=1 # for debugging ## LOG_LEVEL=debug -# Local RabbitMQ Server config +# Local RabbitMQ Server config (for the post-pmacct/pre-logstash queue) RABBITMQ_ERLANG_COOKIE='secret cookie' RABBIT_HOST=rabbit RABBITMQ_DEFAULT_USER=guest RABBITMQ_DEFAULT_PASS=guest discovery.type=single-node - -# Importer output rabbit host = Logstash input rabbit host -# default is to use the local rabbitmq server rabbitmq_input_host=rabbit rabbitmq_input_username=guest rabbitmq_input_pw=guest From 8463538b9c42cd07e75c855adeb3f4ad5766878f Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 8 Jun 2022 21:18:17 +0000 Subject: [PATCH 083/126] Modified Intro docs to remove nfdump and importer and add pmacct --- .../{elastic_search.md => elasticsearch.md} | 11 +++-- website/docs/pipeline/importer.md | 14 ------ website/docs/pipeline/intro.md | 14 ++++-- website/docs/pipeline/logstash.md | 47 ++++++++++++------- website/docs/pipeline/nfdump.md | 17 ------- website/docs/pipeline/pmacct.md | 14 ++++++ website/docs/pipeline/sensors.md | 14 ++++++ website/docs/pipeline/tstat.md | 2 +- website/sidebars.js | 4 +- 9 files changed, 76 insertions(+), 61 deletions(-) rename website/docs/pipeline/{elastic_search.md => elasticsearch.md} (90%) delete mode 100644 website/docs/pipeline/importer.md delete mode 100644 website/docs/pipeline/nfdump.md create mode 100644 website/docs/pipeline/pmacct.md create mode 100644 website/docs/pipeline/sensors.md diff --git a/website/docs/pipeline/elastic_search.md b/website/docs/pipeline/elasticsearch.md similarity index 90% rename from website/docs/pipeline/elastic_search.md rename to website/docs/pipeline/elasticsearch.md index c82a8dbd..8d7674da 100644 --- a/website/docs/pipeline/elastic_search.md +++ b/website/docs/pipeline/elasticsearch.md @@ -16,20 +16,20 @@ Flow data is ultimately saved to Elasticsearch. Following are the fields that ar |es_doc_id |4f46bef884... |Hash of meta.id and start time. 
May be used as doc id in ES to prevent duplicates, but see Notes elsewhere.| |meta.flow_type |sflow |'sflow', 'netflow', or 'tstat'| |meta.protocol |tcp |Protocol used| -|meta.sensor_id | snvl2-pw-sw-1-mgmt-2.cenic.net|Sensor name (set in importer config, may not always be a hostname) | +|meta.sensor_id |snvl2-pw-sw-1-mgmt-2.cenic.net|Sensor name (set in importer config, may not always be a hostname) | |meta.sensor_group |CENIC |Sensor group, usually the network | |meta.sensor_type |Regional Network |Sensor type ('Circuit', 'Regional Network', etc) | |meta.country_scope |Domestic |'Domestic', 'International', or 'Mixed', depending on countries of src and dst| -|meta.is_network_testing | no | 'yes' if discipline is 'CS.Network Testing and Monitoring' or port is one used for PerfSonar: 5001, 5101, or 5201| +|meta.is_network_testing |no |'yes' if discipline is 'CS.Network Testing and Monitoring' or port is one used for PerfSonar: 5001, 5101, or 5201| ### Source Fields (Destination Fields similarly with "dst") |name |example |description | |-----------------------|-----------------------|-----------------------------| -|meta.src_ip |171.64.68.x | deidentified IP address| +|meta.src_ip |171.64.68.x |deidentified IP address| |meta.src_port |80 |port used | |meta.src_asn |32 |Source ASN from the flow header or, in some cases, the ANS of the IP from the MaxMind GeoIP ASN database| -|meta.src_organization |Stanford University | organization that owns the AS from the CAIDA ASN-Organization database +|meta.src_organization |Stanford University | organization that owns the AS from the CAIDA ASN-Organization database |meta.src_location.lat | 37.423 | latitude of the IP from the MaxMind GeoIP City database| |meta.src_location.lon |-122.164 | longitude of the IP from the MaxMind GeoIP City database| |meta.src_country_name |United States | country of the IP from the MaxMind GeoIP City database| @@ -47,7 +47,7 @@ The [Science Registry](https://scienceregistry.netsage.global/rdb/) stores human |meta.scireg.src.org_abbr |Boston U |A shorter name for the organization. May not be the official abbreviation.| |meta.scireg.src.resource |BU - ATLAS |Descriptive resource name from SciReg | |meta.scireg.src.resource_abbr | |Resource abbreviation (if any)| -|meta.scireg.src.project_names |ATLAS |"Projects" that the resource is part of| +|meta.scireg.src.project_names |ATLAS |"Project(s)" that the resource is part of| |meta.scireg.src.latitude |37.4178 |Resource's latitude, as listed in the Science Registry| |meta.scireg.src.longitude |-122.178 |Resource's longitude, as listed in the Science Registry| @@ -109,6 +109,7 @@ The [Science Registry](https://scienceregistry.netsage.global/rdb/) stores human |@exit_time |Jun 9, 2020 @ 18:03:25.369 |The time the flow exited the pipeline | |@processing_time |688.31 |@exit_time minus @ingest_time. Useful for seeing how long stitching took. | |stitched_flows |13 |Number of flows that came into logstash that were stitched together to make this final one. 1 if no flows were stitched together. 0 for tstat flows, which are never stitched. | +|@sampling_corrected |yes |'yes' if sampling corrections have been done; 'no' otherwise, eg, for netflows before a template has been seen that includes the sampling rate. 
| |tags |maxmind src asn |Various info and error messages| |trial | 5 |Can be set in 40-aggregation.conf if desired| diff --git a/website/docs/pipeline/importer.md b/website/docs/pipeline/importer.md deleted file mode 100644 index 24b05c4b..00000000 --- a/website/docs/pipeline/importer.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -id: importer -title: Importer -sidebar_label: Importer ---- -A netsage-netflow-importer script reads any new nfcapd files that have come in after a configurable delay and writes the results to the "netsage_deidentifier_raw" RabbitMQ queue. -All flow data waits in the queue until it is read in and processed by the logstash pipeline. - -To read nfcapd files, the importer uses an nfdump command with the "-a" option to aggregate raw flows within the file by the "5-tuple," i.e., the source and destination IPs, ports, and protocol. The "-L" option is used to throw out any aggregated flows below a threshold number of bytes. This threshold is specified in the importer config file. - -### Configuration -Configuration files for the importer are netsage_netflow_importer.xml and netsage_shared.xml in /etc/grnoc/netsage/deidentfier/. Comments in the files briefly describe the options. See also the Deployment pages in these docs. - -To avoid re-reading nfcapd files, the importer stores the names of files that have already been read in /var/cache/netsage/netflow_importer.cache. diff --git a/website/docs/pipeline/intro.md b/website/docs/pipeline/intro.md index f4cce287..0c54059c 100644 --- a/website/docs/pipeline/intro.md +++ b/website/docs/pipeline/intro.md @@ -3,33 +3,37 @@ id: intro title: Intro sidebar_label: Intro --- +# Network Flows + +A flow is defined as a series of packets with the same source IP and port, destination IP and port, and protocal (the "5-tuple"). + # The NetSage Pipeline ## Description -The Netsage Flow Processing Pipeline is composed of several components for processing network flow data, including importing, deidentification, metadata tagging, flow stitching, etc. +The Netsage Flow Processing Pipeline is composed of several components for processing network flow data, including collection, deidentification, metadata tagging, flow stitching, etc. There are many ways the components can be combined, configured, and run. These documents will describe the standard "simple" set up and provide information for more complex configurations. ## Data Collection -In Netsage, sensor(s) are network devices configured to collect flow data ([tstat](http://tstat.polito.it/), [sflow](https://www.rfc-editor.org/info/rfc3176), or [netflow](https://www.cisco.com/c/en/us/products/collateral/ios-nx-os-software/ios-netflow/prod_white_paper0900aecd80406232.html)) and send it to a "pipeline host" for processing. +In Netsage, sensor(s) are network devices (eg, routers) configured to collect flow data ([tstat](http://tstat.polito.it/), [sflow](https://www.rfc-editor.org/info/rfc3176), or [netflow/IPFIX](https://www.cisco.com/c/en/us/products/collateral/ios-nx-os-software/ios-netflow/prod_white_paper0900aecd80406232.html)) and send it to a "pipeline host" for processing. Tstat flow data can be sent directly to the pipeline ingest RabbitMQ queue on the pipeline host using the Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) tool. This can be installed as usual or via Docker. -Sflow and netflow data from configured routers should be sent to the pipeline host where it is collected and stored into nfcapd files using [nfdump tools](https://github.com/phaag/nfdump). 
The Netsage project has packaged the nfdump tools into a [Docker container](https://github.com/netsage-project/docker-nfdump-collector) for ease of use. +Sflow and netflow data from configured sensors should be sent to the pipeline host where it is ingested by sfacctd and nfacctd processes - see [pmacct](https://github.com/pmacct/pmacct). (Previous versions of the pipeline used the nfdump package and a custom perl importer script.) ## Pipeline Components The Netsage Flow Processing Pipeline is made of the following components - - Importer: Perl scripts on the pipeline host that read nfcapd flow files and send the flow data to a RabbitMQ queue. ([Doc](importer.md), [in github](https://github.com/netsage-project/netsage-pipeline/blob/master/lib/GRNOC/NetSage/Deidentifier/NetflowImporter.pm)) + - [Pmacct](https://github.com/pmacct/pmacct): the pmacct package includes sfacctd and nfacctd daemons which receive sflow and netflow/IPFIX flows, respectively. They can also do some processing and filtering, but we use these options very minimally. They send the flows to a rabbitmq queue. - [RabbitMQ](https://www.rabbitmq.com/): Used for message passing and queuing of tasks. - [Logstash](https://www.elastic.co/logstash) pipeline: Performs a variety of operations on the flow data to transform it and add additional information. ([Doc](logstash.md)) - [Elasticsearch](https://www.elastic.co/what-is/elasticsearch): Used for storing the final flow data. ## Visualization -[Grafana](https://grafana.com/oss/grafana/) or [Kibana](https://www.elastic.co/kibana) can be used to visualize the data stored in elasticsearch. Netsage Grafana Dashboards are available [in github](https://github.com/netsage-project/netsage-grafana-configs). +[Grafana](https://grafana.com/oss/grafana/) or [Kibana](https://www.elastic.co/kibana) (with appropriate credentials) can be used to visualize the data stored in elasticsearch. Netsage Grafana Dashboards are available [in github](https://github.com/netsage-project/netsage-grafana-configs). ## Pipeline Installation diff --git a/website/docs/pipeline/logstash.md b/website/docs/pipeline/logstash.md index 658b240a..76a7398f 100644 --- a/website/docs/pipeline/logstash.md +++ b/website/docs/pipeline/logstash.md @@ -4,12 +4,12 @@ title: Logstash Pipeline sidebar_label: Logstash --- -The Logstash portion of the Netsage Pipeline reads in flows from a RabbitMQ queue, performs various transformations and adds additional information to them, then sends them to a location specified in the output logstash config, eventually ending up in an Elasticsearch instance. +The Logstash portion of the Netsage Pipeline reads flows from a RabbitMQ queue, performs various transformations and adds additional information to them, then sends them to a location specified in the output logstash config, eventually ending up in an Elasticsearch instance. -Logstash config files invoke various logstash "filters" and actions. These conf files are located in /etc/logstash/conf.d/. See below for a brief description of what each does and check the files for comments. +Logstash config files invoke various logstash "filters" and actions. In the bare metal installation, these conf files are located in /etc/logstash/conf.d/. In a docker installation, the *.conf files in the git checkout, in conf-logstash/, are used. See below for a brief description of what each does and check the files for comments. Notes: - - All \*.conf files in conf.d/ are executed in alphabetical order, as if they were one huge file. 
Those ending in .disabled will not be executed (assuming 'path.config: "/etc/logstash/conf.d/*.conf"' in /etc/logstash/pipelines.yml). + - All \*.conf files in conf.d/ or conf-logstash/ are executed in alphabetical order, as if they were one huge file. Those ending in .disabled will not be executed (assuming 'path.config: "/etc/logstash/conf.d/*.conf"' in /etc/logstash/pipelines.yml). - If actions in a particular .conf file are not needed in your particular case, they can be removed or the file disabled, but check carefully for effects on downstream configs. - MaxMind, CAIDA, and Science Registry database files required by the geoip and aggregate filters are downloaded from scienceregistry.netsage.global via cron jobs weekly or daily. (MaxMind data can change weekly, CAIDA quarterly, Science Registry information randomly.) **NOTE that new versions won't be used in the pipeline until logstash is restarted.** There is a cron file to do this also, though it's not running in Docker deployments. Similarly for other support files, eg, those used in 90-additional-fields.conf. - Lookup tables for 55-member-orgs.conf that we have compiled are available from sciencregistry.grnoc.iu.edu. See the cron files provided. These will not be updated often, so you may run the cron jobs or not. You will need to provide lists for other networks yourself or ask us. @@ -20,7 +20,11 @@ The main things done in each conf file are as follows. ### 01-input-rabbit.conf -Reads flows from a rabbitmq queue. (The ".disabled" extenstion can be removed from other 01-input configs available in conf.d/ to get flows from other sources.) +Reads flows from a rabbitmq queue. (The ".disabled" extention can be removed from other 01-input configs available in conf.d/ to get flows from other sources, probably for testing.) + +### 05-translate-pmacct.conf + +Renames fields provided by pmacct processes to match what the pipeline uses (from before we used pmacct). ### 10-preliminaries.conf @@ -31,24 +35,31 @@ sets duration and rates to 0 if duration is <= 0.002 sec (because tiny durations ### 15-sensor-specific-changes.conf -Makes any changes to fields needed for specific sensors. This config currently provides 1) the ability to drop all flows that do not use interfaces (ifindexes) in a specfied list; lists can be sensor-specific, 2) the ability to change the sensor name for flows from a specified sensor which use a certain interface, and 3) the ability to apply a sampling rate correction manually for named sensors. You may edit the file in a bare-metal installation and specify everything explicitly (upgrades will not overwrite this config) or you may use the environment file specified in the systemd unit file. For Docker installations, use the .env file to specifiy the parameters. By default, this config will do nothing since the flags will be set to False. +Makes any changes to fields needed for specific sensors. This config currently provides 1) the ability to drop all flows that do not use interfaces (ifindexes) in a specfied list; lists can be sensor-specific, 2) the ability to change the sensor name for flows from a specified sensor which use a certain interface, 3) the ability to apply a sampling rate correction manually for named sensors, and 4) the ability to add subnet filtering for flows from specified sensors. + +You may edit the file in a bare-metal installation and specify everything explicitly (upgrades will not overwrite this config) or you may use the environment file specified in the systemd unit file. 
For Docker installations, use the .env file to specifiy the parameters. By default, this config will do nothing since the flags will be set to False. ### 20-add_id.conf -Adds a unique id (evenutally called meta.id) which is a hash of the 5-tuple of the flow (src and dst ips and ports, and protocol) plus the sensor name. This id is used for aggregating (stitching) in the next step. +Adds a unique id (evenutally called meta.id) which is a hash of the 5-tuple of the flow (src and dst ips and ports, and protocol) plus the sensor name. ### 40-aggregation.conf -Stitches together flows from different nfcapd files into longer flows, matching them up by meta.id and using a specified inactivity_timeout to decide when to start a new flow. +Stitches incoming flows into longer flows. The inactive timeout is 6 minutes, by default. So, if the time from the start of the current flow to the start time of the last matching flow is over 6 minutes, declare the previous aggregated flow ended and start a new one with the current incoming flow. The default active timeout is 1 hour, meaning any flows over 1 hour in length will be split up into 1 hour chunks. This may require the start time to be adjusted, to cut off previous whole hours. -Notes: - - By default, 5-minute nfcapd files are assumed and the inactivity_timeout is set to 10.5 minutes. If more than 10.5 min have passed between the start of the current flow and the start of the last matching one, do not stitch them together. - - If your nfcapd files are written every 15 minutes, change the inactivity_timeout to at least 16 minutes. - - There is another "timeout" setting which is basically the maximum duration of a stitched flow (default: 24 hr). +For sflow, aggregation uses the 5-tuple plus sensor name. +For netflow, aggregation uses the 5-tuple plus sensor name plus start time. This means that when there's a timeout at the router (default inactive timeout is usually 15 sec), the flows will stay separate. (In certain grafana dashboards, they will be added together.) Start times of incoming flows are adjusted. See comments in file. + +Notes - When logstash shuts down, any flows "in the aggregator" will be written out to aggregate_maps_path (default: /tmp/logstash-aggregation-maps). The file is then read back in when logstash is restarted so aggregation can continue. - Your logstash pipeline can have only 1 worker or aggregation is not going to work! This is set in the logstash config file. - Tstat flows come in already complete, so no aggregation is done on those flows. +### 41-thresholds.conf + +Drops flows that are too small - under 10 MB, by default. +For flows with small durations, sets rates to 0 because sampling makes them too inaccurate. + ### 45-geoip-tagging.conf Queries the MaxMind GeoLite2-City database by IP to get src and dst Countries, Continents, Latitudes, and Longitudes; @@ -87,6 +98,8 @@ Notes: Replaces the last octet of IPv4 addresses and the last 4 hextets of IPv6 addresses with x's in order to deidentify them. +Deidentfication can be skipped by using an option in the environment file. + ### 80-privatize.org.conf Removes information about Australian organizations (or, with modification, any country that has privacy rules that require us not to identify organizations). @@ -99,19 +112,19 @@ Copies Science Registry organization and location values, if they exist, to the ### 90-additional-fields.conf Sets additional quick and easy fields. Supporting mapping or ruby files are used - see support/ and ruby/ in conf.d/. 
Currently we have (for Netsage's use): - - sensor_group = TACC, AMPATH, etc. (based on matching sensor names to regexes) - - sensor_type = Circuit, Archive, Exchange Point, or Regional Network (based on matching sensor names to regexes) + - sensor_group = TACC, NEAAR, I-Light, etc. (based on matching sensor names to regexes) + - sensor_type = Circuit, Archive, Exchange Point, Regional Network, Facility Edge, Campus (based on matching sensor names to regexes) - country_scope = Domestic, International, or Mixed (based on src and dst countries and possibly continents, where Domestic = US, Puerto Rico, or Guam) - - is_network_testing = yes, no (yes if discipline from the science registry is 'CS.Network Testing and Monitoring' or port = 5001, 5101, or 5201) + - is_network_testing = yes, no (yes if discipline from the science registry is 'CS.Network Testing and Monitoring' or if port = 5001, 5101, or 5201) - es_doc_id = hash of meta.id and the start time of the flow. If this id is used as the document id in elasticsearch, flows that are mistakenly input more than once will update existing documents rather than be added as duplicates. (NOTE: due to how netflow works, use es_doc_id as the ES document id only for sflow!) ### 95-cleanup.conf -Does small misc. tasks at the end like rename, remove, or convert fields +Does small miscellaneous tasks at the end like rename, remove, or convert fields ### 98-post-process.conf -Adds @exit_time and @processing_time (these are mainly for developers) +Adds @exit_time, @processing_time, and @pipeline_ver (these are mainly for developers) ### 99-output-rabbit.conf @@ -119,7 +132,7 @@ Sends results to a final RabbitMQ queue. (".disabled" can be removed from other ### Final Stage -In the GlobalNOC-Netsage case, the output filter writes the flows to a network-specific RabbitMQ queue on another host and the last stage is a separate logstash pipeline on a 3rd host. The latter reads flows from the final queue using a rabbitmq input filter and sends it into elasticsearch using an elasticsearch output filter with a mapping template which sets data types for the fields. +In the GlobalNOC-Netsage case, the output filter writes the flows to a network-specific RabbitMQ queue at Indiana University and the last stage is a separate logstash pipeline. The latter reads flows from the final queue using a rabbitmq input filter and sends it into elasticsearch using an elasticsearch output filter with a mapping template which sets data types for the fields. ## Field names diff --git a/website/docs/pipeline/nfdump.md b/website/docs/pipeline/nfdump.md deleted file mode 100644 index b9519282..00000000 --- a/website/docs/pipeline/nfdump.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -id: nfdump -title: Sflow/Netflow Data Collection -sidebar_label: Sflow/Netflow Data ---- - -Sflow and Netflow export can be configured on appropriate network devices. Netsage uses tools in the Nfdump package to collect and process the resulting flow data. The toolset supports netflow v1, v5/v7, v9, IPFIX and SFLOW, IPv4 as well as IPv6. - -## Netsage Usage - -Nfcapd and/or sfcapd processes (from the nfdump package) are used to collect incoming netflow and/or sflow data and save it to disk in nfcapd files. The files are then read by the [importer](importer), which uses an nfdump command, and sent to RabbitMQ. From there, the [logstash](logstash) pipeline ingests the flows and processes them in exactly the same way as it processes tstat flows. 
The data is eventually saved in elasticsearch and visualized by [grafana dashboards](https://github.com/netsage-project/netsage-grafana-configs). - -One may also use the nfdump command interactively to view the flows in a nfcapd file in a terminal window. - -## Docker Deployment - -The nfdump/nfcapd/sfcapd processes can be invoked locally or using a Docker container. The Docker deployment of the Pipeline uses an nfdump Docker container. (See the Docker Deployment Guide.) The Docker image definitions can be found [HERE](https://github.com/netsage-project/docker-nfdump-collector) diff --git a/website/docs/pipeline/pmacct.md b/website/docs/pipeline/pmacct.md new file mode 100644 index 00000000..a896dd6e --- /dev/null +++ b/website/docs/pipeline/pmacct.md @@ -0,0 +1,14 @@ +--- +id: pmacct +title: Pmacct +sidebar_label: Pmacct +--- +As flow data comes into the pipeline host, it is received by nfacctd and sfacctd processes which are listening on the proper ports. +These do sampling corrections, add sensor name information, and send the flows to a rabbitmq queue. +Netsage also uses sfacctd to do some preliminary aggregation for sflow, to cut down on the work that logstash needs to do. By default, all samples, with the same 5-tuple, within each 5 minute window are aggregated into one incoming flow. + +### Configuration +For netsage, pretag.map files are required, one for each nfacctd or sfacctd process. In the bare-metal installation, these are in /etc/pmacct/. For the default docker deployment, we have one for sflow, one for netflow: sfacct-pretag.map and nfacct-pretag.map. These specify the sensor names which are added to the flows. See the comments in the files and the Deployment pages in these docs. + +Configuration files are also required for each nfacctd or sfacctd process. In the bare-metal installation, these are also in /etc/pmacct/. For the default docker deployment, we have just two files - sfacctd.conf and nfacctd.conf. See comments within the files. + diff --git a/website/docs/pipeline/sensors.md b/website/docs/pipeline/sensors.md new file mode 100644 index 00000000..45770f6e --- /dev/null +++ b/website/docs/pipeline/sensors.md @@ -0,0 +1,14 @@ +--- +id: sensors +title: Sflow/Netflow Data Export +sidebar_label: Sflow/Netflow Data +--- + +Sflow and Netflow (including IPFIX) export can be configured on appropriate network devices. Routers and switches have flow export capabililties built in, although they can somtimes be buggy. +We have assumed that each sensor sends flow data to a different port on the pipeline host. Certainly if different sensors use different sampling rates, this needs to be adhered to. + +Sflow collects samples of packets passing through the device and sends them to a collector. The sampling rate can be configured, eg, 1 out of every 100 packets. It is assumed that, in our example, each observed packet represents 100 similar packets. To approximately correct for sampling, the number of bytes in the packet is multiplied by 100. The sampling rate compared to the number of packets per second flowing through the device determines how accurate this approximation is. It is of course, least accurate for very short flows. + +Netflow may also sample packets, and the same sampling corrections apply, but it also keeps track of the flows and aggregates by the so-called 5-tuple (source and destination IPs, ports, and protocol). The "active timeout" determines how often netflow sends out an "update" on the flows it is aggregating. 
The "inactive timeout" determines how long to wait for another matching packet, that is when to declare that a flow has ended. +Typically, the active timeout is 1 minute and the inactive timeout 15 seconds. For flows longer than 1 minute, an "update" is sent out every minute. The tricky thing is that these updates all have the same start time (the time the first packet was observed), although the end time (the time the last packet was observed) and duration change, and the number of bytes and packets reported corresponds only to the period since the last update. +The netsage pipeline attempts to combine the updates to aggregate (and also break up) long flows correctly. diff --git a/website/docs/pipeline/tstat.md b/website/docs/pipeline/tstat.md index baab97c5..079d422a 100644 --- a/website/docs/pipeline/tstat.md +++ b/website/docs/pipeline/tstat.md @@ -1,6 +1,6 @@ --- id: tstat -title: Tstat Data Collection +title: Tstat Data Export sidebar_label: Tstat Data --- diff --git a/website/sidebars.js b/website/sidebars.js index 6bae0e65..12c9349d 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -3,8 +3,8 @@ module.exports = { Pipeline: [ "pipeline/intro", "pipeline/tstat", - "pipeline/nfdump", - "pipeline/importer", + "pipeline/sensors", + "pipeline/pmacct", "pipeline/logstash", "pipeline/elastic", ], From 17c7c3ec2eb2f7079830085d3c4c7b7729741f2c Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 8 Jun 2022 21:59:50 +0000 Subject: [PATCH 084/126] Few more edits to Intro docs --- website/docs/pipeline/elasticsearch.md | 2 +- website/docs/pipeline/intro.md | 33 +++++++++++++------------- website/docs/pipeline/tstat.md | 8 +++---- 3 files changed, 20 insertions(+), 23 deletions(-) diff --git a/website/docs/pipeline/elasticsearch.md b/website/docs/pipeline/elasticsearch.md index 8d7674da..05decdef 100644 --- a/website/docs/pipeline/elasticsearch.md +++ b/website/docs/pipeline/elasticsearch.md @@ -108,8 +108,8 @@ The [Science Registry](https://scienceregistry.netsage.global/rdb/) stores human |@timestamp |Jun 9, 2020 @ 18:03:21.703 |The time the flow entered the logstash pipeline for tstat flows, or the time stitching finished and the event exited the aggregation filter for other flows.| |@exit_time |Jun 9, 2020 @ 18:03:25.369 |The time the flow exited the pipeline | |@processing_time |688.31 |@exit_time minus @ingest_time. Useful for seeing how long stitching took. | -|stitched_flows |13 |Number of flows that came into logstash that were stitched together to make this final one. 1 if no flows were stitched together. 0 for tstat flows, which are never stitched. | |@sampling_corrected |yes |'yes' if sampling corrections have been done; 'no' otherwise, eg, for netflows before a template has been seen that includes the sampling rate. | +|stitched_flows |13 |Number of flows that came into logstash that were stitched together to make this final one. 1 if no flows were stitched together. 0 for tstat flows, which are never stitched. | |tags |maxmind src asn |Various info and error messages| |trial | 5 |Can be set in 40-aggregation.conf if desired| diff --git a/website/docs/pipeline/intro.md b/website/docs/pipeline/intro.md index 0c54059c..5534cf58 100644 --- a/website/docs/pipeline/intro.md +++ b/website/docs/pipeline/intro.md @@ -3,39 +3,38 @@ id: intro title: Intro sidebar_label: Intro --- -# Network Flows +## Network Flows A flow is defined as a series of packets with the same source IP and port, destination IP and port, and protocal (the "5-tuple"). 
-# The NetSage Pipeline - -## Description +## The NetSage Pipeline The Netsage Flow Processing Pipeline is composed of several components for processing network flow data, including collection, deidentification, metadata tagging, flow stitching, etc. There are many ways the components can be combined, configured, and run. These documents will describe the standard "simple" set up and provide information for more complex configurations. -## Data Collection - -In Netsage, sensor(s) are network devices (eg, routers) configured to collect flow data ([tstat](http://tstat.polito.it/), [sflow](https://www.rfc-editor.org/info/rfc3176), or [netflow/IPFIX](https://www.cisco.com/c/en/us/products/collateral/ios-nx-os-software/ios-netflow/prod_white_paper0900aecd80406232.html)) and send it to a "pipeline host" for processing. +### Flow Export -Tstat flow data can be sent directly to the pipeline ingest RabbitMQ queue on the pipeline host using the Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) tool. This can be installed as usual or via Docker. +In Netsage, "sensor(s)" are "flow exporters," i.e., network devices such as routers that are configured to collect flow data ([tstat](http://tstat.polito.it/), [sflow](https://www.rfc-editor.org/info/rfc3176), or [netflow/IPFIX](https://www.cisco.com/c/en/us/products/collateral/ios-nx-os-software/ios-netflow/prod_white_paper0900aecd80406232.html)) and send it to a "Netsage pipeline" on a "pipeline host" for processing. -Sflow and netflow data from configured sensors should be sent to the pipeline host where it is ingested by sfacctd and nfacctd processes - see [pmacct](https://github.com/pmacct/pmacct). (Previous versions of the pipeline used the nfdump package and a custom perl importer script.) - -## Pipeline Components +### Pipeline Components The Netsage Flow Processing Pipeline is made of the following components - [Pmacct](https://github.com/pmacct/pmacct): the pmacct package includes sfacctd and nfacctd daemons which receive sflow and netflow/IPFIX flows, respectively. They can also do some processing and filtering, but we use these options very minimally. They send the flows to a rabbitmq queue. - - [RabbitMQ](https://www.rabbitmq.com/): Used for message passing and queuing of tasks. - - [Logstash](https://www.elastic.co/logstash) pipeline: Performs a variety of operations on the flow data to transform it and add additional information. ([Doc](logstash.md)) + - [RabbitMQ](https://www.rabbitmq.com/): Used for message queueing and passing at a couple of points in the full pipeline. + - [Logstash](https://www.elastic.co/logstash): A logstash pipeline performs a variety of operations on the flow data to transform it and add additional information. ([Doc](logstash.md)) - [Elasticsearch](https://www.elastic.co/what-is/elasticsearch): Used for storing the final flow data. -## Visualization +Sflow and netflow should be configured to send data to ports on the pipeline host (a different port for each sensor). Pmacct processes will be listening on those ports. -[Grafana](https://grafana.com/oss/grafana/) or [Kibana](https://www.elastic.co/kibana) (with appropriate credentials) can be used to visualize the data stored in elasticsearch. Netsage Grafana Dashboards are available [in github](https://github.com/netsage-project/netsage-grafana-configs). +Tstat flow data can be sent directly to the ingest RabbitMQ queue on the pipeline host using the Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) tool. 
This can be installed as usual or via Docker. + +### Pipeline Installation + +Originally, the pipeline was deployed by installing all of the components individually on one or more servers (the "BareMetal" or "Manual" Install). More recently, we've also added a Docker deployment option. For simple scenerios having just one sflow and/or one netflow sensor (and any number of tstat sensors), the basic "Docker Installation" should suffice. The "Docker Advanced Options" guide will help when there are more sensors and/or other customizations required. + +## Visualization -## Pipeline Installation +[Grafana](https://grafana.com/oss/grafana/) or [Kibana](https://www.elastic.co/kibana) (with appropriate credentials) can be used to visualize the data stored in elasticsearch. Netsage grafana dashboards are available in github [here](https://github.com/netsage-project/netsage-grafana-configs). -Originally, the pipeline was deployed by installing all of the components individually on one or more servers (the "BareMetal" or "Manual" Install). More recently, we've also added a Docker deployment option. With simple pipelines having just one sflow and/or one netflow sensor (and any number of tstat sensors), the basic "Docker Installation" should suffice. The "Docker Advanced Options" guide will help when there are more sensors and/or other customizations required. diff --git a/website/docs/pipeline/tstat.md b/website/docs/pipeline/tstat.md index 079d422a..dc16a413 100644 --- a/website/docs/pipeline/tstat.md +++ b/website/docs/pipeline/tstat.md @@ -4,13 +4,11 @@ title: Tstat Data Export sidebar_label: Tstat Data --- -## Netsage GitHub Project +[Tstat](http://tstat.polito.it/) is a passive sniffer that provides insights into traffic patterns. -[Tstat](http://tstat.polito.it/) is a passive sniffer that provides insights into traffic patterns. The Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) project provides client programs to parse the captured data and send it to a rabbitmq host where it can then be processed by the [logstash pipeline](logstash), stored in elasticsearch, and finally displayed in our Grafana [dashboards](https://github.com/netsage-project/netsage-grafana-configs). +The Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) project provides client programs to parse the captured data and send it to a rabbitmq host where it can then be processed by the [logstash pipeline](logstash), stored in elasticsearch, and finally displayed in our Grafana [dashboards](https://github.com/netsage-project/netsage-grafana-configs). -## Docker - -Netsage Docker images exist on Docker Hub for tstat and tstat_transport. This is still in a beta state and is in development. The initial documentation is available [here](https://github.com/netsage-project/tstat-transport/blob/master/docs/docker.md). +Docker images exist on Docker Hub for tstat and tstat_transport. This is still in a beta state and is in development. The initial documentation is available [here](https://github.com/netsage-project/tstat-transport/blob/master/docs/docker.md). 
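For orientation, the toy Ruby producer below (using the Bunny gem, which is not part of the pipeline) shows roughly what handing a JSON flow record to the ingest RabbitMQ queue looks like. The queue name comes from the older importer documentation and may differ in a given deployment, the flow values are invented, and the queue options must match whatever already exists on the broker, so treat this purely as a sketch; in practice tstat-transport and the pmacct daemons are the producers.

```ruby
require 'bunny'
require 'json'

# Toy producer only -- not part of the pipeline.
flow = {
  'start'  => Time.now.to_f - 60,
  'end'    => Time.now.to_f,
  'meta'   => { 'flow_type' => 'tstat', 'sensor_id' => 'example-sensor',
                'src_ip' => '10.0.0.1', 'src_port' => 5201,
                'dst_ip' => '10.0.0.2', 'dst_port' => 52144, 'protocol' => 'tcp' },
  'values' => { 'num_bits' => 8_000_000_000, 'num_packets' => 700_000 }
}

conn = Bunny.new(host: 'localhost', username: 'guest', password: 'guest')
conn.start
ch = conn.create_channel
q  = ch.queue('netsage_deidentifier_raw', durable: true)  # name/options may differ per deployment
q.publish(flow.to_json)
conn.close
```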
From 3078ac2172f8f8a95583cecbdb331653fc717ef7 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 9 Jun 2022 17:42:30 +0000 Subject: [PATCH 085/126] Tag 0.0.0.x flows and later drop them by default --- conf-logstash/95-cleanup.conf | 18 +++++++++--------- conf-logstash/99-output-rabbit.conf | 14 ++++++++++++-- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/conf-logstash/95-cleanup.conf b/conf-logstash/95-cleanup.conf index 63f94a37..62babd2f 100644 --- a/conf-logstash/95-cleanup.conf +++ b/conf-logstash/95-cleanup.conf @@ -1,14 +1,14 @@ filter { - # Tag flows with 2 missing IPs (0.0.0.0s). - # Check/edit here or the 99-outputs file for any action to be taken based on this tag. - if [meta][src_ip] == "0.0.0.x" and [meta][dst_ip] == "0.0.0.x" - or ([meta][src_ip] == "0.0.0.0" and [meta][dst_ip] == "0.0.0.0") { - mutate { - id => "95-1" - add_tag => ["Missing IPs"] - } - } + # Tag flows with 2 missing IPs (0.0.0.0s). + # Check/edit the 99-outputs file for any action to be taken based on this tag. + if ([meta][src_ip] == "0.0.0.x" and [meta][dst_ip] == "0.0.0.x") + or ([meta][src_ip] == "0.0.0.0" and [meta][dst_ip] == "0.0.0.0") { + mutate { + id => "95-1" + add_tag => ["Missing IPs"] + } + } # rename the 5-tuple+sensor hash to meta.id if [flow_fingerprint] { diff --git a/conf-logstash/99-output-rabbit.conf b/conf-logstash/99-output-rabbit.conf index caa32455..7f6adbfd 100644 --- a/conf-logstash/99-output-rabbit.conf +++ b/conf-logstash/99-output-rabbit.conf @@ -3,8 +3,18 @@ # If values are not provided in an env file, the defaults/examples following the :'s are used. # With a bare-metal installation, you may also just edit this file and fill in the values you want. -output { +# By default, flows with missing IPs are dropped. This can be changed in the env file. +filter { + mutate { + add_field => { "[@metadata][drop_missing_IPs_flag]" => "${drop_missing_IPs_flag:True}" } + } + if [@metadata][drop_missing_IPs_flag] == "True" and "Missing IPs" in [tags] { + drop{ id => "99-1" } + } +} + +output { #-- To send results to rabbitmq # "${env-var:default-value}" will be replaced by the env-var environment variable value, or default-value if that is not set. # Rabbitmq host may be, eg, "localhost", "xx.xx.xx.xx", "["hostname1", "hostname2"]. 
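Restated outside of logstash, the tag-and-drop decision added above amounts to the small Ruby sketch below. The function names are illustrative; the default of dropping flows whose IPs are both missing matches the drop_missing_IPs_flag default of True in the config.

```ruby
# Sketch of the "Missing IPs" tag / drop logic, not the filter itself.
def missing_ips?(event)
  src = event.dig('meta', 'src_ip')
  dst = event.dig('meta', 'dst_ip')
  (src == '0.0.0.x' && dst == '0.0.0.x') || (src == '0.0.0.0' && dst == '0.0.0.0')
end

def keep?(event, drop_flag = ENV.fetch('drop_missing_IPs_flag', 'True'))
  return true unless drop_flag == 'True'
  !missing_ips?(event)
end

event = { 'meta' => { 'src_ip' => '0.0.0.x', 'dst_ip' => '0.0.0.x' } }
puts keep?(event)   # => false with the default setting
```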
@@ -19,6 +29,6 @@ output { connection_timeout => 10000 durable => true persistent => false + id => "99-2" } - } From 0f043c40fe6aac0739ae35cdbde847f95a93b1c1 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 9 Jun 2022 17:43:34 +0000 Subject: [PATCH 086/126] commiting yarn.lock --- website/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index 07aeee5f..b3362c9a 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -2795,9 +2795,9 @@ caniuse-api@^3.0.0: lodash.uniq "^4.5.0" caniuse-lite@^1.0.0, caniuse-lite@^1.0.30000981, caniuse-lite@^1.0.30001109, caniuse-lite@^1.0.30001125, caniuse-lite@^1.0.30001181, caniuse-lite@^1.0.30001196: - version "1.0.30001255" - resolved "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001255.tgz" - integrity sha512-F+A3N9jTZL882f/fg/WWVnKSu6IOo3ueLz4zwaOPbPYHNmM/ZaDUyzyJwS1mZhX7Ex5jqTyW599Gdelh5PDYLQ== + version "1.0.30001350" + resolved "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001350.tgz" + integrity sha512-NZBql38Pzd+rAu5SPXv+qmTWGQuFsRiemHCJCAPvkoDxWV19/xqL2YHF32fDJ9SDLdLqfax8+S0CO3ncDCp9Iw== ccount@^1.0.0, ccount@^1.0.3: version "1.1.0" From 37499f4107608e7c87285888432b0d9ebfe6da31 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 9 Jun 2022 18:30:42 +0000 Subject: [PATCH 087/126] Added list of changes so far to CHANGES.md --- CHANGES.md | 29 +++++++++++++++++++++++++++++ conf-logstash/99-output-rabbit.conf | 3 +-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 98be2368..56f9d78d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,32 @@ +------------------------------------------------------ +## GRNOC NetSage Pipeline 2.0.0 -- +------------------------------------------------------ +NEW PACKAGE NAME, PMACCT INSTEAD OF NFDUMP AND IMPORTER + +Features: + * Renamed package to grnoc-netsage-pipeline + * Got rid of importer references, requirements, files, etc. + * Used the %post section in the spec file to check to see if pmacct is installed. + * Added systemd unit files for sfacctd and nfacctd (default will be 1 sflow, 1 netflow source, for docker installs) + * Added default sfacct and nfacct config files in conf-pmacct/ (/etc/pmacct/). + * Added default pre_tag_map files for the default ports and sensor names. + * Added 05-translate-pmacct.conf logstash config. + * Revised 40-aggregation.conf to deal with pmacct; separate sections for sflow and netflow. + * For netflow, in 40-aggregation.conf, adjust start time of incoming flows if duration is over the active timeout. ("updates" to long lasting flows) + * Added 41-thresholds.conf - applies size threshold of 10 MB (otherwise drop) and duration threshold of 0.1 sec (otherwise set rates to 0) + * Sampling rate corrections will be done in logstash only if requested in the env file AND a correction has not yet been applied by pmacct. * Sensor list for sampling rate corrections in the env file is now semicolon-delimited. + * New field: @sampling_corrected = yes/no. If sampling rate correction has been applied by pmacct or logstash, value will be yes. + * If a sampling rate correction is applied by logstash, add a tag with the rate. + * Added CERN and Utah regexes to sensor type and group files. + * Added env file option to skip de-identification. 
+ * The default inactive timeout for logstash aggregation has been set to 6 minutes (to go with 5 minute sflow aggregation by sfacctd) + + * Documentation updates + * Dependabot automatic remediations of vulnerabilites (for docusaurus) + +Bugs: + * Fixed ifindex filtering to be able to filter only specified sensors and keep all flows for other sensors; allow "ALL" for sensor names or interfaces. + ------------------------------------------------------ ## GRNOC NetSage Deidentfier 1.2.12 -- Jan 4, 2022 ------------------------------------------------------ diff --git a/conf-logstash/99-output-rabbit.conf b/conf-logstash/99-output-rabbit.conf index 7f6adbfd..6519274a 100644 --- a/conf-logstash/99-output-rabbit.conf +++ b/conf-logstash/99-output-rabbit.conf @@ -16,8 +16,7 @@ filter { output { #-- To send results to rabbitmq - # "${env-var:default-value}" will be replaced by the env-var environment variable value, or default-value if that is not set. - # Rabbitmq host may be, eg, "localhost", "xx.xx.xx.xx", "["hostname1", "hostname2"]. + # Rabbitmq host may be localhost, xx.xx.xx.xx, ["hostname1", "hostname2"], etc. # Change the queue key and exchange, if needed. rabbitmq { host => "${rabbitmq_output_host:localhost}" From 9a0775dcdd2e6149d87d479b5b64701243e7c5b0 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 9 Jun 2022 18:34:35 +0000 Subject: [PATCH 088/126] One more thing in CHANGES file --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 56f9d78d..1f44b24f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -20,6 +20,7 @@ Features: * Added CERN and Utah regexes to sensor type and group files. * Added env file option to skip de-identification. * The default inactive timeout for logstash aggregation has been set to 6 minutes (to go with 5 minute sflow aggregation by sfacctd) + * 0.0.0.x and 0.0.0.0 flows are tagged, and dropped by default. Unadvertised option to keep them is available in the env file. * Documentation updates * Dependabot automatic remediations of vulnerabilites (for docusaurus) From e17c88853baf7d21c4c7ae8bd252230c798c6909 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 9 Jun 2022 18:41:30 +0000 Subject: [PATCH 089/126] Updated spec file to refer to last-tested version of pmacct --- grnoc-netsage-pipeline.spec | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/grnoc-netsage-pipeline.spec b/grnoc-netsage-pipeline.spec index 46831fe4..0970f3f1 100644 --- a/grnoc-netsage-pipeline.spec +++ b/grnoc-netsage-pipeline.spec @@ -46,7 +46,7 @@ BuildArch: noarch Requires: wget Requires: logstash >= 7.16.2 Requires: rubygem-ipaddress -#Requires: pmacct = 1.7.7 (Not installed by rpm; see post section below for a check. Update ver num there!) +#Requires: pmacct > 1.7.7 (really date > 10/12/21. Not installed by rpm; see post section below for a check. Update ver num there!) %description GRNOC NetSage Flow-Processing Pipeline @@ -180,12 +180,12 @@ if [ -f /usr/local/sbin/nfacctd ]; then echo "PLEASE CHECK: " echo "It looks like pmacct has been installed." echo "Check the version with sfacctd -V and nfacctd -V." - echo "The Netage Pipeline has been tested with version 1.7.7." + echo "The Netage Pipeline has been tested with version 1.7.8-git from 2022/06/02. (>1.7.7 is required.)" else echo "WARNING: " echo "Required package pmacct does not appear to have been installed. " echo "See the NDCA doc or pmacct on github for instructions." - echo "The Netage Pipeline has been tested with version 1.7.7." 
+ echo "The Netage Pipeline has been tested with version 1.7.8-git from 2022/06/02. (>1.7.7 is required.)" fi echo " " From 302c58406cf4b5d17e51af4dccc4cd5cb186788d Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 9 Jun 2022 21:43:49 +0000 Subject: [PATCH 090/126] In netflow aggregation, use env var instead of hardcoding 1 hr start time cuts. Also fixed indenting. --- conf-logstash/40-aggregation.conf | 359 +++++++++++++++--------------- 1 file changed, 185 insertions(+), 174 deletions(-) diff --git a/conf-logstash/40-aggregation.conf b/conf-logstash/40-aggregation.conf index a866be62..af155841 100644 --- a/conf-logstash/40-aggregation.conf +++ b/conf-logstash/40-aggregation.conf @@ -19,112 +19,112 @@ filter { # Tstat only reports complete flows, so no stitching is needed! # Just add stitched_flows=0 (means no stitching attempted) if [meta][flow_type] == 'tstat' { - mutate { - id => "40-1" - add_field => { 'stitched_flows' => 0 } - } + mutate { + id => "40-1" + add_field => { 'stitched_flows' => 0 } + } } else { # for aggregation, we need the 'start' or 'end' date, as well as as timestamp - date { - id => "40-2" - match => [ '[start]', 'UNIX' ] - target => '[start_date]' - } - date { - id => "40-3" - match => [ '[end]', 'UNIX' ] - target => '[end_date]' - } + date { + id => "40-2" + match => [ '[start]', 'UNIX' ] + target => '[start_date]' + } + date { + id => "40-3" + match => [ '[end]', 'UNIX' ] + target => '[end_date]' + } } # === SFLOW === # Aggregate on hash of 5-tuple + sensor # Incoming events may be single samples or results from partial aggregation/stitching by sfacctd. if [meta][flow_type] == "sflow" { - aggregate { - id => "40-4" - # Events that have matching task_id's will be aggregated. - task_id => '%{[flow_fingerprint]}' - - # Save the task_id value to this field in the aggregated event on timeout - timeout_task_id_field => "[flow_fingerprint]" - - # Use this field when determining if timeouts have occurred, in case we are processing historical data. - # It'll actually look at values of this field AND the clock times at which events come in. (Must be type 'date') - timeout_timestamp_field => '[start_date]' - - # Inactive timeout - # A flow is assumed to have ended if more than inactivity_timeout seconds have passed since the last matching event. - # (Aggregator compares timeout_timestamp_field of the current matching event and of the last matching event. If the diff is - # greater than inactivity_timeout, it ends the current flow and starts a new one. - # ALSO, every 5 sec, it compares the ingest clock time of the last matching event to NOW. - # If more than inactivity_timeout seconds have passed, it declares the flow finished.) - ## default 360 sec = 6 min - inactivity_timeout => "${inactivity_timeout:360}" - - # Active timeout - # = maximum possible flow duration - # (Aggregator compares timeout_timestamp_field of the current event to that of the FIRST event in the map. If the - # diff is greater than timeout, it ends the current flow and starts a new one, even if matching events are still coming in. - # ALSO, every 5 sec, it compares the ingest clock time of the first event in the map to NOW. - # If more than timeout seconds have passed, it declares the flow finished, even if matching events are still coming in.) 
- ## default 3600 sec = 1 hour - timeout => "${max_flow_timeout:3600}" - - # Save the aggregation map as a new event upon timeout - push_map_as_event_on_timeout => true - - # Save all the in-progress aggregation maps when logstash shuts down, to be read back in when it restarts. - ## (use a different file for each logstash pipeline!) - aggregate_maps_path => '${aggregation_maps_path:/tmp/logstash-aggregation-maps}' - - # Ruby code to run for each event. - # (The event will be added to the correct map (hash) according to its task_id. - # ||= assigns the value only if the variable does not yet exist. Only map values are included in the final event.) - code => " - # keep track of how many events we aggregate - map['stitched_flows'] ||= 0 - map['stitched_flows'] += 1 - - # map[start and end] are start and end times of the full stitched flow (timestamps) - map['start'] ||= event.get('start') - map['end'] ||= event.get('end') - - # Save these fields from the FIRST event. - # Only 'values' will be updated as we stitch events or at the very end. - map['meta'] ||= event.get('meta') - map['values'] ||= event.get('values') - map['tags'] ||= event.get('tags') - map['@sampling_corrected'] ||= event.get('@sampling_corrected') - - # Essentially the time the flow entered the pipeline - map['@ingest_time'] ||= Time.now # Saving @timestamp caused problems when aggregate map was saved to a file then read. - # but this works. - # An @timestamp will be added when the map is finally pushed as an event. - - #### FOR TESTING (EDIT IN BOTH SFLOW AND NETFLOW SECTIONS !!!) - #map['trial'] = 1 - #map['values']['indivDurations'] ||= ' ' - #map['values']['indivDurations'] += event.get('[values][duration]').to_f.round(3).to_s - #map['values']['indivDurations'] += '; ' - #### - - # If we are seeing a subsequent flow event... (assumes all events are in order!) - if map['stitched_flows'] > 1 - map['end'] = event.get('end') - # sum the packet and bit counters - map['values']['num_packets'] += event.get('[values][num_packets]') - map['values']['num_bits'] += event.get('[values][num_bits]') - end - - # Discard the original events. We only care about the aggregation. - event.cancel() - " - - # Code to run on the new aggregated event before it's pushed out - timeout_code => " + aggregate { + id => "40-4" + # Events that have matching task_id's will be aggregated. + task_id => '%{[flow_fingerprint]}' + + # Save the task_id value to this field in the aggregated event on timeout + timeout_task_id_field => "[flow_fingerprint]" + + # Use this field when determining if timeouts have occurred, in case we are processing historical data. + # It'll actually look at values of this field AND the clock times at which events come in. (Must be type 'date') + timeout_timestamp_field => "[start_date]" + + # Inactive timeout + # A flow is assumed to have ended if more than inactivity_timeout seconds have passed since the last matching event. + # (Aggregator compares timeout_timestamp_field of the current matching event and of the last matching event. If the diff is + # greater than inactivity_timeout, it ends the current flow and starts a new one. + # ALSO, every 5 sec, it compares the ingest clock time of the last matching event to NOW. + # If more than inactivity_timeout seconds have passed, it declares the flow finished.) 
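# Illustrative aside (not part of the shipped config): a worked example of the two
# timeout rules described above, using invented timestamps. The real checks are made
# internally by the logstash aggregate filter plugin; this only sketches the arithmetic.
inactivity_timeout = 360    # seconds, matching "${inactivity_timeout:360}"
max_flow_timeout   = 3600   # seconds, matching "${max_flow_timeout:3600}"

now              = Time.now
first_event_time = now - 3700   # first matching sample arrived just over an hour ago
last_event_time  = now - 40     # most recent matching sample arrived 40 seconds ago

ended_by_inactivity = (now - last_event_time)  > inactivity_timeout   # false: the gap is only 40 s
ended_by_max_length = (now - first_event_time) > max_flow_timeout     # true: the flow has run over 1 hour

puts "push the aggregated flow as a new event" if ended_by_inactivity || ended_by_max_length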
+ ## default 360 sec = 6 min + inactivity_timeout => "${inactivity_timeout:360}" + + # Active timeout + # = maximum possible flow duration + # (Aggregator compares timeout_timestamp_field of the current event to that of the FIRST event in the map. If the + # diff is greater than timeout, it ends the current flow and starts a new one, even if matching events are still coming in. + # ALSO, every 5 sec, it compares the ingest clock time of the first event in the map to NOW. + # If more than timeout seconds have passed, it declares the flow finished, even if matching events are still coming in.) + ## default 3600 sec = 1 hour + timeout => "${max_flow_timeout:3600}" + + # Save the aggregation map as a new event upon timeout + push_map_as_event_on_timeout => true + + # Save all the in-progress aggregation maps when logstash shuts down, to be read back in when it restarts. + ## (use a different file for each logstash pipeline!) + aggregate_maps_path => '${aggregation_maps_path:/tmp/logstash-aggregation-maps}' + + # Ruby code to run for each event. + # (The event will be added to the correct map (hash) according to its task_id. + # ||= assigns the value only if the variable does not yet exist. Only map values are included in the final event.) + code => " + # keep track of how many events we aggregate + map['stitched_flows'] ||= 0 + map['stitched_flows'] += 1 + + # map[start and end] are start and end times of the full stitched flow (timestamps) + map['start'] ||= event.get('start') + map['end'] ||= event.get('end') + + # Save these fields from the FIRST event. + # Only 'values' will be updated as we stitch events or at the very end !!!!! + map['meta'] ||= event.get('meta') + map['values'] ||= event.get('values') + map['tags'] ||= event.get('tags') + map['@sampling_corrected'] ||= event.get('@sampling_corrected') + + # Essentially the time the flow entered the pipeline + map['@ingest_time'] ||= Time.now # Saving @timestamp caused problems when aggregate map was saved to a file then read. + # but this works. + # An @timestamp will be added when the map is finally pushed as an event. + + #### FOR TESTING (EDIT IN BOTH SFLOW AND NETFLOW SECTIONS !!!) + #map['trial'] = 1 + #map['values']['indivDurations'] ||= ' ' + #map['values']['indivDurations'] += event.get('[values][duration]').to_f.round(3).to_s + #map['values']['indivDurations'] += '; ' + #### + + # If we are seeing a subsequent flow event... (assumes all events are in order!) + if map['stitched_flows'] > 1 + map['end'] = event.get('end') + # sum the packet and bit counters + map['values']['num_packets'] += event.get('[values][num_packets]') + map['values']['num_bits'] += event.get('[values][num_bits]') + end + + # Discard the original events. We only care about the aggregation. + event.cancel() + " + + # Code to run on the new aggregated event before it's pushed out + timeout_code => " # recalculate total duration duration = event.get('end') - event.get('start') event.set( '[values][duration]', duration.round(3) ) @@ -139,87 +139,98 @@ filter { event.set( '[values][packets_per_second]', 0.0 ) event.set( '[values][bits_per_second]', 0.0 ) end - " - } + " + } } + # === NETFLOW === # Aggregate on hash of 5-tuple + sensor + start time - # We have to do special things due to the fact that netflow sensors send "updates" about active flows, - # all with the same start time, but bytes and packets are not cumulative. - # The following will aggregate the updates up to 1 hr; and it will adjust start times when long flows are split up into 1 hr chunks. 
- # Note that when there's a timeout at the router (default inactive timeout is usually 15 sec), the flows will stay separate - # and not be stitched, even though they have the same 5-tuple, since the start time will change. + # + # Before aggregating, we have to do special start-time adjustments due to the fact that netflow sensors send "updates" + # about active flows, all with the same start time, but bytes and packet counts are only for the time since the last update. + # We will aggregate the updates up to max_flow_timeout (1 hr by default) then start a new aggregated flow. + # If a flow (update) comes in with a duration over max_flow_timeout, the start time will be adjusted. Multiples of + # max_flow_timeout (eg whole hours) will be cut off, since bits from those times should have already been accounted for in + # a previous aggregated flow. + # Note that if there's a timeout at the router (default inactive timeout is usually only 15 sec), the incoming flows will stay + # separate and not be stitched here, even though they have the same 5-tuple, since the start time will change. else if [meta][flow_type] == "netflow" { - ruby { - # if duration is > timeout (1 hr), adjust start time to cut off n*timeout (whole hours). - # That part of the flow should have already been processed and pushed out. - id => "40-5" - tag_on_exception => '_rubyexception in 40-aggregation.conf' - code => " - start = event.get( 'start' ) - duration = event.get( '[values][duration]' ).to_f - cuts = 0 # how many times the start time got cut - while duration > 3600.0 - start = start + 3600.0 # move start forward - duration -= 3600.0 - cuts += 1 - end - if cuts > 0 - event.set( 'start', start ) - event.set( '[values][duration]', duration ) - event.set( '@dur_cuts', cuts ) #### FOR TESTING - end - " - } - aggregate { - id => "40-6" - # unique ID used to aggregate events ## A second agg filter must have different task_id "pattern" - # For Netflow, include start time so only "updates" with the same start time are aggregated, not - # continuations after short gaps that the router considers timeouts. - task_id => '%{[flow_fingerprint]}-%{[start]}' - - # see comments above. MAKE SURE THE VALUES/DEFAULTS ARE THE SAME HERE. - timeout_timestamp_field => '[start_date]' - inactivity_timeout => "${inactivity_timeout:360}" - timeout => "${max_flow_timeout:3600}" - push_map_as_event_on_timeout => true - - ## can only set this in 1 agg. filter and it is set above! - ## aggregate_maps_path => '${aggregation_maps_path:/tmp/logstash-aggregation-maps}' - - # Ruby code to run for each event. - code => " - # we have to save flow_fingerprint explicitly for netflow - map['flow_fingerprint'] ||= event.get('flow_fingerprint') - - map['stitched_flows'] ||= 0 - map['stitched_flows'] += 1 - map['start'] ||= event.get('start') - map['end'] ||= event.get('end') - map['meta'] ||= event.get('meta') - map['values'] ||= event.get('values') - map['tags'] ||= event.get('tags') - map['@sampling_corrected'] ||= event.get('@sampling_corrected') - map['@ingest_time'] ||= Time.now - - #### FOR TESTING (EDIT IN BOTH SFLOW AND NETFLOW SECTIONS !!!) 
- #map['trial'] = 1 - #map['values']['indivDurations'] ||= ' ' - #map['values']['indivDurations'] += event.get('[values][duration]').to_f.round(3).to_s - #map['values']['indivDurations'] += '; ' - #### - - if map['stitched_flows'] > 1 - map['end'] = event.get('end') - map['values']['num_packets'] += event.get('[values][num_packets]') - map['values']['num_bits'] += event.get('[values][num_bits]') - map['@dur_cuts'] = event.get('@dur_cuts') #### FOR TESTING - end - - event.cancel() - " - - timeout_code => " + mutate { + add_field => { "[@metadata][max_dur]" => "${max_flow_timeout:3600}" } + id => "40-5" + } + ruby { + # if duration is > timeout (1 hr), adjust start time to cut off n*timeout (whole hours). + # That part of the flow should have already been processed and pushed out. + id => "40-6" + tag_on_exception => "_rubyexception in 40-aggregation.conf" + code => " + max_dur = event.get( '[@metadata][max_dur]' ).to_f + duration = event.get( '[values][duration]' ).to_f + start = event.get( 'start' ) + cuts = 0 # how many times the start time got cut + while duration > max_dur + start = start + max_dur # move start forward + duration -= max_dur + cuts += 1 + end + if cuts > 0 + event.set( 'start', start ) + event.set( '[values][duration]', duration ) + event.set( '@dur_cuts', cuts ) #### no. of max_dur's cut off - FOR TESTING + end + " + } + aggregate { + id => "40-7" + # unique ID used to aggregate events ## A second agg filter must have different task_id "pattern" + # For Netflow, include start time so only "updates" with the same start time are aggregated, not + # continuations after short gaps that the router considers timeouts. + task_id => '%{[flow_fingerprint]}-%{[start]}' + + # see comments above. MAKE SURE THE VALUES/DEFAULTS ARE THE SAME HERE. + timeout_timestamp_field => "[start_date]" + inactivity_timeout => "${inactivity_timeout:360}" + timeout => "${max_flow_timeout:3600}" + push_map_as_event_on_timeout => true + + ## can only set this in 1 agg. filter and it is set above! + ## aggregate_maps_path => '${aggregation_maps_path:/tmp/logstash-aggregation-maps}' + + # Ruby code to run for each event. + code => " + # we have to save flow_fingerprint explicitly for netflow + map['flow_fingerprint'] ||= event.get('flow_fingerprint') + + map['stitched_flows'] ||= 0 + map['stitched_flows'] += 1 + map['start'] ||= event.get('start') + map['end'] ||= event.get('end') + map['meta'] ||= event.get('meta') + map['values'] ||= event.get('values') + map['tags'] ||= event.get('tags') # saves first aggregated event only! + map['@sampling_corrected'] ||= event.get('@sampling_corrected') # saves first aggregated event only! + map['@ingest_time'] ||= Time.now + + #### FOR TESTING (EDIT IN BOTH SFLOW AND NETFLOW SECTIONS !!!) 
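# Illustrative aside (not part of the filter): a worked example, with invented numbers,
# of the start-time cut performed by the ruby block above. A netflow "update" arrives
# with start = T0 and a cumulative duration of 7500 s while max_flow_timeout is 3600 s;
# two whole 3600 s chunks are trimmed off, because the bits in those chunks should
# already have been pushed out in earlier aggregated flows.
max_dur  = 3600.0
start    = 0.0       # stand-in for the original start timestamp T0
duration = 7500.0
cuts     = 0
while duration > max_dur
  start    += max_dur
  duration -= max_dur
  cuts     += 1
end
# Result: start = T0 + 7200 s, duration = 300 s, cuts = 2.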
+ #map['trial'] = 1 + # For netflow updates, indiv durations will be the cumulative duration of the aggregated flow as it aggregates + #map['values']['indivDurations'] ||= ' ' + #map['values']['indivDurations'] += event.get('[values][duration]').to_f.round(3).to_s + #map['values']['indivDurations'] += '; ' + #### + + if map['stitched_flows'] > 1 + map['end'] = event.get('end') + map['values']['num_packets'] += event.get('[values][num_packets]') + map['values']['num_bits'] += event.get('[values][num_bits]') + map['@dur_cuts'] = event.get('@dur_cuts') #### FOR TESTING + end + + event.cancel() + " + + timeout_code => " duration = event.get('end') - event.get('start') event.set( '[values][duration]', duration.round(3) ) @@ -231,7 +242,7 @@ filter { event.set( '[values][packets_per_second]', 0.0 ) event.set( '[values][bits_per_second]', 0.0 ) end - " + " } } # end if netflow From 56aa130ff9c6e087d4da9c46dfc416098ea08b99 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 9 Jun 2022 21:47:23 +0000 Subject: [PATCH 091/126] update to CHANGES file --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 1f44b24f..4da3f899 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -21,6 +21,7 @@ Features: * Added env file option to skip de-identification. * The default inactive timeout for logstash aggregation has been set to 6 minutes (to go with 5 minute sflow aggregation by sfacctd) * 0.0.0.x and 0.0.0.0 flows are tagged, and dropped by default. Unadvertised option to keep them is available in the env file. + * When cutting start times for netflow updates in 40-aggregation.conf, use the inactive timeout variable from the env file instead of harding 1 hour. * Documentation updates * Dependabot automatic remediations of vulnerabilites (for docusaurus) From 9b2bf58da1f0b3b3c1471d89822181dbc70dd424 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Tue, 14 Jun 2022 17:37:45 +0000 Subject: [PATCH 092/126] allowed ALL to specify all sensors when doing sampling rate corrections --- CHANGES.md | 7 +++++-- conf-logstash/15-sensor-specific-changes.conf | 3 ++- env.example | 18 +++++++++--------- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 4da3f899..19b75f9c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -14,8 +14,11 @@ Features: * Revised 40-aggregation.conf to deal with pmacct; separate sections for sflow and netflow. * For netflow, in 40-aggregation.conf, adjust start time of incoming flows if duration is over the active timeout. ("updates" to long lasting flows) * Added 41-thresholds.conf - applies size threshold of 10 MB (otherwise drop) and duration threshold of 0.1 sec (otherwise set rates to 0) - * Sampling rate corrections will be done in logstash only if requested in the env file AND a correction has not yet been applied by pmacct. * Sensor list for sampling rate corrections in the env file is now semicolon-delimited. - * New field: @sampling_corrected = yes/no. If sampling rate correction has been applied by pmacct or logstash, value will be yes. + * Sampling rate corrections will be done in logstash when requested (flag is set) in the env file but + ONLY IF a correction has not yet been applied (by pmacct). + * Sensor list for sampling rate corrections in the env file is now semicolon-delimited. + * Allowed "ALL" when specifying sensors for sampling rate corrections. + * Added new field @sampling_corrected = yes/no. If sampling rate correction has been applied by pmacct or logstash, value will be yes. 
* If a sampling rate correction is applied by logstash, add a tag with the rate. * Added CERN and Utah regexes to sensor type and group files. * Added env file option to skip de-identification. diff --git a/conf-logstash/15-sensor-specific-changes.conf b/conf-logstash/15-sensor-specific-changes.conf index 1504f3e0..8134069e 100644 --- a/conf-logstash/15-sensor-specific-changes.conf +++ b/conf-logstash/15-sensor-specific-changes.conf @@ -90,6 +90,7 @@ filter { # SAMPLING RATE CORRECTIONS #---- Manually apply a sampling correction to listed sensors # but ONLY IF there was no pre-logstash sampling correction applied by pmacct + # ALL can be used to apply the same correction to all sensors. mutate { add_field => { "[@metadata][sampling_correction_flag]" => "${sampling_correction_flag:False}" } id => "15-7" @@ -113,7 +114,7 @@ filter { # strip any leading or trailing spaces from sensor names sensors = event.get("[@metadata][sampling_correction_sensors]").map! { |e| e.strip } # if event sensor is in the list, apply corrections - if sensors.include? event.get("[meta][sensor_id]") + if (sensors.include? "ALL") or (sensors.include? event.get("[meta][sensor_id]")) correction_factor = event.get("[@metadata][sampling_correction_factor]") event.set("[values][num_bits]", correction_factor.to_i * event.get("[values][num_bits]").to_i) event.set("[values][num_packets]", correction_factor.to_i * event.get("[values][num_packets]").to_i) diff --git a/env.example b/env.example index f31f0673..befe1ec5 100644 --- a/env.example +++ b/env.example @@ -26,29 +26,29 @@ aggregation_maps_path=/data/logstash-aggregation-maps # "ALL" can refer to all sensors or all interfaces of a sensor. # If a sensor is not referenced, all its flows will be kept. ifindex_filter_flag=False -#ifindex_filter_keep= Sensor 1: 456,789; Sensor 2: ALL +##ifindex_filter_keep= Sensor 1: 456,789; Sensor 2: ALL # To change the sensor name for flows from a specified sensor and interface: # Provide the ifindex, old and new sensor names. ifindex_sensor_rename_flag=False -#ifindex_sensor_rename_ifindex=123 -#ifindex_sensor_rename_old_name=old name -#ifindex_sensor_rename_new_name=new name +##ifindex_sensor_rename_ifindex=123 +##ifindex_sensor_rename_old_name=old name +##ifindex_sensor_rename_new_name=new name # To correct for sampling in the logstash pipeline: # Normally, sampling corrections are applied before ingest into logstash, but in certain cases, -# it may need to be done in logstash. -# List affected sensors and the correction factor. +# it may need to be done in logstash. Logstash will do corrections only if pmacct reports that it has not! +# List affected sensors and the correction factor. "ALL" can refer to all sensors. sampling_correction_flag=False -#sampling_correction_sensors=sensor 1;sensor 2 -#sampling_correction_factor=100 +##sampling_correction_sensors=sensor 1;sensor 2 +##sampling_correction_factor=100 # To do subnet filtering of flows: # Flows from specified sensors will be dropped unless src or dst is in the list of subnets to keep. # "ALL" can refer to all sensors. # If a sensor is not referenced, all its flows will be kept. subnet_filter_flag=False -#subnet_filter_keep=Sensor 1: 123.45.6.0/16; Sensor 2: 123.33.33.0/24, 456.66.66.0/24 +##subnet_filter_keep=Sensor 1: 123.45.6.0/16; Sensor 2: 123.33.33.0/24, 456.66.66.0/24 # To NOT deidentify flows: # Deidentification of IP addresses is done by default. 
From edbe9626ed1b5c66cdb3b21c68f28b7a97b653f7 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 13 Jul 2022 14:41:51 +0000 Subject: [PATCH 093/126] Revised docker-compose and env files, pmacct configs, setup-pmacct.sh script --- .gitignore | 11 +- CHANGES.md | 24 ++-- ...cct-pretag.map => nfacctd-pretag.map.ORIG} | 3 +- .../{nfacctd.conf => nfacctd.conf.ORIG} | 12 +- ...cct-pretag.map => sfacctd-pretag.map.ORIG} | 3 +- .../{sfacctd.conf => sfacctd.conf.ORIG} | 12 +- ...evelop.yml => docker-compose.ES-Kibana.yml | 0 docker-compose.build.yml | 12 -- docker-compose.override_example.yml | 56 ++++----- docker-compose.yml | 113 +++++++++++++----- env.example | 77 +++++++----- setup-pmacct.sh | 111 +++++++++++++++++ 12 files changed, 303 insertions(+), 131 deletions(-) rename conf-pmacct/{sfacct-pretag.map => nfacctd-pretag.map.ORIG} (76%) rename conf-pmacct/{nfacctd.conf => nfacctd.conf.ORIG} (92%) rename conf-pmacct/{nfacct-pretag.map => sfacctd-pretag.map.ORIG} (77%) rename conf-pmacct/{sfacctd.conf => sfacctd.conf.ORIG} (92%) rename docker-compose.develop.yml => docker-compose.ES-Kibana.yml (100%) delete mode 100644 docker-compose.build.yml create mode 100644 setup-pmacct.sh diff --git a/.gitignore b/.gitignore index f9b3cd42..2f452f70 100644 --- a/.gitignore +++ b/.gitignore @@ -3,10 +3,12 @@ .*.swp conf/systemd/deploy ~ + .env -.DS_Store -*.pyc +docker-compose.override.yml +userConfig data + # Dependencies /website/node_modules @@ -29,17 +31,16 @@ yarn-debug.log* yarn-error.log* build +*.pyc .vscode .history .idea replayData -userConfig -docker-compose.override.yml node_modules Makefile +grnoc-netsage-pipeline-2.0.0.tar.gz blib blib/* -grnoc-netsage-pipeline-2.0.0.tar.gz pm_to_blib diff --git a/CHANGES.md b/CHANGES.md index 19b75f9c..b03032b8 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,30 +1,34 @@ ------------------------------------------------------ ## GRNOC NetSage Pipeline 2.0.0 -- +NEW PACKAGE NAME; PMACCT INSTEAD OF NFDUMP AND IMPORTER ------------------------------------------------------ -NEW PACKAGE NAME, PMACCT INSTEAD OF NFDUMP AND IMPORTER - Features: * Renamed package to grnoc-netsage-pipeline * Got rid of importer references, requirements, files, etc. * Used the %post section in the spec file to check to see if pmacct is installed. * Added systemd unit files for sfacctd and nfacctd (default will be 1 sflow, 1 netflow source, for docker installs) - * Added default sfacct and nfacct config files in conf-pmacct/ (/etc/pmacct/). - * Added default pre_tag_map files for the default ports and sensor names. - * Added 05-translate-pmacct.conf logstash config. - * Revised 40-aggregation.conf to deal with pmacct; separate sections for sflow and netflow. + * Revised docker-compose.yml file, etc. to work with pmacct containers. + * Revised parts of the .env file, including adding variables for number of sflow and netflow sensors. + * Added default sfacct and nfacct config files in conf-pmacct/ (ORIG files to be copied) + * The default inactive timeout for logstash aggregation has been set to 6 minutes (to go with 5 minute sflow aggregation by sfacctd) + * Added a setup script (setup-pmacct.sh) which the user runs to create pmacct config files and create or modify docker-compose.override.yml, + filling in environment variables set in the .env file. (pmacct configs cannot use env vars directly.) + * The number of sflow or netflow sensors can be 0. In this case, the setup script makes the container run an echo command after which it shuts down. 
+ * Added 05-translate-pmacct.conf logstash config to translate pmacct fields to ones the pipeline uses. + * Revised 40-aggregation.conf to deal with pmacct; there are separate sections for sflow and netflow. * For netflow, in 40-aggregation.conf, adjust start time of incoming flows if duration is over the active timeout. ("updates" to long lasting flows) + * When cutting start times for netflow updates in 40-aggregation.conf, use the inactive timeout variable from the env file instead of hard coding 1 hour. + * The default inactive timeout for logstash aggregation has been set to 6 minutes (to go with 5 minute sflow aggregation by sfacctd) * Added 41-thresholds.conf - applies size threshold of 10 MB (otherwise drop) and duration threshold of 0.1 sec (otherwise set rates to 0) + * Added new field @sampling_corrected = yes/no. If sampling rate correction has been applied by pmacct or logstash, value will be yes. * Sampling rate corrections will be done in logstash when requested (flag is set) in the env file but ONLY IF a correction has not yet been applied (by pmacct). * Sensor list for sampling rate corrections in the env file is now semicolon-delimited. * Allowed "ALL" when specifying sensors for sampling rate corrections. - * Added new field @sampling_corrected = yes/no. If sampling rate correction has been applied by pmacct or logstash, value will be yes. - * If a sampling rate correction is applied by logstash, add a tag with the rate. + * When a sampling rate correction is applied by logstash, add a tag with the rate. * Added CERN and Utah regexes to sensor type and group files. * Added env file option to skip de-identification. - * The default inactive timeout for logstash aggregation has been set to 6 minutes (to go with 5 minute sflow aggregation by sfacctd) * 0.0.0.x and 0.0.0.0 flows are tagged, and dropped by default. Unadvertised option to keep them is available in the env file. - * When cutting start times for netflow updates in 40-aggregation.conf, use the inactive timeout variable from the env file instead of harding 1 hour. * Documentation updates * Dependabot automatic remediations of vulnerabilites (for docusaurus) diff --git a/conf-pmacct/sfacct-pretag.map b/conf-pmacct/nfacctd-pretag.map.ORIG similarity index 76% rename from conf-pmacct/sfacct-pretag.map rename to conf-pmacct/nfacctd-pretag.map.ORIG index 2944d245..b6d49300 100644 --- a/conf-pmacct/sfacct-pretag.map +++ b/conf-pmacct/nfacctd-pretag.map.ORIG @@ -1,5 +1,6 @@ ! This file is referenced in a config file and used to set the "label" field to a sensor name. ! Label should be "sfacct--" or "nfacct--" (for sflow or netflow, respectively) ! followed by the sensor name with spaces replaced by #'s +! eg, set_label=nfacct--Netflow#Sensor -set_label=sfacct--Sflow#Sensor +set_label=${netflowSensorName_1} diff --git a/conf-pmacct/nfacctd.conf b/conf-pmacct/nfacctd.conf.ORIG similarity index 92% rename from conf-pmacct/nfacctd.conf rename to conf-pmacct/nfacctd.conf.ORIG index a3c06425..ea4bc88b 100644 --- a/conf-pmacct/nfacctd.conf +++ b/conf-pmacct/nfacctd.conf.ORIG @@ -1,14 +1,14 @@ ! PMACCT CONFIG FOR NETFLOW -! Settings most likely to need changes: NFACCTD_PORT, PRE_TAG_MAP, and AMQP_ROUTING_KEY +! Settings most likely to need changes: NFACCTD_PORT, PRE_TAG_MAP, and possibly AMQP_ROUTING_KEY !# debug: true ! Port nfacctd should listen to -nfacctd_port: 9999 +nfacctd_port: ${netflowContainerPort_1} ! Get a value for 'label' from the pre_tag_map file. ! We use this to encode the sensor name for each port. 
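The label set by the pre_tag_map packs both the collector type and the sensor name into a single token (for example nfacct--Netflow#Sensor, with spaces replaced by #'s as the comments in the map file describe). The short Ruby sketch below shows the decoding step; it assumes the downstream logstash config (05-translate-pmacct.conf, not shown in this patch) simply splits on the -- separator and turns the #'s back into spaces.

label = "nfacct--Netflow#Sensor"      # value assigned by the pre_tag_map

prefix, encoded_name = label.split("--", 2)
flow_type   = { "sfacct" => "sflow", "nfacct" => "netflow" }[prefix]
sensor_name = encoded_name.gsub("#", " ")

puts "#{flow_type}: #{sensor_name}"   # => netflow: Netflow Sensor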
-pre_tag_map: /etc/pmacct/nfacct-pretag.map +pre_tag_map: /etc/pmacct/nfacctd-pretag_1.map ! FOR PRINTING TO FILES instead of writing to rabbit queue (comment out amqp_* lines) ! Default is tab-separated output. If 'label' or any variable length field is in the aggregation list, you have to use csv format. @@ -20,9 +20,9 @@ pre_tag_map: /etc/pmacct/nfacct-pretag.map ! FOR SENDING FLOWS TO THE LOCAL RABBIT QUEUE plugins: amqp - amqp_host: localhost - amqp_user: guest - amqp_passwd: guest + amqp_host: ${rabbitmq_input_host} + amqp_user: ${rabbitmq_input_username} + amqp_passwd: ${rabbitmq_input_pw} amqp_exchange_type: direct amqp_exchange: amq.direct amqp_persistent_msg: true diff --git a/conf-pmacct/nfacct-pretag.map b/conf-pmacct/sfacctd-pretag.map.ORIG similarity index 77% rename from conf-pmacct/nfacct-pretag.map rename to conf-pmacct/sfacctd-pretag.map.ORIG index acc896a3..048dd54e 100644 --- a/conf-pmacct/nfacct-pretag.map +++ b/conf-pmacct/sfacctd-pretag.map.ORIG @@ -1,5 +1,6 @@ ! This file is referenced in a config file and used to set the "label" field to a sensor name. ! Label should be "sfacct--" or "nfacct--" (for sflow or netflow, respectively) ! followed by the sensor name with spaces replaced by #'s +! eg, set_label=sfacct--Sflow#Sensor -set_label=nfacct--Netflow#Sensor +set_label=${sflowSensorName_1} diff --git a/conf-pmacct/sfacctd.conf b/conf-pmacct/sfacctd.conf.ORIG similarity index 92% rename from conf-pmacct/sfacctd.conf rename to conf-pmacct/sfacctd.conf.ORIG index 4e1770f1..6e6c3d14 100644 --- a/conf-pmacct/sfacctd.conf +++ b/conf-pmacct/sfacctd.conf.ORIG @@ -1,14 +1,14 @@ ! PMACCT CONFIG FOR SFLOW -! Settings most likely to need changes: SFACCTD_PORT, PRE_TAG_MAP, and AMQP_ROUTING_KEY +! Settings most likely to need changes: SFACCTD_PORT, PRE_TAG_MAP, and possibly AMQP_ROUTING_KEY !# debug: true ! Port sfacctd should listen to -sfacctd_port: 9998 +sfacctd_port: ${sflowContainerPort_1} ! Get a value for 'label' from the pre_tag_map file. ! We use this to encode the sensor name for each port. - pre_tag_map: /etc/pmacct/sfacct-pretag.map +pre_tag_map: /etc/pmacct/sfacctd-pretag_1.map ! FOR PRINTING TO FILES instead of writing to rabbit queue (comment out amqp_* lines) ! Default format is tab-separated. If 'label' or any variable length field is in the aggregation list, you have to use csv format. @@ -20,9 +20,9 @@ sfacctd_port: 9998 ! FOR SENDING FLOWS TO THE LOCAL RABBIT QUEUE plugins: amqp - amqp_host: localhost - amqp_user: guest - amqp_passwd: guest + amqp_host: ${rabbitmq_input_host} + amqp_user: ${rabbitmq_input_username} + amqp_passwd: ${rabbitmq_input_pw} amqp_exchange_type: direct amqp_exchange: amq.direct amqp_persistent_msg: true diff --git a/docker-compose.develop.yml b/docker-compose.ES-Kibana.yml similarity index 100% rename from docker-compose.develop.yml rename to docker-compose.ES-Kibana.yml diff --git a/docker-compose.build.yml b/docker-compose.build.yml deleted file mode 100644 index d1d60cf3..00000000 --- a/docker-compose.build.yml +++ /dev/null @@ -1,12 +0,0 @@ -version: "3.7" -services: - importer: - image: netsage/pipeline_importer:latest - build: - context: . - dockerfile: compose/importer/Dockerfile - logstash: - image: netsage/pipeline_logstash:latest - build: - context: . 
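Because sfacctd and nfacctd publish to the amq.direct exchange on the rabbit host, it can be handy to peek at the raw messages before logstash consumes them when bringing up a new sensor. The sketch below is one way to do that with the Ruby bunny gem; the queue name is a throwaway and "netflow_raw" is only a placeholder for whatever amqp_routing_key is set to in the pmacct config. It is a debugging aid, not part of the pipeline.

require "bunny"   # assumes the bunny gem is available

conn = Bunny.new("amqp://guest:guest@localhost:5672")   # the rabbitmq_input_* values from .env
conn.start
ch = conn.create_channel

q = ch.queue("debug-peek", exclusive: true)        # throwaway queue, removed on disconnect
q.bind("amq.direct", routing_key: "netflow_raw")   # placeholder routing key

q.subscribe(block: true) do |_delivery, _props, payload|
  puts payload   # pmacct's amqp plugin normally emits one JSON record per message
end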
- dockerfile: ./compose/logstash/Dockerfile diff --git a/docker-compose.override_example.yml b/docker-compose.override_example.yml index 58d5799f..554703c8 100644 --- a/docker-compose.override_example.yml +++ b/docker-compose.override_example.yml @@ -1,34 +1,34 @@ version: "3.7" -services: - logstash: - image: netsage/pipeline_logstash:latest - ## If you need to override JVM options, uncomment these lines - # volumes: - # - ./userConfig/jvm.options:/usr/share/logstash/config/jvm.options +# Settings in this file override or add to those in docker-compose.yml. Copy in anything that needs to be changed. +# It will not be overwritten on upgrade. + +# setup_pmacct.sh creates docker_compose.override.yml from docker-compose.override_example.yml, if it does not yet exist, and fills in env vars. +# (It also creates or re-creates pmacct config files, filling in env vars.) - importer: - image: netsage/pipeline_importer:latest - ## If you add additional collectors or need to make other changes in the importer "shared" config, - ## use the netsage_override.xml file and uncomment the following lines - # volumes: - # - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml +# For ports: In the final docker_compose.override.yml, the 'port on host' should match what is listed in the .env file, while +# the 'port in the container' should match what is in the *facctd_n conf file (the latter is port number is determined by the setup script). - ## Modify port numbers as needed, and add any additional collectors here (see Docker Advanced documentation). - ## Remove any collectors that do not need to be running. - sflow-collector: - image: netsage/nfdump-collector:alpine-1.6.23 - restart: always - command: sfcapd -T all -l /data -S 1 -w -z -p 9998 - volumes: - - ./data/input_data/sflow:/data +services: + + sfacctd_1: ports: - - "9998:9998/udp" - netflow-collector: - image: netsage/nfdump-collector:alpine-1.6.23 - command: nfcapd -T all -l /data -S 1 -w -z -p 9999 + # port on host receiving flow data : port in the container + - "${sflowPort_1}:${sflowContainerPort_1}/udp" + + nfacctd_1: ports: - - "9999:9999/udp" - restart: always - volumes: - - ./data/input_data/netflow:/data + # port on host receiving flow data : port in the container + - "${netflowPort_1}:${netflowContainerPort_1}/udp" + +# TO ADD A SECOND SENSOR OF THE SAME TYPE: +# add a section to the .env file that uses *_2 variable names +# and increase the number of sflowSensors or netflowSensors; +# copy the whole nfacctd_1 or sfacctd_1 service section from docker-compose.yml to docker-compose.override.yml; +# change the ports to reference env variables instead of specifying example values: +# - "@{sflowPort_1}:@{sflowContainerPort_1}/udp" +# or - "@{netflowPort_1}:@{netflowContainerPort_1}/udp" +# change the @'s to $'s in the above lines! (can't show $'s here or env var values will get stuck into the comment); +# change all _1's to _2's (or _3's for third sensor of one type, etc) in the new section; +# then run the setup_pmacct.sh script [again]. +# Double check the changes made to the override file! diff --git a/docker-compose.yml b/docker-compose.yml index 9430fd0c..25b980ff 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,44 +1,93 @@ version: "3.7" + +# Default docker services and settings. +# Do not make changes here; use the override file. + +# Shared network for the containers. They will be able to communicate over default ports. 
+networks: + netsage-network: + services: - rabbit: - image: rabbitmq:3.8-management + sfacctd_1: + container_name: sfacctd_1 + image: sfacctd:7Jun2022 env_file: .env - hostname: rabbit - volumes: - - ./data/rabbit:/var/lib/rabbitmq ports: - - "15672:15672" - - "5671:5671" - - "5672:5672" - importer: - image: netsage/pipeline_importer:latest + # port on host for incoming flow data : port in the container + - "8000:8000/udp" + volumes: + # location of our configs : default location : read-only + - ./conf-pmacct:/etc/pmacct:ro + command: + # override the default parameters (entrypoint is the actual command) + - -f + - /etc/pmacct/sfacctd_1.conf + networks: + - netsage-network + depends_on: + - rabbit + + nfacctd_1: + container_name: nfacctd_1 + image: nfacctd:7Jun2022 env_file: .env + ports: + # port on host for incoming flow data : port in the container + - "9000:9000/udp" + volumes: + # location of our configs : default location : read-only + - ./conf-pmacct:/etc/pmacct:ro + command: + # override the default parameters (entrypoint is the actual command) + - -f + - /etc/pmacct/nfacctd_1.conf + networks: + - netsage-network depends_on: - rabbit - restart: always + + rabbit: + container_name: rabbit + hostname: rabbit + image: rabbitmq:3.9-management + env_file: .env + ports: + # The port for the UI needs to be mapped to that on the host + # To view, go to https:///rabbit + - "15672:15672" volumes: - - ./data:/data - - ./data/importer_cache:/var/cache/netsage - - ./conf-logstash:/usr/share/logstash/pipeline/ - labels: - ofelia.enabled: "true" - ofelia.job-exec.dataUpdate.schedule: "@weekly" - ofelia.job-exec.dataUpdate.command: "/tmp/docker_init.sh" + # Used a name Volume for the rabbitmq config and files for its own use. + # We want data to persist but don't need to see it. + - rabbit_vol:/var/lib/rabbitmq + networks: + - netsage-network + logstash: - image: netsage/pipeline_logstash:latest + container_name: logstash + image: docker.elastic.co/logstash/logstash:7.16.2 env_file: .env - depends_on: - - importer - ports: - - "5044:5044" + # Explicitly specify *.conf to be sure logstash doesn't use *.disabled configs. 
+ command: logstash -f /etc/logstash/conf.d/*.conf volumes: - - ./conf-logstash:/usr/share/logstash/pipeline/ - - ./data:/data - - ./data/cache:/var/lib/grnoc/netsage/ - ofelia: ## Scheduler Task - image: mcuadros/ofelia:v0.3.0 - command: daemon --docker + # location of our configs : default location : read-only + - ./conf-logstash/:/etc/logstash/conf.d/:ro + # location on host of downloaded maxmind and other files : default location : read-only + - /var/lib/grnoc/netsage/:/var/lib/grnoc/netsage/:ro ###### + networks: + - netsage-network depends_on: - - importer - volumes: - - /var/run/docker.sock:/var/run/docker.sock:ro + - rabbit + # restart is not included since if logstash dies, there may be an error and we dont' want it to keep restarting over and over + +# ofelia: ## Scheduler Task +# image: mcuadros/ofelia:v0.3.0 +# command: daemon --docker +# depends_on: +# - importer +# volumes: +# - /var/run/docker.sock:/var/run/docker.sock:ro + +# named volumes +volumes: + rabbit_vol: + diff --git a/env.example b/env.example index befe1ec5..d235f334 100644 --- a/env.example +++ b/env.example @@ -1,17 +1,31 @@ -# Sensor names that logstash will assign to flows -# === EXAMPLE VALUES MUST BE REPLACED === -sflowSensorName=The Sflow Sensor Name -netflowSensorName=The Netflow Sensor Name +# PMACCT SETTINGS +# Number of sensors of each type +# === UPDATE IF THERE ARE NOT 1 OF EACH TYPE === +sflowSensors=1 +netflowSensors=1 -# Processed flows are normally written to a rabbit queue. -# The default is to write to the local rabbitmq server "rabbit". +# Env variables for one sensor should all end in the same suffix, +# and there should be a sequence (_1, _2, etc) for sflow and a sequence for netflow. +# For each sensor, list the following: +# The sensor name to assign to flows +# The port on the pipeline host to which the router is sending flows +# === REPLACE EXAMPLE VALUES === +sflowSensorName_1=The Sflow Sensor Name +sflowPort_1=8000 + +netflowSensorName_1=The Netflow Sensor Name +netflowPort_1=9000 + +# LOGSTASH SETTINGS +# By default, processed flows are sent to a rabbit queue. +# The example settings write to the rabbitmq container, where they will accumulate, by default. # === TO SEND PROCESSED FLOWS TO GlobalNOC, ASK FOR THE PROPER SETTINGS === rabbitmq_output_host=rabbit rabbitmq_output_username=guest rabbitmq_output_pw=guest -rabbitmq_output_key=netsage_archive_input +rabbitmq_output_key=processed_flows -# Logstash Aggregation Filter settings +# Aggregation Filter settings # Default inactivity_timeout is 6-minute. If no matching flows have come in for 6 minutes, end the aggregated flow. # Default max_flow_timeout is 1 hour. This is the maximum allowed flow duration; longer flows will be broken up. # Aggregation_maps_path is where flows undergoing aggregation are saved if logstash shuts down. The default is for Docker installs. @@ -19,28 +33,28 @@ inactivity_timeout=360 max_flow_timeout=3600 aggregation_maps_path=/data/logstash-aggregation-maps -# PROCESSING OPTIONS - see the "Docker Advanced" documentation +# Advanced Processing Options - see the "Docker Advanced" documentation # To do ifindex (interface) filtering of flows from specified sensors: # Flows from listed sensors will be dropped unless src or dst interface is in the list of ifindexes to keep. # "ALL" can refer to all sensors or all interfaces of a sensor. # If a sensor is not referenced, all its flows will be kept. 
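As an illustration of the ifindex_filter_keep format described above, the Ruby sketch below shows how such a value could be broken apart. It is only a sketch of the expected format; the actual parsing happens in the sensor-specific-changes logstash filter, which is not part of this hunk.

ifindex_filter_keep = "Sensor A Name: 456,789; Sensor B Name: ALL"

keep = {}
ifindex_filter_keep.split(";").each do |entry|
  sensor, ifindexes = entry.split(":", 2)
  keep[sensor.strip] = ifindexes.split(",").map(&:strip)
end
puts keep.inspect   # => {"Sensor A Name"=>["456", "789"], "Sensor B Name"=>["ALL"]}

# Flows from "Sensor A Name" are kept only when src or dst ifindex is 456 or 789,
# every flow from "Sensor B Name" is kept, and sensors not listed are not filtered.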
ifindex_filter_flag=False -##ifindex_filter_keep= Sensor 1: 456,789; Sensor 2: ALL +##ifindex_filter_keep=Sensor A Name: 456,789; Sensor B Name: ALL # To change the sensor name for flows from a specified sensor and interface: # Provide the ifindex, old and new sensor names. ifindex_sensor_rename_flag=False ##ifindex_sensor_rename_ifindex=123 -##ifindex_sensor_rename_old_name=old name -##ifindex_sensor_rename_new_name=new name +##ifindex_sensor_rename_old_name=Old Sensor Name +##ifindex_sensor_rename_new_name=New Sensor Name # To correct for sampling in the logstash pipeline: # Normally, sampling corrections are applied before ingest into logstash, but in certain cases, # it may need to be done in logstash. Logstash will do corrections only if pmacct reports that it has not! # List affected sensors and the correction factor. "ALL" can refer to all sensors. sampling_correction_flag=False -##sampling_correction_sensors=sensor 1;sensor 2 +##sampling_correction_sensors=Sensor A Name; Sensor B Name ##sampling_correction_factor=100 # To do subnet filtering of flows: @@ -48,34 +62,37 @@ sampling_correction_flag=False # "ALL" can refer to all sensors. # If a sensor is not referenced, all its flows will be kept. subnet_filter_flag=False -##subnet_filter_keep=Sensor 1: 123.45.6.0/16; Sensor 2: 123.33.33.0/24, 456.66.66.0/24 +##subnet_filter_keep=Sensor A Name: 123.45.6.0/16; Sensor B Name: 123.33.33.0/24, 456.66.66.0/24 # To NOT deidentify flows: # Deidentification of IP addresses is done by default. # To keep full IP addresses, set this parameter to True. full_IPs_flag=False -# OTHER SETTINGS +# Logstash Process Settings -# Logstash settings -# set this to false so we don't install elasticsearch locally -XPACK_MONITORING_ENABLED=false -# java heap size for logstash -LS_JAVA_OPTS=-Xmx2g -Xms2g -# Do not change unless you are not using logstash aggregation! The aggregation filter requires one logstash worker only! +# The aggregation filter requires there be only one logstash worker! PIPELINE_WORKERS=1 -# for debugging -## LOG_LEVEL=debug +PIPELINE_ORDERED=true +# memory - java heap size +LS_JAVA_OPTS=-Xmx2g -Xms2g +# we are not going to evaluate cluster health and performance +XPACK_MONITORING_ENABLED=false -# Local RabbitMQ Server config (for the post-pmacct/pre-logstash queue) -RABBITMQ_ERLANG_COOKIE='secret cookie' -RABBIT_HOST=rabbit -RABBITMQ_DEFAULT_USER=guest -RABBITMQ_DEFAULT_PASS=guest -discovery.type=single-node +# LOGSTASH/PAMACCT SETTINGS +# pmacct will write and logstash will read flows from this rabbit host +# (when running rabbitmq locally outside of docker, input_host should be localhost) rabbitmq_input_host=rabbit rabbitmq_input_username=guest rabbitmq_input_pw=guest -# In case you run elasticsearch and kibana +# RABBITMQ SERVER SETTINGS (container) +# (for the post-pmacct/pre-logstash queue) +RABBIT_HOST=rabbit +RABBITMQ_DEFAULT_USER=guest +RABBITMQ_DEFAULT_PASS=guest +RABBITMQ_ERLANG_COOKIE='secret cookie' +discovery.type=single-node + +# In case you run elasticsearch container and want logstash to write to it ELASTIC_HOSTNAME='elastic' diff --git a/setup-pmacct.sh b/setup-pmacct.sh new file mode 100644 index 00000000..53c67385 --- /dev/null +++ b/setup-pmacct.sh @@ -0,0 +1,111 @@ +#!/bin/bash + +# This script reads pmacct env variables from the .env file, +# creates config files from the examples, and copies the env variable +# values into them. 
(Needed because pmacct doesn't support using env vars) +echo "" + +# Get env variables from .env file +input=".env" +# read line by line, splitting each line at "=" +while IFS='=' read -r name value +do + # save only netflow, sflow, and rabbitmq_input variables + if [[ $name == sflow* || $name == netflow* || $name == rabbitmq_input_* ]] + then + ##echo "Got $name == $value" >&2 + # if this is a sensor name, we need to encode it using #'s for spaces and prefixing with "sfacct--" or "nfacct--" + if [[ $name == sflowSensorName_* ]] + then + value="${value// /#}" + value="sfacct--${value}" + fi + if [[ $name == netflowSensorName_* ]] + then + value="${value// /#}" + value="nfacct--${value}" + fi + # export name-value pairs as env vars + export $name="$value" + fi +done < "$input" + +# Loop over sflow sensors / create config files +port=8000 +for (( n=1; n<=${sflowSensors}; n++ )) +do + # assign the port the container will use + # (Note that it is important to have the same internal (container) port numbers used for the same services (eg, _1) + # every time this script is run, since an override file with hardcoded port numbers may already exist.) + export sflowContainerPort_$n=$port + # create temp config files + cp conf-pmacct/sfacctd.conf.ORIG conf-pmacct/sfacctd_$n.conf.temp + cp conf-pmacct/sfacctd-pretag.map.ORIG conf-pmacct/sfacctd-pretag_$n.map.temp + # change *_1 env var names to *_n + sed -i "s/_1/_$n/g" conf-pmacct/sfacctd_$n.conf.temp + sed -i "s/_1/_$n/g" conf-pmacct/sfacctd-pretag_$n.map.temp + # replace all environment variables with values and save to final filenames + envsubst < conf-pmacct/sfacctd_$n.conf.temp > conf-pmacct/sfacctd_$n.conf + envsubst < conf-pmacct/sfacctd-pretag_$n.map.temp > conf-pmacct/sfacctd-pretag_$n.map + # remove temp files + rm conf-pmacct/*.temp + # next port number is 1 more + port=$(($port+1)) +done + +# Loop over netflow sensors / create config files +port=9000 +for (( n=1; n<=${netflowSensors}; n++ )) +do + # assign the port the container will use + # (Note that it is important to have the same internal (container) port numbers used for the same services (eg, _1) + # every time this script is run, since an override file with hardcoded port numbers may already exist.) + export netflowContainerPort_$n=$port + # create temp config files + cp conf-pmacct/nfacctd.conf.ORIG conf-pmacct/nfacctd_$n.conf.temp + cp conf-pmacct/nfacctd-pretag.map.ORIG conf-pmacct/nfacctd-pretag_$n.map.temp + # change *_1 env var names to *_n + sed -i "s/_1/_$n/g" conf-pmacct/nfacctd_$n.conf.temp + sed -i "s/_1/_$n/g" conf-pmacct/nfacctd-pretag_$n.map.temp + # replace all environment variables with values and save to final filenames + envsubst < conf-pmacct/nfacctd_$n.conf.temp > conf-pmacct/nfacctd_$n.conf + envsubst < conf-pmacct/nfacctd-pretag_$n.map.temp > conf-pmacct/nfacctd-pretag_$n.map + # remove temp files + rm conf-pmacct/*.temp + # next port number is 1 more + port=$(($port+1)) +done + +# If the docker-compose.override file doesn't exist, make it by copying the example +if [[ ! -f "docker-compose.override.yml" ]] +then + echo "Creating docker-compose.override.yml." + cp docker-compose.override_example.yml docker-compose.override.yml +fi + +# If there are no sflow sensors, and we didn't already do it, override the sfacctd command so the container +# just echos a line and exits right away; and set the port env vars to defaults so docker-compose doesn't complain that either is unset +if [[ ${sflowSensors} -eq 0 ]] && ! 
grep -ql "No Sflow collector" "docker-compose.override.yml" +then + echo "Replacing entry_point for sflow collector since it is not needed." + sed -i "s/sfacctd_1:/sfacctd_1:\n entrypoint: echo 'No Sflow collector.'/" docker-compose.override.yml + export sflowPort_1=8000 + export sflowContainerPort_1=8000 +fi +# Same if no netflow sensors +if [[ ${netflowSensors} -eq 0 ]] && ! grep -ql "No Netflow collector" "docker-compose.override.yml" +then + echo "Replacing entry_point for netflow collector since it is not needed." + sed -i "s/nfacctd_1:/nfacctd_1:\n entrypoint: echo 'No Netflow collector.'/" docker-compose.override.yml + export netflowPort_1=9000 + export netflowContainerPort_1=9000 +fi + +# Replace any env variables in the override file. +envsubst < docker-compose.override.yml > docker-compose.override.yml.temp +mv docker-compose.override.yml.temp docker-compose.override.yml + + +echo "Pmacct config files have been created, based on the .env file." +echo "Please check the docker-compose.override.yml file to be sure it matches the .env file!" +echo "" From e00ef55327d306e22266b4234888687ccf98eb18 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 13 Jul 2022 18:39:45 +0000 Subject: [PATCH 094/126] one cron file for all downloads to downloads/ and conf-logstash/support/ --- cron.d/netsage-downloads.cron.example | 35 +++++++++++++++++++++++++++ docker-compose.yml | 14 +++-------- downloads/.gitignore | 4 +++ 3 files changed, 42 insertions(+), 11 deletions(-) create mode 100644 cron.d/netsage-downloads.cron.example create mode 100644 downloads/.gitignore diff --git a/cron.d/netsage-downloads.cron.example b/cron.d/netsage-downloads.cron.example new file mode 100644 index 00000000..2ced8596 --- /dev/null +++ b/cron.d/netsage-downloads.cron.example @@ -0,0 +1,35 @@ +SHELL=/bin/sh +PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin +MAILTO=root + +# Download possibly-updated files required by the Netsage Pipeline + +## -- SET THE DOWNLOAD LOCATIONS (DOWNLOAD_PATH and SUPPORT_PATH) AS DESIRED THEN COPY TO /etc/cron.d/netsage-downloads.cron. +# For docker installations, --PATH-- should be replaced by the absolute location of the git checkout. 
+DOWNLOAD_PATH = "--PATH--/netsage-pipeline/downloads" +SUPPORT_PATH = "--PATH--/netsage-pipeline/conf-logstash/support" + +# MAXMIND ASN on Saturdays at 23:30 UTC +30 23 * * 6 root /usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/GeoLite2-ASN.mmdb -q -O $DOWNLOAD_PATH/newASN.mmdb && mv $DOWNLOAD_PATH/newASN.mmdb $DOWNLOAD_PATH/GeoLite2-ASN.mmdb && touch $DOWNLOAD_PATH/GeoLite2-ASN.mmdb + +# MAXMIND CITY on Saturdays at 23:35 UTC +35 23 * * 6 root /usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/GeoLite2-City.mmdb -q -O $DOWNLOAD_PATH/newCity.mmdb && mv $DOWNLOAD_PATH/newCity.mmdb $DOWNLOAD_PATH/GeoLite2-City.mmdb && touch $DOWNLOAD_PATH/GeoLite2-City.mmdb + +# CAIDA file on Saturdays at 23:40 UTC +40 23 * * 6 root /usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/CAIDA-org-lookup.csv -q -O $DOWNLOAD_PATH/newCAIDA.mmdb && mv $DOWNLOAD_PATH/newCAIDA.mmdb $DOWNLOAD_PATH/CAIDA-org-lookup.csv && touch $DOWNLOAD_PATH//CAIDA-org-lookup.csv + +# SCIENCE REGISTRY Saturdays at 23:45 UTC +45 23 * * 6 root /usr/bin/wget https://scienceregistry.netsage.global/exported/scireg.mmdb -q -O $DOWNLOAD_PATH/scireg.mmdb.new && mv $DOWNLOAD_PATH/scireg.mmdb.new $DOWNLOAD_PATH/scireg.mmdb && touch $DOWNLOAD_PATH/scireg.mmdb + +# FRGP MEMBER LIST on Saturdays at 23:50 UTC +50 23 * * 6 root /usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/FRGP-members-list.rb -q -O $SUPPORT_PATH/newFRGP.rb && mv $SUPPORT_PATH/newFRGP.rb $SUPPORT_PATH/FRGP-members-list.rb && touch $SUPPORT_PATH/FRGP-members-list.rb + +# ILIGHT MEMBER LIST on Saturdays at 23:52 UTC +52 23 * * 6 root /usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/ilight-members-list.rb -q -O $SUPPORT_PATH/newilight.rb && mv $SUPPORT_PATH/newilight.rb $SUPPORT_PATH/ilight-members-list.rb && touch $SUPPORT_PATH/ilight-members-list.rb + +# ONENET MEMBER LIST on Saturdays at 23:54 UTC +54 23 * * 6 root /usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/onenet-members-list.rb -q -O $SUPPORT_PATH/newonenet.rb && mv $SUPPORT_PATH/newonenet.rb $SUPPORT_PATH/onenet-members-list.rb && touch $SUPPORT_PATH/onenet-members-list.rb + + + + diff --git a/docker-compose.yml b/docker-compose.yml index 25b980ff..c91c4520 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -69,24 +69,16 @@ services: # Explicitly specify *.conf to be sure logstash doesn't use *.disabled configs. 
command: logstash -f /etc/logstash/conf.d/*.conf volumes: - # location of our configs : default location : read-only + # location of logstash configs on host : location within container : read-only - ./conf-logstash/:/etc/logstash/conf.d/:ro - # location on host of downloaded maxmind and other files : default location : read-only - - /var/lib/grnoc/netsage/:/var/lib/grnoc/netsage/:ro ###### + # location of downloaded maxmind and caida files on host : location within container : read-only + - ./downloads/:/var/lib/grnoc/netsage/:ro networks: - netsage-network depends_on: - rabbit # restart is not included since if logstash dies, there may be an error and we dont' want it to keep restarting over and over -# ofelia: ## Scheduler Task -# image: mcuadros/ofelia:v0.3.0 -# command: daemon --docker -# depends_on: -# - importer -# volumes: -# - /var/run/docker.sock:/var/run/docker.sock:ro - # named volumes volumes: rabbit_vol: diff --git a/downloads/.gitignore b/downloads/.gitignore new file mode 100644 index 00000000..5e7d2734 --- /dev/null +++ b/downloads/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore From 26f5f52f30c0e6e3cb9ae934cd76563a69e8cc1d Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 14 Jul 2022 16:02:39 +0000 Subject: [PATCH 095/126] added setup-cron.sh, modified cron files, .sh file to do wgets --- CHANGES.md | 3 ++ bin/docker-netsage-downloads.sh.ORIG | 42 +++++++++++++++++++ .../support/sensor_groups.json.example | 4 ++ .../support/sensor_types.json.example | 5 +++ ...cron.example => bm-netsage-downloads.cron} | 15 ++++--- cron.d/docker-netsage-downloads.cron.ORIG | 9 ++++ cron.d/netsage-caida-update.cron | 13 ------ cron.d/netsage-maxmind-update.cron | 14 ------- cron.d/netsage-memberlists-update.cron | 19 --------- cron.d/netsage-scireg-update.cron | 16 ------- setup-cron.sh | 19 +++++++++ setup-pmacct.sh | 0 12 files changed, 92 insertions(+), 67 deletions(-) create mode 100755 bin/docker-netsage-downloads.sh.ORIG create mode 100644 conf-logstash/support/sensor_groups.json.example create mode 100644 conf-logstash/support/sensor_types.json.example rename cron.d/{netsage-downloads.cron.example => bm-netsage-downloads.cron} (73%) create mode 100644 cron.d/docker-netsage-downloads.cron.ORIG delete mode 100644 cron.d/netsage-caida-update.cron delete mode 100644 cron.d/netsage-maxmind-update.cron delete mode 100644 cron.d/netsage-memberlists-update.cron delete mode 100644 cron.d/netsage-scireg-update.cron create mode 100755 setup-cron.sh mode change 100644 => 100755 setup-pmacct.sh diff --git a/CHANGES.md b/CHANGES.md index b03032b8..cdb5fa32 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -29,6 +29,9 @@ Features: * Added CERN and Utah regexes to sensor type and group files. * Added env file option to skip de-identification. * 0.0.0.x and 0.0.0.0 flows are tagged, and dropped by default. Unadvertised option to keep them is available in the env file. + * Changed to sensor_groups.json.example and sensor_types.json.example. From now on, our particular regexes will be downloaded from scienceregistry.grnoc. + * Cron file runs script to download all files from scienceregistry.grnoc once/wk + * setup-cron.sh script copies ORIG .cron and .sh files and plugs in username and location of git checkout. 
* Documentation updates * Dependabot automatic remediations of vulnerabilites (for docusaurus) diff --git a/bin/docker-netsage-downloads.sh.ORIG b/bin/docker-netsage-downloads.sh.ORIG new file mode 100755 index 00000000..e5336cb5 --- /dev/null +++ b/bin/docker-netsage-downloads.sh.ORIG @@ -0,0 +1,42 @@ +#!/bin/bash + +# Download possibly-updated files required by the Netsage Pipeline +# Use touch to change the file time to the time of download + +# -for docker installations - +# DOWNLOAD_PATH="/PATH-TO-GIT-CHECKOUT/downloads" +# SUPPORT_PATH="/PATH-TO-GIT-CHECKOUT/conf-logstash/support" + +# -for bare metal installations - run with sudo and use +# DOWNLOAD_PATH="/var/lib/grnoc/netsage" +# SUPPORT_PATH="/etc/logstash/conf.d/support" + +DOWNLOAD_PATH="-PATH-TO-GIT-CHECKOUT-/downloads" +SUPPORT_PATH="-PATH-TO-GIT-CHECKOUT-/conf-logstash/support" + +# MAXMIND ASN +/usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/GeoLite2-ASN.mmdb -q -O $DOWNLOAD_PATH/GeoLite2-ASN.mmdb && touch $DOWNLOAD_PATH/GeoLite2-ASN.mmdb + +# MAXMIND CITY +/usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/GeoLite2-City.mmdb -q -O $DOWNLOAD_PATH/GeoLite2-City.mmdb && touch $DOWNLOAD_PATH/GeoLite2-City.mmdb + +# CAIDA file +/usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/CAIDA-org-lookup.csv -q -O $DOWNLOAD_PATH/CAIDA-org-lookup.csv && touch $DOWNLOAD_PATH/CAIDA-org-lookup.csv + +# SCIENCE REGISTRY +/usr/bin/wget https://scienceregistry.netsage.global/exported/scireg.mmdb -q -O $DOWNLOAD_PATH/scireg.mmdb && touch $DOWNLOAD_PATH/scireg.mmdb + +# FRGP MEMBER LIST +/usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/FRGP-members-list.rb -q -O $SUPPORT_PATH/FRGP-members-list.rb && touch $SUPPORT_PATH/FRGP-members-list.rb + +# ILIGHT MEMBER LIST +/usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/ilight-members-list.rb -q -O $SUPPORT_PATH/ilight-members-list.rb && touch $SUPPORT_PATH/ilight-members-list.rb + +# ONENET MEMBER LIST +/usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/onenet-members-list.rb -q -O $SUPPORT_PATH/onenet-members-list.rb && touch $SUPPORT_PATH/onenet-members-list.rb + +# SENSOR_GROUPS +/usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/sensor_groups.json -q -O $SUPPORT_PATH/sensor_groups.json && touch $SUPPORT_PATH/sensor_groups.json + +# SENSOR_TYPES +/usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/sensor_types.json -q -O $SUPPORT_PATH/sensor_types.json && touch $SUPPORT_PATH/sensor_types.json diff --git a/conf-logstash/support/sensor_groups.json.example b/conf-logstash/support/sensor_groups.json.example new file mode 100644 index 00000000..d2470717 --- /dev/null +++ b/conf-logstash/support/sensor_groups.json.example @@ -0,0 +1,4 @@ +{ + "^A.*$": "sensor group A", + "^B.*$": "sensor group B" +} diff --git a/conf-logstash/support/sensor_types.json.example b/conf-logstash/support/sensor_types.json.example new file mode 100644 index 00000000..e17f8763 --- /dev/null +++ b/conf-logstash/support/sensor_types.json.example @@ -0,0 +1,5 @@ +{ + "^.*Tstat$": "Data Archive", + "^Network A -.*$": "Circuit", + "^Network B -.*$": "Regional Network" +} diff --git a/cron.d/netsage-downloads.cron.example b/cron.d/bm-netsage-downloads.cron similarity index 73% rename from cron.d/netsage-downloads.cron.example rename to cron.d/bm-netsage-downloads.cron index 2ced8596..6eb6a3f9 100644 --- a/cron.d/netsage-downloads.cron.example +++ b/cron.d/bm-netsage-downloads.cron @@ -2,12 +2,11 @@ SHELL=/bin/sh 
PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin MAILTO=root -# Download possibly-updated files required by the Netsage Pipeline +# Download possibly-updated files required by the Netsage Pipeline from scienceregistry.grnoc.iu.edu +# This cron file is to be used for bare-metal installations +DOWNLOAD_PATH="/var/lib/grnoc/netsage" +SUPPORT_PATH="/etc/logstash/conf.d/support" -## -- SET THE DOWNLOAD LOCATIONS (DOWNLOAD_PATH and SUPPORT_PATH) AS DESIRED THEN COPY TO /etc/cron.d/netsage-downloads.cron. -# For docker installations, --PATH-- should be replaced by the absolute location of the git checkout. -DOWNLOAD_PATH = "--PATH--/netsage-pipeline/downloads" -SUPPORT_PATH = "--PATH--/netsage-pipeline/conf-logstash/support" # MAXMIND ASN on Saturdays at 23:30 UTC 30 23 * * 6 root /usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/GeoLite2-ASN.mmdb -q -O $DOWNLOAD_PATH/newASN.mmdb && mv $DOWNLOAD_PATH/newASN.mmdb $DOWNLOAD_PATH/GeoLite2-ASN.mmdb && touch $DOWNLOAD_PATH/GeoLite2-ASN.mmdb @@ -30,6 +29,12 @@ SUPPORT_PATH = "--PATH--/netsage-pipeline/conf-logstash/support" # ONENET MEMBER LIST on Saturdays at 23:54 UTC 54 23 * * 6 root /usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/onenet-members-list.rb -q -O $SUPPORT_PATH/newonenet.rb && mv $SUPPORT_PATH/newonenet.rb $SUPPORT_PATH/onenet-members-list.rb && touch $SUPPORT_PATH/onenet-members-list.rb +# SENSOR_GROUPS on Saturdays at 23:15 UTC +15 23 * * 6 root /usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/sensor_groups.json -q -O $SUPPORT_PATH/newsensor_groups.json && mv $SUPPORT_PATH/newsensor_groups.json $SUPPORT_PATH/sensor_groups.json && touch $SUPPORT_PATH/sensor_groups.json + +# SENSOR_TYPES on Saturdays at 23:20 UTC +20 23 * * 6 root /usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/sensor_types.json -q -O $SUPPORT_PATH/newsensor_types.json && mv $SUPPORT_PATH/newsensor_types.json $SUPPORT_PATH/sensor_types.json && touch $SUPPORT_PATH/sensor_types.json + diff --git a/cron.d/docker-netsage-downloads.cron.ORIG b/cron.d/docker-netsage-downloads.cron.ORIG new file mode 100644 index 00000000..62f68539 --- /dev/null +++ b/cron.d/docker-netsage-downloads.cron.ORIG @@ -0,0 +1,9 @@ +SHELL=/bin/sh + +# This cron file is to be used with Docker installations. Fill in missing info manually or use the setup-cron.sh script. + +# Use wget to download possibly-updated files required for the Netsage Pipeline from scienceregistry.grnoc.iu.edu. +# Put them in directories in the git checkout of the pipeline code. + +# Get all on Saturdays at 23:00 UTC +00 23 * * 6 -USER- -PATH-TO-GIT-CHECKOUT-/bin/docker-netsage-downloads.sh > /dev/null 2>&1 diff --git a/cron.d/netsage-caida-update.cron b/cron.d/netsage-caida-update.cron deleted file mode 100644 index 0ddd42f9..00000000 --- a/cron.d/netsage-caida-update.cron +++ /dev/null @@ -1,13 +0,0 @@ -SHELL=/bin/sh -PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin -MAILTO=root - -# Get updated CAIDA asn-to-org csv file from scienceregistry.grnoc.iu.edu -# It will be updated only quartly but can be downloaded weekly to be able to monitor its freshness -# -q for quiet - so no email if no output. 
-# Instead of touch, if your wget version has it, you can use --no-use-server-timestamps which sets the file's time to the download time -# - -## UNCOMMENT AFTER FILLING IN USERNAME AND PW -# on Wednesdays at 23:40 UTC -##40 23 * * 3 root /usr/bin/wget --user xxx --password xxx https://scienceregistry.grnoc.iu.edu/exported/CAIDA-org-lookup.csv -q -O /var/lib/grnoc/netsage/newCAIDA.mmdb && mv /var/lib/grnoc/netsage/newCAIDA.mmdb /var/lib/grnoc/netsage/CAIDA-org-lookup.csv && touch /var/lib/grnoc/netsage/CAIDA-org-lookup.csv diff --git a/cron.d/netsage-maxmind-update.cron b/cron.d/netsage-maxmind-update.cron deleted file mode 100644 index f9d8aca0..00000000 --- a/cron.d/netsage-maxmind-update.cron +++ /dev/null @@ -1,14 +0,0 @@ -SHELL=/bin/sh -PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin -MAILTO=root - -# Get updated MaxMind GeoLite2-ASN and GeoLite2-City databases from scienceregistry.grnoc.iu.edu -# -q for quiet - so no email if no output. -# Instead of touch, if your wget version has it, you can use --no-use-server-timestamps which sets the file's time to the download time - -## UNCOMMENT AFTER FILLING IN USERNAME AND PW -# on Wednesdays at 23:30 UTC -## 30 23 * * 3 root /usr/bin/wget --user xxx --password xxx https://scienceregistry.grnoc.iu.edu/exported/GeoLite2-ASN.mmdb -q -O /var/lib/grnoc/netsage/newASN.mmdb && mv /var/lib/grnoc/netsage/newASN.mmdb /var/lib/grnoc/netsage/GeoLite2-ASN.mmdb && touch /var/lib/grnoc/netsage/GeoLite2-ASN.mmdb -# -# # on Wednesdays at 23:35 UTC -## 35 23 * * 3 root /usr/bin/wget --user xxx --password xxx https://scienceregistry.grnoc.iu.edu/exported/GeoLite2-City.mmdb -q -O /var/lib/grnoc/netsage/newCity.mmdb && mv /var/lib/grnoc/netsage/newCity.mmdb /var/lib/grnoc/netsage/GeoLite2-City.mmdb && touch /var/lib/grnoc/netsage/GeoLite2-City.mmdb diff --git a/cron.d/netsage-memberlists-update.cron b/cron.d/netsage-memberlists-update.cron deleted file mode 100644 index 824f78fc..00000000 --- a/cron.d/netsage-memberlists-update.cron +++ /dev/null @@ -1,19 +0,0 @@ -SHELL=/bin/sh -PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin -MAILTO=root - -# Get updated member-org lists from scienceregistry.grnoc.iu.edu -# These will be updated randomly and rarely, but can be downloaded weekly to be able to monitor their freshness -# -q for quiet - so no email if no output. 
-# Instead of touch, if your wget version has it, you can use --no-use-server-timestamps which sets the file's time to the download time -# - -## UNCOMMENT AFTER FILLING IN USERNAME AND PW -# on Wednesdays at 23:45 UTC -##45 23 * * 3 root /usr/bin/wget --user xxx --password xxx https://scienceregistry.grnoc.iu.edu/exported/FRGP-members-list.rb -q -O /etc/logstash/conf.d/support/newFRGP.rb && mv /etc/logstash/conf.d/support/newFRGP.rb /etc/logstash/conf.d/support/FRGP-members-list.rb && touch /etc/logstash/conf.d/support/FRGP-members-list.rb - -# on Wednesdays at 23:50 UTC -##50 23 * * 3 root /usr/bin/wget --user xxx --password xxx https://scienceregistry.grnoc.iu.edu/exported/ilight-members-list.rb -q -O /etc/logstash/conf.d/support/newilight.rb && mv /etc/logstash/conf.d/support/newilight.rb /etc/logstash/conf.d/support/ilight-members-list.rb && touch /etc/logstash/conf.d/support/ilight-members-list.rb - -# on Wednesdays at 23:55 UTC -##55 23 * * 3 root /usr/bin/wget --user xxx --password xxx https://scienceregistry.grnoc.iu.edu/exported/onenet-members-list.rb -q -O /etc/logstash/conf.d/support/newonenet.rb && mv /etc/logstash/conf.d/support/newonenet.rb /etc/logstash/conf.d/support/onenet-members-list.rb && touch /etc/logstash/conf.d/support/onenet-members-list.rb diff --git a/cron.d/netsage-scireg-update.cron b/cron.d/netsage-scireg-update.cron deleted file mode 100644 index 4e6a7de1..00000000 --- a/cron.d/netsage-scireg-update.cron +++ /dev/null @@ -1,16 +0,0 @@ -SHELL=/bin/sh -PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin -MAILTO=root - -# Get Science Registry info from scienceregistry.netsage.global (scienceregistry.grnoc.iu.edu) -# This "fake geoip" mmdb file is used by logstash -# -q for quiet - no email if no output. -# Instead of touch, if your wget version has it, you can use --no-use-server-timestamps which sets the file's time to the download time - -## UNCOMMENT AFTER CONFIRMING A TIME TO RUN -# daily at 00:00 UTC -##00 00 * * * root /usr/bin/wget https://scienceregistry.netsage.global/exported/scireg.mmdb -q -O /var/lib/grnoc/netsage/scireg.mmdb.new && mv /var/lib/grnoc/netsage/scireg.mmdb.new /var/lib/grnoc/netsage/scireg.mmdb && touch /var/lib/grnoc/netsage/scireg.mmdb - -# get yaml file in case a human wants to view the data. csv and json are also available. -##05 00 * * * root /usr/bin/wget https://scienceregistry.netsage.global/exported/scireg.yaml -q -O /var/lib/grnoc/netsage/scireg.yaml.new && mv /var/lib/grnoc/netsage/scireg.yaml.new /var/lib/grnoc/netsage/scireg.yaml && touch /var/lib/grnoc/netsage/scireg.yaml - diff --git a/setup-cron.sh b/setup-cron.sh new file mode 100755 index 00000000..dd62b575 --- /dev/null +++ b/setup-cron.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# This script modifies docker-netsage-downloads.cron and docker-netsage-downloads.sh +# to fill in user and path info. + +# USER and PWD env vars are assumed to be already set +cp cron.d/docker-netsage-downloads.cron.ORIG cron.d/docker-netsage-downloads.cron +sed -i "s|-USER-|$USER|" cron.d/docker-netsage-downloads.cron +sed -i "s|-PATH-TO-GIT-CHECKOUT-|$PWD|" cron.d/docker-netsage-downloads.cron +cp bin/docker-netsage-downloads.sh.ORIG bin/docker-netsage-downloads.sh +sed -i "s|-PATH-TO-GIT-CHECKOUT-|$PWD|g" bin/docker-netsage-downloads.sh + +echo "" +echo "Cron and bin files have been set up." +echo "-> Please check cron.d/docker-netsage-downloads.cron for correct user and path, " +echo "-> and copy it to /etc/cron.d/." 
+echo "If you need to immediately download files, run bin/docker-netsage-downloads.sh manually." +echo "" + diff --git a/setup-pmacct.sh b/setup-pmacct.sh old mode 100644 new mode 100755 From 4358255b39c56ef8c5b1eaec7c7ebfa09d77b888 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 14 Jul 2022 16:10:33 +0000 Subject: [PATCH 096/126] modified spec and MANIFEST files for new cron file --- MANIFEST | 5 +---- grnoc-netsage-pipeline.spec | 10 ++-------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/MANIFEST b/MANIFEST index 76a1093c..36ad0884 100644 --- a/MANIFEST +++ b/MANIFEST @@ -35,11 +35,8 @@ conf-logstash/ruby/domestic.rb conf-logstash/support/sensor_groups.json conf-logstash/support/sensor_types.json conf-logstash/support/networkA-members-list.rb.example -cron.d/netsage-maxmind-update.cron -cron.d/netsage-caida-update.cron -cron.d/netsage-scireg-update.cron +cron.d/bm-netsage-downloads.cron cron.d/netsage-logstash-restart.cron -cron.d/netsage-memberlists-update.cron systemd/logstash.service systemd/sfacctd.service systemd/nfacctd.service diff --git a/grnoc-netsage-pipeline.spec b/grnoc-netsage-pipeline.spec index 0970f3f1..0302757a 100644 --- a/grnoc-netsage-pipeline.spec +++ b/grnoc-netsage-pipeline.spec @@ -80,10 +80,7 @@ make pure_install %{__install} bin/restart-logstash.sh %{buildroot}/usr/bin/restart-logstash.sh -%{__install} cron.d/netsage-scireg-update.cron %{buildroot}/etc/cron.d/netsage-scireg-update.cron -%{__install} cron.d/netsage-maxmind-update.cron %{buildroot}/etc/cron.d/netsage-maxmind-update.cron -%{__install} cron.d/netsage-caida-update.cron %{buildroot}/etc/cron.d/netsage-caida-update.cron -%{__install} cron.d/netsage-memberlists-update.cron %{buildroot}/etc/cron.d/netsage-memberlists-update.cron +%{__install} cron.d/bm-netsage-downloads.cron %{buildroot}/etc/cron.d/netsage-downloads.cron %{__install} cron.d/netsage-logstash-restart.cron %{buildroot}/etc/cron.d/netsage-logstash-restart.cron %{__install} systemd/logstash.service %{buildroot}/etc/systemd/system/logstash.service @@ -114,10 +111,7 @@ rm -rf $RPM_BUILD_ROOT %defattr(644, root, root, 755) # Don't overwrite cron files. Create .rpmnew files if needed. -%config(noreplace) /etc/cron.d/netsage-scireg-update.cron -%config(noreplace) /etc/cron.d/netsage-maxmind-update.cron -%config(noreplace) /etc/cron.d/netsage-caida-update.cron -%config(noreplace) /etc/cron.d/netsage-memberlists-update.cron +%config(noreplace) /etc/cron.d/netsage-downloads.cron %config(noreplace) /etc/cron.d/netsage-logstash-restart.cron # Don't overwrite these .confs. Create .rpmnew files if needed. 
From 328e7253a81c7a4fe931b215659fb599a5c72b81 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 14 Jul 2022 23:53:02 +0000 Subject: [PATCH 097/126] Added logstash-temp volume for agg map files; renamed and cleaned up some files --- .gitignore | 5 +- compose/importer/Dockerfile | 53 ---------------- compose/importer/docker_init.sh | 33 ---------- compose/importer/grnoc7.repo | 6 -- compose/importer/logging.conf | 4 -- compose/importer/netsage_shared.xml | 65 -------------------- compose/importer/run.sh | 5 -- compose/logstash.repo | 8 --- compose/logstash/Dockerfile | 14 ----- compose/logstash/pipelines.yml | 2 - conf-logstash/40-aggregation.conf | 2 +- data/.place_holder | 0 docker-compose.yml | 5 +- env.example | 4 +- {downloads => logstash-downloads}/.gitignore | 0 userConfig/README.md | 2 +- util/{hist-export.pl => histogram-export.pl} | 0 util/netsage-raw-data-importer | 54 ---------------- util/netsage_raw_data_importer.xml.example | 39 ------------ 19 files changed, 12 insertions(+), 289 deletions(-) delete mode 100644 compose/importer/Dockerfile delete mode 100755 compose/importer/docker_init.sh delete mode 100644 compose/importer/grnoc7.repo delete mode 100644 compose/importer/logging.conf delete mode 100644 compose/importer/netsage_shared.xml delete mode 100755 compose/importer/run.sh delete mode 100644 compose/logstash.repo delete mode 100644 compose/logstash/Dockerfile delete mode 100644 compose/logstash/pipelines.yml delete mode 100644 data/.place_holder rename {downloads => logstash-downloads}/.gitignore (100%) rename util/{hist-export.pl => histogram-export.pl} (100%) delete mode 100755 util/netsage-raw-data-importer delete mode 100644 util/netsage_raw_data_importer.xml.example diff --git a/.gitignore b/.gitignore index 2f452f70..97797d49 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,10 @@ conf/systemd/deploy .env docker-compose.override.yml userConfig -data +bin/docker-netsage-downloads.sh +cron.d/docker-netsage-downloads.cron +conf-pmacct/*_1* +conf-pmacct/*_2* # Dependencies /website/node_modules diff --git a/compose/importer/Dockerfile b/compose/importer/Dockerfile deleted file mode 100644 index 7eba027e..00000000 --- a/compose/importer/Dockerfile +++ /dev/null @@ -1,53 +0,0 @@ -FROM centos:7 - -## Stage 1 Build the PRMs -## Setup baseline -RUN \ - yum -y update && \ - yum install -y epel-release && \ - yum install -y rpm-build perl-ExtUtils-MakeMaker make - -COPY . 
/root/code -WORKDIR /root/code - -RUN mkdir rpmbuild && cd rpmbuild && \ - mkdir BUILD BUILDROOT RPMS SOURCES SPECS SRPMS TMP && \ - cd /root/code/ && perl Makefile.PL - - -#RUN chown -R coder /home/coder -RUN make rpm - -## Stage 2 -FROM centos:7 - -COPY --from=0 /root/rpmbuild/RPMS/noarch/*.rpm /tmp - -COPY compose/importer/grnoc7.repo /etc/yum.repos.d/grnoc7.repo -COPY compose/importer/docker_init.sh /tmp/ -COPY compose/importer/run.sh /tmp/ -COPY compose/importer/netsage_shared.xml /etc/grnoc/netsage/deidentifier/ -COPY compose/importer/logging.conf /etc/grnoc/netsage/deidentifier/ - -## Setup baseline -RUN \ - yum -y update && \ - yum install -y dnf epel-release && \ - yum install -y nfdump wget && \ - dnf install -y /tmp/*.rpm && \ - yum clean all && \ - rm -rf /var/cache/yum - -RUN mkdir /data; chown 777 /data - -## Exposed but likely not needed -VOLUME /var/cache/netsage/ -VOLUME /etc/grnoc/netsage/deidentifier/ -VOLUME /var/lib/grnoc/netsage/ - -#Data volume -VOLUME /data -## Config exposed -VOLUME /etc/grnoc/netsage/ - -CMD /tmp/run.sh diff --git a/compose/importer/docker_init.sh b/compose/importer/docker_init.sh deleted file mode 100755 index 691166b6..00000000 --- a/compose/importer/docker_init.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env bash - -#DATA_DIR=/var/lib/grnoc/netsage/ -DATA_DIR=/data/cache/ -LOGSTASH_DIR=/usr/share/logstash/pipeline/support -mkdir -p $DATA_DIR && echo "Cache directory ${DATA_DIR} created" || echo "cache dir ${DATA_DIR} already exists" - -FILES="GeoLite2-ASN scireg GeoLite2-City" -CAIDA_FILES="CAIDA-org-lookup" -RUBY_DATA="FRGP-members-list ilight-members-list onenet-members-list" - -function downloadFiles() { - ext=$1 - shift 1 - ## Download all files to temporary destination - for f in $@; do - wget https://scienceregistry.grnoc.iu.edu/exported/${f}.${ext} --no-use-server-timestamps -q -O ${DATA_DIR}/$f.tmp - done - - ## Rename the temporary files to replace the production ones. 
- for f in $@; do - mv ${DATA_DIR}/$f.tmp ${DATA_DIR}/${f}.${ext} - done - -} - -echo "Download ScienceRegistry and maxmind" -downloadFiles mmdb $FILES -echo "Download Caida Files" -downloadFiles csv $CAIDA_FILES -echo "Download Ruby files" -DATA_DIR=$LOGSTASH_DIR -downloadFiles rb $RUBY_DATA diff --git a/compose/importer/grnoc7.repo b/compose/importer/grnoc7.repo deleted file mode 100644 index 082a3671..00000000 --- a/compose/importer/grnoc7.repo +++ /dev/null @@ -1,6 +0,0 @@ -[grnoc7] -name=GlobalNOC Public el7 Packages - $basearch -baseurl=https://repo-public.grnoc.iu.edu/repo/7/$basearch -enabled=1 -gpgcheck=1 -gpgkey=https://repo-public.grnoc.iu.edu/repo/RPM-GPG-KEY-GRNOC7 diff --git a/compose/importer/logging.conf b/compose/importer/logging.conf deleted file mode 100644 index 79c48b75..00000000 --- a/compose/importer/logging.conf +++ /dev/null @@ -1,4 +0,0 @@ -log4j.rootLogger=INFO, CONSOLE -log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender -log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout -log4j.appender.CONSOLE.layout.ConversionPattern=%-4r %-5p %c %x - %m%n diff --git a/compose/importer/netsage_shared.xml b/compose/importer/netsage_shared.xml deleted file mode 100644 index fd6117ee..00000000 --- a/compose/importer/netsage_shared.xml +++ /dev/null @@ -1,65 +0,0 @@ - - - - - - - /data/input_data/netflow - $netflowSensorName - netflow - - - - /data/input_data/sflow - $sflowSensorName - sflow - - - - - - - 1 - 3 - - - - - rabbit - 5672 - guest - guest - 0 - 100 - / - 1 - - - rabbit - 5672 - guest - guest - 0 - 100 - / - 1 - - - diff --git a/compose/importer/run.sh b/compose/importer/run.sh deleted file mode 100755 index aba024b9..00000000 --- a/compose/importer/run.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash - -/tmp/docker_init.sh - -netsage-netflow-importer-daemon --nofork --config /etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml \ No newline at end of file diff --git a/compose/logstash.repo b/compose/logstash.repo deleted file mode 100644 index db5793b0..00000000 --- a/compose/logstash.repo +++ /dev/null @@ -1,8 +0,0 @@ -[elastic-7.x] -name=Elastic repository for 7.x packages -baseurl=https://artifacts.elastic.co/packages/7.x/yum -gpgcheck=1 -gpgkey=https://artifacts.elastic.co/GPG-KEY-elasticsearch -enabled=1 -autorefresh=1 -type=rpm-md \ No newline at end of file diff --git a/compose/logstash/Dockerfile b/compose/logstash/Dockerfile deleted file mode 100644 index d81448f5..00000000 --- a/compose/logstash/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -FROM docker.elastic.co/logstash/logstash:7.16.2 - -#Create symlink so can use paths from production with logstash docker defaults -USER root -RUN mkdir -p /etc/logstash && \ - ln -s /usr/share/logstash/pipeline /etc/logstash/conf.d - -COPY --chown=logstash:root compose/logstash/pipelines.yml /usr/share/logstash/config/ - -USER logstash - -VOLUME /var/cache/netsage -VOLUME /var/lib/grnoc/netsage/ -VOLUME /usr/share/logstash/config/ diff --git a/compose/logstash/pipelines.yml b/compose/logstash/pipelines.yml deleted file mode 100644 index 464b2c7e..00000000 --- a/compose/logstash/pipelines.yml +++ /dev/null @@ -1,2 +0,0 @@ -- pipeline.id: elastiflow - path.config: "/usr/share/logstash/pipeline/*.conf" \ No newline at end of file diff --git a/conf-logstash/40-aggregation.conf b/conf-logstash/40-aggregation.conf index af155841..628a853b 100644 --- a/conf-logstash/40-aggregation.conf +++ b/conf-logstash/40-aggregation.conf @@ -76,7 +76,7 @@ filter { # Save the aggregation map as a new event upon timeout 
push_map_as_event_on_timeout => true - # Save all the in-progress aggregation maps when logstash shuts down, to be read back in when it restarts. + # Save all the in-progress aggregation maps to this file when logstash shuts down, to be read back in when it restarts. ## (use a different file for each logstash pipeline!) aggregate_maps_path => '${aggregation_maps_path:/tmp/logstash-aggregation-maps}' diff --git a/data/.place_holder b/data/.place_holder deleted file mode 100644 index e69de29b..00000000 diff --git a/docker-compose.yml b/docker-compose.yml index c91c4520..f51476eb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -72,7 +72,10 @@ services: # location of logstash configs on host : location within container : read-only - ./conf-logstash/:/etc/logstash/conf.d/:ro # location of downloaded maxmind and caida files on host : location within container : read-only - - ./downloads/:/var/lib/grnoc/netsage/:ro + - ./logstash-downloads/:/var/lib/grnoc/netsage/:ro + # location for aggregation map files, which save flows being aggregated when logstash shuts down + - ./logstash-temp/:/logstash-temp/ + networks: - netsage-network depends_on: diff --git a/env.example b/env.example index d235f334..609ccfb5 100644 --- a/env.example +++ b/env.example @@ -28,10 +28,10 @@ rabbitmq_output_key=processed_flows # Aggregation Filter settings # Default inactivity_timeout is 6-minute. If no matching flows have come in for 6 minutes, end the aggregated flow. # Default max_flow_timeout is 1 hour. This is the maximum allowed flow duration; longer flows will be broken up. -# Aggregation_maps_path is where flows undergoing aggregation are saved if logstash shuts down. The default is for Docker installs. +# Aggregation_maps_path is the file where flows undergoing aggregation are saved if logstash shuts down. The default is for Docker installs. inactivity_timeout=360 max_flow_timeout=3600 -aggregation_maps_path=/data/logstash-aggregation-maps +aggregation_maps_path=/logstash-temp/logstash-aggregation-maps # Advanced Processing Options - see the "Docker Advanced" documentation diff --git a/downloads/.gitignore b/logstash-downloads/.gitignore similarity index 100% rename from downloads/.gitignore rename to logstash-downloads/.gitignore diff --git a/userConfig/README.md b/userConfig/README.md index 81c3e819..f203cb77 100644 --- a/userConfig/README.md +++ b/userConfig/README.md @@ -2,6 +2,6 @@ This directory is git ignore so it ensures any changes here are preserved. Any user overrides should go in here and saved for the next release. -Example of user overrides would be special importer configuration, logstash settings that are not configured via env and so on. +Example of user overrides would be special logstash settings that are not configured via env and so on. diff --git a/util/hist-export.pl b/util/histogram-export.pl similarity index 100% rename from util/hist-export.pl rename to util/histogram-export.pl diff --git a/util/netsage-raw-data-importer b/util/netsage-raw-data-importer deleted file mode 100755 index 695003f0..00000000 --- a/util/netsage-raw-data-importer +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/perl - -##### I believe this was used for development and to read files created by the old netsage-flow-archive pipeline piece -##### Reads json files which are rabbitmq messages which are batches of 100 flows. 
-##### Used with RawDataImporter.pm - -use strict; -use warnings; - -use GRNOC::NetSage::Deidentifier::RawDataImporter; - -use Getopt::Long; -use Data::Dumper; - -### constants ### - -use constant DEFAULT_CONFIG_FILE => '/etc/grnoc/netsage/deidentifier/netsage_raw_data_importer.xml'; -use constant DEFAULT_LOGGING_FILE => '/etc/grnoc/netsage/deidentifier/logging.conf'; - -### command line options ### - -my $config = DEFAULT_CONFIG_FILE; -my $logging = DEFAULT_LOGGING_FILE; -my $nofork; -my @files; -my $help; - -GetOptions( 'config=s' => \$config, - 'logging=s' => \$logging, - 'nofork' => \$nofork, - 'file=s{1,}' => \@files, - 'help|h|?' => \$help ); - -# did they ask for help? -usage() if $help; - -# start/daemonize writer -my $raw_importer = GRNOC::NetSage::Deidentifier::RawDataImporter->new( config_file => $config, - logging_file => $logging, - daemonize => !$nofork, - files => \@files, - process_name => "netsage_raw_data_importer", - task_type => "noinput" ); - -$raw_importer->start("no_input_queue"); - -### helpers ### - -sub usage { - - print "Usage: $0 [--config ] [--logging ] [--jsonfile ]\n"; - - exit( 1 ); -} diff --git a/util/netsage_raw_data_importer.xml.example b/util/netsage_raw_data_importer.xml.example deleted file mode 100644 index d2cf67a9..00000000 --- a/util/netsage_raw_data_importer.xml.example +++ /dev/null @@ -1,39 +0,0 @@ - - - - 127.0.0.1 - 5672 - guest - guest - 0 - 100 - / - /path/to/certificate.crt - netsage_deidentifier_netflow_raw2 - 2 - 1 - - - 127.0.0.1 - 5672 - guest - guest - 0 - 100 - / - /path/to/certificate.crt - 3 - netsage_deidentified - netsage_deidentified - - - - - - 1 - - - /var/run/netsage-raw-importer.pid - - - From a91cb6a193e1b2b93a56d0d9e98762e6713e6fe4 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Fri, 15 Jul 2022 01:37:08 +0000 Subject: [PATCH 098/126] ensure logstash runs as user 1000; in setup-cron.sh, change owner of logstash-temp/ to 1000. This all ensures agg map files can be written. --- docker-compose.yml | 1 + logstash-temp/.gitignore | 4 ++++ setup-cron.sh | 15 +++++++++++---- setup-pmacct.sh | 7 ++++--- 4 files changed, 20 insertions(+), 7 deletions(-) create mode 100644 logstash-temp/.gitignore diff --git a/docker-compose.yml b/docker-compose.yml index f51476eb..ab47a422 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -65,6 +65,7 @@ services: logstash: container_name: logstash image: docker.elastic.co/logstash/logstash:7.16.2 + user: 1000:1000 env_file: .env # Explicitly specify *.conf to be sure logstash doesn't use *.disabled configs. command: logstash -f /etc/logstash/conf.d/*.conf diff --git a/logstash-temp/.gitignore b/logstash-temp/.gitignore new file mode 100644 index 00000000..5e7d2734 --- /dev/null +++ b/logstash-temp/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/setup-cron.sh b/setup-cron.sh index dd62b575..dec3ae23 100755 --- a/setup-cron.sh +++ b/setup-cron.sh @@ -11,9 +11,16 @@ cp bin/docker-netsage-downloads.sh.ORIG bin/docker-netsage-downloads.sh sed -i "s|-PATH-TO-GIT-CHECKOUT-|$PWD|g" bin/docker-netsage-downloads.sh echo "" -echo "Cron and bin files have been set up." -echo "-> Please check cron.d/docker-netsage-downloads.cron for correct user and path, " -echo "-> and copy it to /etc/cron.d/." -echo "If you need to immediately download files, run bin/docker-netsage-downloads.sh manually." +echo " Cron and bin files have been set up." 
+echo " -> Please check cron.d/docker-netsage-downloads.cron for correct user and path, and " +echo " -> COPY IT TO /etc/cron.d/." +echo " If you need to immediately download files, run bin/docker-netsage-downloads.sh manually." +echo "" + +# Also... When we restart logstash, the process needs to be able to write then read a file in logstash-temp/. +# Set the owner and group of logstash-temp/ to 1000, which is the default uid of the user that logstash runs as (see docker-compose.yml). +echo " If requested, enter the sudo password to allow the script to change the owner of logstash-temp/" +echo " (If you get an error, please change the owner and group of logstash-temp/ to 1000. It doesn't matter what username this maps to.)" +sudo chown 1000:1000 logstash-temp echo "" diff --git a/setup-pmacct.sh b/setup-pmacct.sh index 53c67385..b5440973 100755 --- a/setup-pmacct.sh +++ b/setup-pmacct.sh @@ -105,7 +105,8 @@ fi envsubst < docker-compose.override.yml > docker-compose.override.yml.temp mv docker-compose.override.yml.temp docker-compose.override.yml - -echo "Pmacct config files have been created, based on the .env file." -echo "Please check the docker-compose.override.yml file to be sure it matches the .env file!" +echo " Pmacct config files have been created, based on the .env file." +echo " Please check the docker-compose.override.yml file to be sure it matches the .env file!" echo "" + + From f40df44d36f1e51a29fc25483a3a35b08a647d96 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Jul 2022 01:59:13 +0000 Subject: [PATCH 099/126] Bump terser from 4.8.0 to 4.8.1 in /website Bumps [terser](https://github.com/terser/terser) from 4.8.0 to 4.8.1. - [Release notes](https://github.com/terser/terser/releases) - [Changelog](https://github.com/terser/terser/blob/master/CHANGELOG.md) - [Commits](https://github.com/terser/terser/commits) --- updated-dependencies: - dependency-name: terser dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- website/yarn.lock | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index b3362c9a..c812f1d0 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -2602,9 +2602,9 @@ browserslist@^4.0.0, browserslist@^4.12.0, browserslist@^4.14.5, browserslist@^4 node-releases "^1.1.70" buffer-from@^1.0.0: - version "1.1.1" - resolved "https://registry.yarnpkg.com/buffer-from/-/buffer-from-1.1.1.tgz#32713bc028f75c02fdb710d7c7bcec1f2c6070ef" - integrity sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A== + version "1.1.2" + resolved "https://registry.yarnpkg.com/buffer-from/-/buffer-from-1.1.2.tgz#2b146a6fd72e80b4f55d255f35ed59a3a9a41bd5" + integrity sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ== buffer-indexof@^1.0.0: version "1.1.1" @@ -8943,9 +8943,9 @@ source-map-resolve@^0.5.0: urix "^0.1.0" source-map-support@~0.5.12, source-map-support@~0.5.19: - version "0.5.19" - resolved "https://registry.yarnpkg.com/source-map-support/-/source-map-support-0.5.19.tgz#a98b62f86dcaf4f67399648c085291ab9e8fed61" - integrity sha512-Wonm7zOCIJzBGQdB+thsPar0kYuCIzYvxZwlBa87yi/Mdjv7Tip2cyVbLj5o0cFPN4EVkuTwb3GDDyUx2DGnGw== + version "0.5.21" + resolved "https://registry.yarnpkg.com/source-map-support/-/source-map-support-0.5.21.tgz#04fe7c7f9e1ed2d662233c28cb2b35b9f63f6e4f" + integrity sha512-uBHU3L3czsIyYXKX88fdrGovxdSCoTGDRZ6SYXtSRxLZUzHg5P/66Ht6uoUlHu9EZod+inXhKo3qQgwXUT/y1w== dependencies: buffer-from "^1.0.0" source-map "^0.6.0" @@ -9303,9 +9303,9 @@ terser-webpack-plugin@^4.1.0: webpack-sources "^1.4.3" terser@^4.1.2, terser@^4.6.3: - version "4.8.0" - resolved "https://registry.yarnpkg.com/terser/-/terser-4.8.0.tgz#63056343d7c70bb29f3af665865a46fe03a0df17" - integrity sha512-EAPipTNeWsb/3wLPeup1tVPaXfIaU68xMnVdPafIL1TV05OhASArYyIfFvnvJCNrR2NIOvDVNNTFRa+Re2MWyw== + version "4.8.1" + resolved "https://registry.yarnpkg.com/terser/-/terser-4.8.1.tgz#a00e5634562de2239fd404c649051bf6fc21144f" + integrity sha512-4GnLC0x667eJG0ewJTa6z/yXrbLGv80D9Ru6HIpCQmO+Q4PfEtBFi0ObSckqwL6VyQv/7ENJieXHo2ANmdQwgw== dependencies: commander "^2.20.0" source-map "~0.6.1" From 65ef7a8039342db79eb0d037fa537e8ba110183d Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Tue, 2 Aug 2022 15:05:54 +0000 Subject: [PATCH 100/126] Modified some comments --- CHANGES.md | 35 +++++++++++++++++++---------------- setup-cron.sh | 4 ++-- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index cdb5fa32..42ed5c1f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,37 +1,40 @@ ------------------------------------------------------ -## GRNOC NetSage Pipeline 2.0.0 -- -NEW PACKAGE NAME; PMACCT INSTEAD OF NFDUMP AND IMPORTER +## GRNOC NetSage Pipeline 2.0.0 --, 2022 +NEW PACKAGE NAME; USING PMACCT INSTEAD OF NFDUMP AND IMPORTER ------------------------------------------------------ Features: * Renamed package to grnoc-netsage-pipeline - * Got rid of importer references, requirements, files, etc. + * Got rid of old importer references, requirements, files, etc. * Used the %post section in the spec file to check to see if pmacct is installed. * Added systemd unit files for sfacctd and nfacctd (default will be 1 sflow, 1 netflow source, for docker installs) * Revised docker-compose.yml file, etc. to work with pmacct containers. * Revised parts of the .env file, including adding variables for number of sflow and netflow sensors. 
- * Added default sfacct and nfacct config files in conf-pmacct/ (ORIG files to be copied) - * The default inactive timeout for logstash aggregation has been set to 6 minutes (to go with 5 minute sflow aggregation by sfacctd) + * Added default sfacct and nfacct config files in conf-pmacct/ (.ORIG files to be copied) * Added a setup script (setup-pmacct.sh) which the user runs to create pmacct config files and create or modify docker-compose.override.yml, filling in environment variables set in the .env file. (pmacct configs cannot use env vars directly.) - * The number of sflow or netflow sensors can be 0. In this case, the setup script makes the container run an echo command after which it shuts down. + * The number of sflow or netflow sensors can be 0. In this case, the setup script makes the container just run an echo command + after which it shuts down. * Added 05-translate-pmacct.conf logstash config to translate pmacct fields to ones the pipeline uses. * Revised 40-aggregation.conf to deal with pmacct; there are separate sections for sflow and netflow. - * For netflow, in 40-aggregation.conf, adjust start time of incoming flows if duration is over the active timeout. ("updates" to long lasting flows) - * When cutting start times for netflow updates in 40-aggregation.conf, use the inactive timeout variable from the env file instead of hard coding 1 hour. + * For netflow, in 40-aggregation.conf, the start time of incoming flows will be adjusted if duration is greater than the active timeout + (ie, for "updates" to long lasting flows) * The default inactive timeout for logstash aggregation has been set to 6 minutes (to go with 5 minute sflow aggregation by sfacctd) - * Added 41-thresholds.conf - applies size threshold of 10 MB (otherwise drop) and duration threshold of 0.1 sec (otherwise set rates to 0) - * Added new field @sampling_corrected = yes/no. If sampling rate correction has been applied by pmacct or logstash, value will be yes. - * Sampling rate corrections will be done in logstash when requested (flag is set) in the env file but + * Added 41-thresholds.conf - applies size threshold of 10 MB (drop smaller flows) and duration threshold of 0.1 sec (set rates to 0 if shorter) + * Added new field: @sampling_corrected = yes/no. If sampling rate correction has been applied by pmacct or logstash, value will be yes. + * Sampling rate corrections will be done in logstash when requested (ie, flag is set in the env file) but ONLY IF a correction has not yet been applied (by pmacct). * Sensor list for sampling rate corrections in the env file is now semicolon-delimited. * Allowed "ALL" when specifying sensors for sampling rate corrections. * When a sampling rate correction is applied by logstash, add a tag with the rate. * Added CERN and Utah regexes to sensor type and group files. - * Added env file option to skip de-identification. - * 0.0.0.x and 0.0.0.0 flows are tagged, and dropped by default. Unadvertised option to keep them is available in the env file. - * Changed to sensor_groups.json.example and sensor_types.json.example. From now on, our particular regexes will be downloaded from scienceregistry.grnoc. - * Cron file runs script to download all files from scienceregistry.grnoc once/wk - * setup-cron.sh script copies ORIG .cron and .sh files and plugs in username and location of git checkout. + * Added an env file option to skip de-identification. + * 0.0.0.x and 0.0.0.0 flows are tagged and dropped by default. (Unadvertised option to keep them is available in the env file.) 
+ * Changed to sensor_groups.json.example and sensor_types.json.example. From now on, our particular files/regexes will be downloaded from + scienceregistry.grnoc. + * Setup-cron.sh script copies .ORIG .cron and .sh files and plugs in username and location of git checkout. User copies to /etc/cron.d/. + * The cron file runs the script to download all files from scienceregistry.grnoc once/wk. + * Docker-compose.yml ensures logstash runs with uid 1000, while setup-cron.sh sets the owner of logstash-temp/ to 1000, + so logstash can write and read aggregation map files when it stops and starts. (User 1000 could be anyone on the host; name doesn't matter.) * Documentation updates * Dependabot automatic remediations of vulnerabilites (for docusaurus) diff --git a/setup-cron.sh b/setup-cron.sh index dec3ae23..fb804db4 100755 --- a/setup-cron.sh +++ b/setup-cron.sh @@ -1,7 +1,7 @@ #!/bin/bash -# This script modifies docker-netsage-downloads.cron and docker-netsage-downloads.sh -# to fill in user and path info. +# This script copies and modifies docker-netsage-downloads.cron.ORIG and docker-netsage-downloads.sh.ORIG +# to make non-example vesions and fill in user and path info. # USER and PWD env vars are assumed to be already set cp cron.d/docker-netsage-downloads.cron.ORIG cron.d/docker-netsage-downloads.cron From d4a79c8066f93dd64c37662d7102e29029e4dbfb Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Tue, 2 Aug 2022 21:26:06 +0000 Subject: [PATCH 101/126] Added cron and sh files to restart logstash container. Renamed some files. --- .gitignore | 2 ++ CHANGES.md | 8 +++-- MANIFEST | 6 ++-- bin/restart-logstash-container.sh.ORIG | 35 +++++++++++++++++++ ...ogstash.sh => restart-logstash-service.sh} | 0 ....cron => baremetal-netsage-downloads.cron} | 0 cron.d/restart-logstash-container.cron.ORIG | 7 ++++ ...art.cron => restart-logstash-service.cron} | 0 grnoc-netsage-pipeline.spec | 6 ++-- setup-cron.sh | 19 +++++++--- 10 files changed, 69 insertions(+), 14 deletions(-) create mode 100755 bin/restart-logstash-container.sh.ORIG rename bin/{restart-logstash.sh => restart-logstash-service.sh} (100%) mode change 100644 => 100755 rename cron.d/{bm-netsage-downloads.cron => baremetal-netsage-downloads.cron} (100%) create mode 100644 cron.d/restart-logstash-container.cron.ORIG rename cron.d/{netsage-logstash-restart.cron => restart-logstash-service.cron} (100%) diff --git a/.gitignore b/.gitignore index 97797d49..5183c947 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,8 @@ docker-compose.override.yml userConfig bin/docker-netsage-downloads.sh cron.d/docker-netsage-downloads.cron +bin/restart-logstash-container.sh +cron.d/restart-logstash-container.cron conf-pmacct/*_1* conf-pmacct/*_2* diff --git a/CHANGES.md b/CHANGES.md index 42ed5c1f..1868a6da 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -10,7 +10,7 @@ Features: * Revised docker-compose.yml file, etc. to work with pmacct containers. * Revised parts of the .env file, including adding variables for number of sflow and netflow sensors. * Added default sfacct and nfacct config files in conf-pmacct/ (.ORIG files to be copied) - * Added a setup script (setup-pmacct.sh) which the user runs to create pmacct config files and create or modify docker-compose.override.yml, + * Added setup-pmacct.sh script which the user runs to create pmacct config files and create or modify docker-compose.override.yml, filling in environment variables set in the .env file. (pmacct configs cannot use env vars directly.) * The number of sflow or netflow sensors can be 0. 
In this case, the setup script makes the container just run an echo command after which it shuts down. @@ -31,8 +31,10 @@ Features: * 0.0.0.x and 0.0.0.0 flows are tagged and dropped by default. (Unadvertised option to keep them is available in the env file.) * Changed to sensor_groups.json.example and sensor_types.json.example. From now on, our particular files/regexes will be downloaded from scienceregistry.grnoc. - * Setup-cron.sh script copies .ORIG .cron and .sh files and plugs in username and location of git checkout. User copies to /etc/cron.d/. - * The cron file runs the script to download all files from scienceregistry.grnoc once/wk. + * Added setup-cron.sh script which copies .ORIG .cron and .sh files and writes in username and the location of the git checkout. + The user must copy cron files to /etc/cron.d/. + * One cron file runs a script to download all files from scienceregistry.grnoc once/wk. + * Another cron file restarts the logstash container each day. * Docker-compose.yml ensures logstash runs with uid 1000, while setup-cron.sh sets the owner of logstash-temp/ to 1000, so logstash can write and read aggregation map files when it stops and starts. (User 1000 could be anyone on the host; name doesn't matter.) diff --git a/MANIFEST b/MANIFEST index 36ad0884..21d1142d 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,6 +1,6 @@ grnoc-netsage-pipeline.spec CHANGES.md -bin/restart-logstash.sh +bin/restart-logstash-service.sh conf-pmacct/sfacctd.conf conf-pmacct/nfacctd.conf conf-pmacct/sfacct-pretag.map @@ -35,8 +35,8 @@ conf-logstash/ruby/domestic.rb conf-logstash/support/sensor_groups.json conf-logstash/support/sensor_types.json conf-logstash/support/networkA-members-list.rb.example -cron.d/bm-netsage-downloads.cron -cron.d/netsage-logstash-restart.cron +cron.d/baremetal-netsage-downloads.cron +cron.d/restart-logstash-service.cron systemd/logstash.service systemd/sfacctd.service systemd/nfacctd.service diff --git a/bin/restart-logstash-container.sh.ORIG b/bin/restart-logstash-container.sh.ORIG new file mode 100755 index 00000000..c7ac7ca4 --- /dev/null +++ b/bin/restart-logstash-container.sh.ORIG @@ -0,0 +1,35 @@ +#!/bin/bash + +# restart logstash container only if it's already running + +cd -PATH-TO-GIT-CHECKOUT- +date +echo " " + +nlogstash=`docker-compose ps | grep logstash | grep " Up " | wc -l` +if [[ $nlogstash -eq 1 ]] +then + docker-compose stop logstash + echo "Contents of logstash-temp/ after stopping:" + ls -l logstash-temp + echo " " + + nlogstash=`docker-compose ps | grep logstash | grep " Up " | wc -l` + if [[ $nlogstash -eq 0 ]] + then + docker-compose start logstash + sleep 30 # give it plenty of time + echo "Contents of logstash-temp/ after starting:" + ls -l logstash-temp + + nlogstash=`docker-compose ps | grep logstash | grep " Up " | wc -l` + if [[ $nlogstash -eq 0 ]] + then + echo " " + echo "Logstash restart failed?! Check on it! " + fi + fi +else + echo "Logstash is not running so no restart." 
+fi + diff --git a/bin/restart-logstash.sh b/bin/restart-logstash-service.sh old mode 100644 new mode 100755 similarity index 100% rename from bin/restart-logstash.sh rename to bin/restart-logstash-service.sh diff --git a/cron.d/bm-netsage-downloads.cron b/cron.d/baremetal-netsage-downloads.cron similarity index 100% rename from cron.d/bm-netsage-downloads.cron rename to cron.d/baremetal-netsage-downloads.cron diff --git a/cron.d/restart-logstash-container.cron.ORIG b/cron.d/restart-logstash-container.cron.ORIG new file mode 100644 index 00000000..55f24767 --- /dev/null +++ b/cron.d/restart-logstash-container.cron.ORIG @@ -0,0 +1,7 @@ +SHELL=/bin/sh +PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin +MAILTO=root + +# restart logstash so caida, geoip, and scireg db's are reread, in case they've been updated +# daily at 11:00 UTC +00 11 * * * root -PATH-TO-GIT-CHECKOUT-/bin/restart-logstash-container.sh > -PATH-TO-GIT-CHECKOUT-/logstash-temp/restart-output.txt 2>&1 diff --git a/cron.d/netsage-logstash-restart.cron b/cron.d/restart-logstash-service.cron similarity index 100% rename from cron.d/netsage-logstash-restart.cron rename to cron.d/restart-logstash-service.cron diff --git a/grnoc-netsage-pipeline.spec b/grnoc-netsage-pipeline.spec index 0302757a..82c16c1d 100644 --- a/grnoc-netsage-pipeline.spec +++ b/grnoc-netsage-pipeline.spec @@ -78,10 +78,10 @@ make pure_install %{__install} -d -p %{buildroot}/usr/share/logstash/config/ %{__install} -d -p %{buildroot}/usr/share/doc/grnoc/netsage-pipeline/ -%{__install} bin/restart-logstash.sh %{buildroot}/usr/bin/restart-logstash.sh +%{__install} bin/restart-logstash-service.sh %{buildroot}/usr/bin/restart-logstash.sh -%{__install} cron.d/bm-netsage-downloads.cron %{buildroot}/etc/cron.d/netsage-downloads.cron -%{__install} cron.d/netsage-logstash-restart.cron %{buildroot}/etc/cron.d/netsage-logstash-restart.cron +%{__install} cron.d/restart-logstash-service.cron %{buildroot}/etc/cron.d/netsage-logstash-restart.cron +%{__install} cron.d/baremetal-netsage-downloads.cron %{buildroot}/etc/cron.d/netsage-downloads.cron %{__install} systemd/logstash.service %{buildroot}/etc/systemd/system/logstash.service %{__install} systemd/sfacctd.service %{buildroot}/etc/systemd/system/sfacctd.service diff --git a/setup-cron.sh b/setup-cron.sh index fb804db4..620317dc 100755 --- a/setup-cron.sh +++ b/setup-cron.sh @@ -1,26 +1,35 @@ #!/bin/bash -# This script copies and modifies docker-netsage-downloads.cron.ORIG and docker-netsage-downloads.sh.ORIG +# Copy and modify docker-netsage-downloads.cron.ORIG and .sh.ORIG +# and restart-logstash-container.cron.ORIG and .sh.ORIG # to make non-example vesions and fill in user and path info. 
+# USER NEEDS TO COPY FILES TO etc/cron.d/ -# USER and PWD env vars are assumed to be already set +# $USER and $PWD env vars are assumed to be already set cp cron.d/docker-netsage-downloads.cron.ORIG cron.d/docker-netsage-downloads.cron sed -i "s|-USER-|$USER|" cron.d/docker-netsage-downloads.cron sed -i "s|-PATH-TO-GIT-CHECKOUT-|$PWD|" cron.d/docker-netsage-downloads.cron cp bin/docker-netsage-downloads.sh.ORIG bin/docker-netsage-downloads.sh sed -i "s|-PATH-TO-GIT-CHECKOUT-|$PWD|g" bin/docker-netsage-downloads.sh +cp cron.d/restart-logstash-container.cron.ORIG cron.d/restart-logstash-container.cron +sed -i "s|-PATH-TO-GIT-CHECKOUT-|$PWD|g" cron.d/restart-logstash-container.cron +cp bin/restart-logstash-container.sh.ORIG bin/restart-logstash-container.sh +sed -i "s|-PATH-TO-GIT-CHECKOUT-|$PWD|g" bin/restart-logstash-container.sh + echo "" echo " Cron and bin files have been set up." -echo " -> Please check cron.d/docker-netsage-downloads.cron for correct user and path, and " -echo " -> COPY IT TO /etc/cron.d/." +echo " Please check cron.d/ docker-netsage-downloads.cron and restart-logstash-container.cron" +echo " files for correct user and path values and " +echo " !!!! >>>>> COPY THEM TO /etc/cron.d/ <<<<< !!!! " echo " If you need to immediately download files, run bin/docker-netsage-downloads.sh manually." echo "" # Also... When we restart logstash, the process needs to be able to write then read a file in logstash-temp/. # Set the owner and group of logstash-temp/ to 1000, which is the default uid of the user that logstash runs as (see docker-compose.yml). echo " If requested, enter the sudo password to allow the script to change the owner of logstash-temp/" -echo " (If you get an error, please change the owner and group of logstash-temp/ to 1000. It doesn't matter what username this maps to.)" +echo " (If you get an error, manually change the owner and group of logstash-temp/ to 1000. It doesn't matter what username this maps to.)" +echo "" sudo chown 1000:1000 logstash-temp echo "" From 116859b5299109b6ba3751725703ef7667d51965 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 3 Aug 2022 20:52:20 +0000 Subject: [PATCH 102/126] Changed default java heap size to 4g in env file. Comment changes. --- docker-compose.yml | 8 ++++---- env.example | 43 +++++++++++++++++++++---------------------- userConfig/README.md | 8 ++++++++ 3 files changed, 33 insertions(+), 26 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index ab47a422..49477bc1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -56,8 +56,8 @@ services: # To view, go to https:///rabbit - "15672:15672" volumes: - # Used a name Volume for the rabbitmq config and files for its own use. - # We want data to persist but don't need to see it. + # Use a named Volume for the rabbitmq config and files for its own use. + # We want data (msgs in the queue) to persist but don't need to see it. - rabbit_vol:/var/lib/rabbitmq networks: - netsage-network @@ -65,8 +65,9 @@ services: logstash: container_name: logstash image: docker.elastic.co/logstash/logstash:7.16.2 - user: 1000:1000 env_file: .env + # user uid + user: 1000:1000 # Explicitly specify *.conf to be sure logstash doesn't use *.disabled configs. 
command: logstash -f /etc/logstash/conf.d/*.conf volumes: @@ -76,7 +77,6 @@ services: - ./logstash-downloads/:/var/lib/grnoc/netsage/:ro # location for aggregation map files, which save flows being aggregated when logstash shuts down - ./logstash-temp/:/logstash-temp/ - networks: - netsage-network depends_on: diff --git a/env.example b/env.example index 609ccfb5..b227457d 100644 --- a/env.example +++ b/env.example @@ -1,6 +1,6 @@ -# PMACCT SETTINGS +# FOR PMACCT CONFIGS: # Number of sensors of each type -# === UPDATE IF THERE ARE NOT 1 OF EACH TYPE === +#--- UPDATE IF THERE ARE NOT 1 OF EACH TYPE --- sflowSensors=1 netflowSensors=1 @@ -9,17 +9,23 @@ netflowSensors=1 # For each sensor, list the following: # The sensor name to assign to flows # The port on the pipeline host to which the router is sending flows -# === REPLACE EXAMPLE VALUES === +#--- REPLACE EXAMPLE VALUES --- sflowSensorName_1=The Sflow Sensor Name sflowPort_1=8000 netflowSensorName_1=The Netflow Sensor Name netflowPort_1=9000 -# LOGSTASH SETTINGS +# FOR LOGSTASH AND PAMACCT CONFIGS: +# pmacct will write and logstash will read flows from this rabbit host +rabbitmq_input_host=rabbit +rabbitmq_input_username=guest +rabbitmq_input_pw=guest + +# FOR LOGSTASH CONFIGS: # By default, processed flows are sent to a rabbit queue. # The example settings write to the rabbitmq container, where they will accumulate, by default. -# === TO SEND PROCESSED FLOWS TO GlobalNOC, ASK FOR THE PROPER SETTINGS === +#--- TO SEND PROCESSED FLOWS TO GlobalNOC, ASK FOR THE PROPER SETTINGS --- rabbitmq_output_host=rabbit rabbitmq_output_username=guest rabbitmq_output_pw=guest @@ -33,7 +39,7 @@ inactivity_timeout=360 max_flow_timeout=3600 aggregation_maps_path=/logstash-temp/logstash-aggregation-maps -# Advanced Processing Options - see the "Docker Advanced" documentation +# Advanced Processing Options - see the "Docker Advanced" documentation: # To do ifindex (interface) filtering of flows from specified sensors: # Flows from listed sensors will be dropped unless src or dst interface is in the list of ifindexes to keep. @@ -69,24 +75,16 @@ subnet_filter_flag=False # To keep full IP addresses, set this parameter to True. full_IPs_flag=False -# Logstash Process Settings - -# The aggregation filter requires there be only one logstash worker! +# LOGSTASH PROCESS SETTINGS: +# memory - java heap size +LS_JAVA_OPTS=-Xmx4g -Xms4g +# The aggregation filter requires there be only one logstash worker! Do not change. 
PIPELINE_WORKERS=1 PIPELINE_ORDERED=true -# memory - java heap size -LS_JAVA_OPTS=-Xmx2g -Xms2g -# we are not going to evaluate cluster health and performance -XPACK_MONITORING_ENABLED=false +# other +PIPELINE_ECS_COMPATIBILITY=disabled -# LOGSTASH/PAMACCT SETTINGS -# pmacct will write and logstash will read flows from this rabbit host -# (when running rabbitmq locally outside of docker, input_host should be localhost) -rabbitmq_input_host=rabbit -rabbitmq_input_username=guest -rabbitmq_input_pw=guest - -# RABBITMQ SERVER SETTINGS (container) +# RABBITMQ SERVER SETTINGS: # (for the post-pmacct/pre-logstash queue) RABBIT_HOST=rabbit RABBITMQ_DEFAULT_USER=guest @@ -94,5 +92,6 @@ RABBITMQ_DEFAULT_PASS=guest RABBITMQ_ERLANG_COOKIE='secret cookie' discovery.type=single-node -# In case you run elasticsearch container and want logstash to write to it +# In case you run an elasticsearch container ELASTIC_HOSTNAME='elastic' +XPACK_MONITORING_ENABLED=false diff --git a/userConfig/README.md b/userConfig/README.md index f203cb77..5f4b3317 100644 --- a/userConfig/README.md +++ b/userConfig/README.md @@ -4,4 +4,12 @@ This directory is git ignore so it ensures any changes here are preserved. Any Example of user overrides would be special logstash settings that are not configured via env and so on. +Eg, you could add a custom jvm.options file here and add the following to the docker-compose.override.yml file under logstash: + volumes: + - ./userConfig/jvm.options:/usr/share/logstash/config/jvm.options + +NOTE - don't use both environment variables in the .env file and a custom config file/volume with those settings here. + + + From e7d20ff698efc76367a84b798e072005d108d7b8 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 11 Aug 2022 16:09:06 +0000 Subject: [PATCH 103/126] Revised documentation for 2.0 --- website/docs/components/docker_env.md | 37 +--- website/docs/components/docker_pipeline.md | 22 +- website/docs/deploy/choosing.md | 13 +- .../docs/deploy/docker_install_advanced.md | 188 +++++++----------- website/docs/deploy/docker_install_simple.md | 141 +++++++------ website/docs/deploy/docker_troubleshooting.md | 45 +++-- website/docs/deploy/docker_upgrade.md | 43 ++-- website/docs/pipeline/elasticsearch.md | 8 +- website/docs/pipeline/intro.md | 35 ++-- website/docs/pipeline/logstash.md | 14 +- website/docs/pipeline/pmacct.md | 2 +- website/docs/pipeline/sensors.md | 12 +- website/docs/pipeline/tstat.md | 4 +- 13 files changed, 268 insertions(+), 296 deletions(-) diff --git a/website/docs/components/docker_env.md b/website/docs/components/docker_env.md index 0bfe77ac..ca69aaaa 100644 --- a/website/docs/components/docker_env.md +++ b/website/docs/components/docker_env.md @@ -1,40 +1,23 @@ -Next, copy `env.example` to `.env` -```sh -cp env.example .env -``` +Next, copy `env.example` to `.env` then edit the .env file to set the sensor names, ports, and where to send processed flows. -then edit the .env file to set the sensor names to unique identifiers (with spaces or not, no quotes) ```sh -# Importer settings -sflowSensorName=My sflow sensor name -netflowSensorName=My netflow sensor name +cp env.example .env ``` - - If you have only one collector, remove or comment out the line for the one you are not using. - - If you have more than one of the same type of collector, see the "Docker Advanced" documentation. - :::note -These names uniquely identify the source of the data and will be shown in the Grafana dashboards. In elasticsearch, they are saved in the `meta.sensor_id` field. 
Choose names that are meaningful and unique.
-For example, your sensor names might be "MyNet New York Sflow" and "MyNet Boston Netflow" or "MyNet New York - London" and "MyNet New York - Paris". Whatever makes sense in your situation.
+Sensor names uniquely identify the source of the data and will be shown in the Grafana dashboards so they should be understandable by a general audience. For example, your sensor names might be "MyNet New York Sflow" or "MyNet New York to London". (Running your proposed names by a Netsage admin would be helpful.)
 :::
 
-You will also want to edit the **Logstash output rabbit queue** section. This section defines where the final data will land after going through the pipeline. By default, it will be written to a rabbitmq queue on `rabbit`, ie, the local rabbitMQ server running in the docker container. Enter a hostname to send to a remote rabbitMQ server (also the correct username, password, and queue key/name).
+- By default, the number of sflowSensors and netflowSensors is set to 1 at the top. If you know from the start that you will have only 1 sensor, set either sflowSensors or netflowSensors to 0 and comment out the sensor name and port below.
 
-```sh
-rabbitmq_output_host=rabbit@mynet.edu
-rabbitmq_output_username=guest
-rabbitmq_output_pw=guest
-rabbitmq_output_key=netsage_archive_input
-```
-:::note
-To send processed flow data to GlobalNOC at Indiana University, you will need to obtain settings for this section from your contact. A new queue may need to be set up at IU, as well as allowing traffic from your pipeline host. (At IU, data from the this final rabbit queue will be moved into an Elasticsearch instance for storage and viewing.)
-:::
+  If you will have more than 1 of one type of sensor, see the Docker Advanced Options documentation.
 
+- In the next section of the .env file, declare the name of sflow sensor 1 and the port to which the exporter is sending the flows. Similarly for netflow sensor 1.
 
+- You will also want to edit the **rabbit_output** variables. This section defines where the final data will land after going through the pipeline. By default, it will be written to a rabbitmq queue on `rabbit`, ie, the local rabbitMQ server running in the docker container, but there is nothing provided to do anything further with it.
 
+  :::note
+  To send processed flow data to Indiana University, you will need to obtain settings for this section from your contact. A new queue may need to be set up at IU, as well as allowing traffic from your pipeline host. (At IU, data from this final rabbit queue will be moved into an Elasticsearch instance for storage and viewing in Netsage Portals.)
+  :::
 
-The following options are described in the Docker Advanced section:
 
-**To drop all flows except those using the specfied interfaces**: Use if only some flows from a router are of interest and those can be identified by interface.
 
-**To change the sensor name for flows using a certain interface**: Use if you want to break out some flows coming into a port and give them a different sensor name.
 
-**To "manually" correct flow sizes and rates for sampling for specified sensors**: Use if sampling corrections are not being done automatically. Normally you do not need to use this, but check flows to be sure results are reasonable. 
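+
+For reference, the edited settings in `.env` might end up looking something like the sketch below (the sensor names, ports, and rabbit values shown are placeholders only; use your own names and the settings provided by your contact):
+
+```sh
+# one sensor of each type
+sflowSensors=1
+netflowSensors=1
+sflowSensorName_1=MyNet New York Sflow
+sflowPort_1=8000
+netflowSensorName_1=MyNet Boston Netflow
+netflowPort_1=9000
+
+# where processed flows are sent (placeholder values)
+rabbitmq_output_host=rabbit
+rabbitmq_output_username=guest
+rabbitmq_output_pw=guest
+rabbitmq_output_key=processed_flows
+```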
diff --git a/website/docs/components/docker_pipeline.md b/website/docs/components/docker_pipeline.md index a0709f08..efe8c944 100644 --- a/website/docs/components/docker_pipeline.md +++ b/website/docs/components/docker_pipeline.md @@ -1,26 +1,28 @@ -Start up the pipeline (all containers) using: +Start up the pipeline (all containers) using ```sh -# docker-compose up -d +docker-compose up -d ``` -This will also restart any containers/processes that have died. "-d" runs containers in the background. +This command will pull down all required docker images and start all the services/containers as listed in the docker-compose.yml and docker-compose.override.yml files. +In general, it will also restart any containers/processes that have died. "-d" runs containers in the background. -You can see the status of the containers and whether any have died (exited) using +You can see the status of the containers and whether any have died (exited) using these commands ```sh -# docker-compose ps +docker-compose ps +docker container ls ``` To check the logs for each of the containers, run ```sh -# docker-compose logs -# docker-compose logs logstash -# docker-compose logs importer -etc. +docker-compose logs logstash +docker-compose logs rabbit +docker-compose logs sfacctd_1 +docker-compose logs nfacctd_1 ``` -Add `-f` or, e.g., `-f logstash` to see new log messages as they arrive. `--timestamps`, `--tail`, and `--since` are also useful -- look up details in Docker documentation. +Add `-f`, e.g. `-f logstash` to see new log messages as they arrive. `--timestamps`, `--tail`, and `--since` are also useful -- look up details in Docker documentation. To shut down the pipeline (all containers) use diff --git a/website/docs/deploy/choosing.md b/website/docs/deploy/choosing.md index 43ae4429..d8531513 100644 --- a/website/docs/deploy/choosing.md +++ b/website/docs/deploy/choosing.md @@ -6,20 +6,15 @@ sidebar_label: Choose Install ## Manual or BareMetal Installation -The Manual (baremetal) Installation Guide will walk you through installing the pipeline using your own server infrastructure and requires you to maintain all the components involved. +The Manual (bare-metal) Installation Guide will walk you through installing the pipeline using your own server infrastructure and requires you to maintain all the components involved. It will likely be a bit better when it comes to performance, and have greater flexibility, but there is also more complexity involved in configuring and setting up. -If you are the ultimate consumer of the data then setting up a baremetal version might be worth doing. Or at least the final rabbitMQ that will be holding the data since it'll like need to handle a large dataset. +If you are the ultimate consumer of the data then setting up a bare-metal version might be worth doing. ## Dockerized Version -The Docker version makes it trivial to bring up the pipeline for both a developer and consumer. The work is mostly already done for you. It should be a simple matter of configuring a few env settings and everything should 'just' work. +The Docker version makes it easier to bring up the pipeline. More of the work is done for you. -If you are simply using the pipeline to deliver the anonymized network stats for someone else's consumption, then using the docker pipeline would be preferred. +If you are simply using the pipeline to deliver the anonymized network stats for someone else's consumption (eg, sending Netsage data to IU), then using the docker pipeline would be preferred. 
-## Choose your adventure - -- [Manual/Server Installation](bare_metal_install) -- [Simple Docker](docker_install_simple.md) - 1 netflow sensor and/or 1 sflow sensor -- [Advanced Docker](docker_install_advanced.md) - options that allow for more complex configurations diff --git a/website/docs/deploy/docker_install_advanced.md b/website/docs/deploy/docker_install_advanced.md index bc84b812..c8758d33 100644 --- a/website/docs/deploy/docker_install_advanced.md +++ b/website/docs/deploy/docker_install_advanced.md @@ -4,114 +4,113 @@ title: Docker Advanced Options Guide sidebar_label: Docker Advanced Options --- -If the basic Docker Installation does not meet your needs, the following customizations will allow for more complex situations. Find the section(s) which apply to you. +The following customizations will allow for more complex situations than described in the Docker Installation guide. Find the section(s) which apply to you. *Please first read the Docker Installation guide in detail. This guide will build on top of that.* -## To Add an Additional Sflow or Netflow Collector +## To Add Additional Sflow or Netflow Collectors -If you have more than 1 sflow and/or 1 netflow sensor, you will need to create more collectors and modify the importer config file. The following instructions describe the steps needed to add one additional sensor. +Any number of sensors can be accomodated, although if there are more than a few being processed by the same pipeline, you may run into scaling issues. -Any number of sensors can be accomodated, although if there are more than a few being processed by the same Importer, you may run into issues where long-lasting flows from sensosr A time out in the aggregation step while waiting for flows from sensors B to D to be processed. (Another option might be be to run more than one Docker deployment.) +#### a. Edit environment file -#### a. Edit docker-compose.override.yml +As an example, say we have three netflow sensors. In the .env file, first set `netflowSensors=3`. Then, in the next section, add the names and ports for the additional sensors using variable names ending with _2 and _3. Set the port numbers to those you have used. -The pattern to add a flow collector is always the same. To add an sflow collector called example-collector, edit the docker-compose.override.yml file and add something like - -```yaml - example-collector: - image: netsage/nfdump-collector:alpine-1.6.23 - restart: always - command: sfcapd -T all -l /data -S 1 -w -z -p 9997 - volumes: - - ./data/input_data/example:/data - ports: - - "9997:9997/udp" ``` +netflowSensorName_1=The 1st Netflow Sensor Name +netflowPort_1=9000 -- collector name: should be updated to something that has some meaning, in our example "example-collector". -- image: copy from the default collector sections already in the file. -- command: choose between "sfcapd" for sflow and "nfcapd" for netflow, and at the end of the command, specify the port to watch for incoming flow data. -- volumes: specify where to write the nfcapd files. Make sure the path is unique and in ./data/. In this case, we're writing to ./data/input_data/example. Change "example" to something meaningful. -- ports: make sure the port here matches the port you've set in the command. Naturally all ports have to be unique for this host and the router should be configured to export data to the same port. (?? If the port on your docker container is different than the port on your host/local machine, use container_port:host_port.) 
- -Make sure the indentation is right or you'll get an error about yaml parsing. - -You will also need to uncomment these lines: +netflowSensorName_2=The 2nd Netflow Sensor Name +netflowPort_2=9001 -```yaml - volumes: - - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml +netflowSensorName_3=The 3rd Netflow Sensor Name +netflowPort_3=9002 ``` +#### b. Edit docker-composeoverride_example.yml -#### b. Edit netsage_override.xml - -To make the Pipeline Importer aware of the new data to process, you will need to create a custom Importer configuration: netsage_override.xml. This will replace the usual config file netsage_shared.xml. +Add more nfacctd services to the example override file. When copying and pasting, replace _1 with _2 or _3 in three places! -```sh -cp compose/importer/netsage_shared.xml userConfig/netsage_override.xml ``` +nfacctd_1: + ports: + # port on host receiving flow data : port in the container + - "${netflowPort_1}:${netflowContainerPort_1}/udp" -Edit netsage_override.xml and add a new "collection" section for the new sensor as in the following example. The flow-path should match the path set above in docker-compose.override.yml. $exampleSensorName is a new "variable"; don't replace it here, it will be replaced with a value that you set in the .env file. For the flow-type, enter "sflow" or "netflow" as appropriate. (Enter "netflow" if you're running IPFIX.) +nfacctd_2: + ports: + # port on host receiving flow data : port in the container + - "${netflowPort_2}:${netflowContainerPort_2}/udp" -```xml - - /data/input_data/example/ - $exampleSensorName - sflow - +nfacctd_3: + ports: + # port on host receiving flow data : port in the container + - "${netflowPort_3}:${netflowContainerPort_3}/udp" ``` -#### c. Edit environment file +#### c. Rerun setup-pmacct.sh -Then, in the .env file, add a line that sets a value for the "variable" you referenced above, $exampleSensorName. The value is the name of the sensor which will be saved to elasticsearch and which appears in Netsage Dashboards. Set it to something meaningful and unique. E.g., +Delete (after backing up) docker-compose.override.yml so the pmacct setup script can recreate it along with creating additional nfacctd config files. -```ini -exampleSensorName=MyNet Los Angeles sFlow +``` +rm docker-compose.override.yml +./pmacct-setup.sh ``` +Check docker-compose.override.yml and files in conf-pmacct/ for consistency. -#### d. Running the new collector +#### d. Start new containers -After doing the setup above and selecting the docker version to run, you can start the new collector by running the following line, using the collector name (or by running `docker-compose up -d` to start up all containers): +If you are simply adding new collectors nfacctd_2 and nfacctd_3, and there are no changes to nfacctd_1, you can simply start up the new containers with ```sh -docker-compose up -d example-collector +docker-compose up -d ``` -## To Keep Only Flows From Certain Interfaces -If your sensors are exporting all flows, but only those using a particular interface are relevant, use this option in the .env file. The collectors and importer will save/read all incoming flows, but the logstash pipeline will drop those that do not have src_ifindex OR dst_inindex equal to one of those listed. +Otherwise, or to be safe, bring everything down first, then back up. 
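+
+A minimal sketch of that safer full restart, using only commands shown elsewhere in this guide:
+
+```sh
+docker-compose down      # stop and remove all of the pipeline containers
+docker-compose up -d     # start everything, including the new nfacctd_2 and nfacctd_3
+docker-compose ps        # confirm that all containers are running
+```
+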
+ +## To Filter Flows by Interface +If your sensors are exporting all flows, but only those using particular interfaces are relevant, use this option in the .env file. All incoming flows will be read in, but the logstash pipeline will drop those that do not have src_ifindex OR dst_inindex equal to one of those listed. (This may create a lot of extra work and overwhelm logstash, so if at all possible, try to limit the flows at the router level or using iptables.) -In the .env file, uncomment lines in the appropriate section and enter the information required. Be sure `ifindex_filter_flag=True` with "True" capitalized as shown, any sensor names are spelled exactly right, and list all the ifindex values of flows that should be kept and processed. Some examples (use just one!): +In the .env file, uncomment lines in the appropriate section and enter the information required. "ALL" can refer to all sensors or all interfaces of a sensor. If a sensor is not referenced at all, all of its flows will be kept. Be sure `ifindex_filter_flag=True` with "True" capitalized as shown, any sensor names are spelled exactly right, and list all the ifindex values of flows that should be kept and processed. Use semicolons to separate sensors. Some examples (use just one!): ```sh -ifindex_filter_keep=123 -ifindex_filter_keep=123,456 -ifindex_filter_keep=Sensor 1: 789 -ifindex_filter_keep=123; Sensor 1: 789; Sensor 2: 800, 900 +ifindex_filter_keep=ALL:123 +ifindex_filter_keep=Sensor 1: 123 +ifindex_filter_keep=Sensor 1: 456, 789 +ifindex_filter_keep=Sensor 1: ALL; Sensor 2: 800, 900 ``` -In the first case, all flows that have src_ifindex = 123 or dst_ifindex = 123 will be kept, regardless of sensor name. (Note that this may be a problem if you have more than 1 sensor with the same ifindex values!) -In the 2nd case, if src or dst ifindex is 123 or 456, the flow will be processed. -In the 3rd case, only flows from Sensor 1 will be filtered, with flows using ifindex 789 kept. -In the last example, any flow with ifindex 123 will be kept. Sensor 1 flows with ifindex 789 (or 123) will be kept, and those from Sensor 2 having ifindex 800 or 900 (or 123) will be kept. +- In the first example, all flows that have src_ifindex = 123 or dst_ifindex = 123 will be kept, regardless of sensor name. All other flows will be discarded. +- In the 2nd case, if src or dst ifindex is 123 and the sensor name is "Sensor 1", the flow will be kept. If there are flows from "Sensor 2", all of them will be kept. +- In the 3rd case, flows from Sensor 1 having ifindex 456 or 789 will be kept. +- In the last example, all Sensor 1 flows will be kept, and those from Sensor 2 having ifindex 800 or 900 will be kept. Spaces don't matter except within the sensor names. Punctuation is required as shown. +## To Filter Flows by Subnet + +With this option, flows from specified sensors will be dropped unless src or dst is in the list of subnets to keep. +"ALL" can refer to all sensors. +If a sensor is not referenced at all, all of its flows will be kept. + +``` +subnet_filter_flag=True +subnet_filter_keep=Sensor A Name: 123.45.6.0/16; Sensor B Name: 123.33.33.0/24, 456.66.66.0/24 +``` ## To Change a Sensor Name Depending on the Interface Used -In some cases, users want to keep all flows from a certain sensor but differentiate between those that enter or exit through specific sensor interfaces. This can be done by using this option in the .env file. 
+In some cases, users want to keep all flows from a certain sensor but differentiate between those that enter or exit through a specific interface by using a different sensor name. In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 4 fields are set properly! For example, ```sh ifindex_sensor_rename_flag=True +ifindex_sensor_rename_ifindex=10032 ifindex_sensor_rename_old_name=IU Sflow ifindex_sensor_rename_new_name=IU Bloomington Sflow -ifindex_sensor_rename_ifindex=10032 ``` In this case, any flows from the "IU Sflow" sensor that use interface 10032 (src_ifindex = 10032 OR dst_ifindex = 10032) will have the sensor name changed from "IU Sflow" to "IU Bloomington Sflow". Currently, only one such rename can be configured in Docker and only 1 ifindex is allowed. @@ -121,70 +120,37 @@ Please notify the devs at IU in advance, if you need to modify a sensor name, be ::: ## To Do Sampling Rate Corrections in Logstash -When flow sampling is done, corrections have to be applied. For example, if you are sampling 1 out of 100 flows, for each flow measured, it is assumed that in reality there would be 100 flows of that size with that src and dst, so the number of bits (and the number of packets, bits/s and packets/s) is multiplied by 100. Usually the collector (nfcapd or sfcapd process) gets the sampling rate from the incoming data and applies the correction, but in some cases, the sensor may not send the sampling rate, or there may be a complex set-up that requires a manual correction. With netflow, a manual correction can be applied using the '-s' option in the nfsen config, if nfsen is being used, or the nfcapd command, but this is not convenient when using Docker. For sflow, there is no such option. In either case, the correction can be made in logstash as follows. +When flow sampling is done, corrections have to be applied to the number of packets and bytes. For example, if you are sampling 1 out of 100 flows, for each flow measured, it is assumed that in reality there would be 100 flows of that size with that src and dst, so the number of bits (and the number of packets, bits/s and packets/s) is multiplied by 100. Usually the collector (nfacctd or sfacctd process) gets the sampling rate from the incoming data and applies the correction, but in some cases, the sensor may not send the sampling rate, or there may be a complex set-up that requires a manual correction. In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 3 fields are set properly! The same correction can be applied to multiple sensors by using a comma-separed list. The same correction applies to all listed sensors. For example, ```sh sampling_correction_flag=True -sampling_correction_sensors=IU Bloomington Sflow, IU Sflow +sampling_correction_sensors=IU Bloomington Sflow, IU Indy Sflow sampling_correction_factor=512 ``` -## To Change How Long Nfcapd Files Are Kept -The importer will automatically delete older nfcapd files for you, so that your disk doesn't fill up. By default, 3 days worth of files will be kept. This can be adjusted by making a netsage_override.xml file: +In this example, all flows from sensors "IU Bloomington Sflow" and "IU Indy Sflow" will have a correction factor of 512 applied by logstash. Any other sensors will not have a correction applied by logstash (presumably pmacct would apply the correction automatically). 
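+
+As a rough worked example (the flow sizes below are made up; only the factor of 512 comes from the settings above), the correction simply scales each sampled flow's counters:
+
+```sh
+# one sampled flow reported with 10 packets and 15000 bytes, corrected by a factor of 512
+echo $(( 10 * 512 ))       # corrected packets: 5120
+echo $(( 15000 * 512 ))    # corrected bytes:   7680000
+```
+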
-```sh -cp compose/importer/netsage_shared.xml userConfig/netsage_override.xml -``` +Note that if pmacct has made a sampling correction already, no additional manual correction will be applied, even if these options are set, +so this can be used *to be sure* a sampling correction is applied. -At the bottom of the file, edit this section to set the number of days worth of files to keep. Set cull-enable to 0 for no culling. Eg, to save 1 days worth of data: -````xml - - 1 - 1 - -```` +## To NOT deidentify flows -You will also need to uncomment these lines in docker-compose.override.yml: +Normally all flows are deidentified before being saved to elasticsearch by dropping by truncating the src and dst IP addresses. If you do NOT want to do this, set full_IPs_flag to True. (You will most likely want to request access control on the grafana portal, as well.) -```yaml - volumes: - - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml ``` - - -## To Save Flow Data to a Different Location - -By default, data is saved to subdirectories in the ./data/ directory (ie, the data/ directory in the git checkout). If you would like to use a different location, there are two options. - -1. The best solution is to create a symlink between ./data/ and the preferred location, or, for an NFS volume, export it as ${PROJECT_DIR}/data. - -During installation, delete the data/ directory (it should only contain .placeholder), then create your symlink. Eg, to use /var/netsage/ instead of data/, -```sh -cd {netsage-pipeline dir} -mkdir /var/netsage -rm data/.placeholder -rmdir data -ln -s /var/netsage {netsage-pipeline dir}/data +# To keep full IP addresses, set this parameter to True. +full_IPs_flag=True ``` -(Check the permissions of the directory.) - -2. Alternatively, update volumes in docker-compose.yml and docker-compose.override.yml Eg, to save nfcapd files to subdirs in /mydir, set the collector volumes to `- /mydir/input_data/netflow:/data` (similarly for sflow) and set the importer and logstash volumes to `- /mydir:/data`. - -:::warning -If you choose to update the docker-compose file, keep in mind that those changes will cause a merge conflict or be wiped out on upgrade. -You'll have to manage the volumes exported and ensure all the paths are updated correctly for the next release manually. -::: ## To Customize Java Settings / Increase Memory Available for Lostash - -If cpu or memory seems to be a problem, try increasing the JVM heap size for logstash from 2GB to 3 or 4, no more than 8. +If cpu or memory use seems to be a problem, try increasing the java JVM heap size for logstash from 4GB to no more than 8. To do this, edit LS_JAVA_OPTS in the .env file. ```yaml -LS_JAVA_OPTS=-Xmx4g -Xms4g +LS_JAVA_OPTS=-Xmx8g -Xms8g ``` Here are some tips for adjusting the JVM heap size (https://www.elastic.co/guide/en/logstash/current/jvm-settings.html): @@ -193,20 +159,6 @@ Here are some tips for adjusting the JVM heap size (https://www.elastic.co/guide - CPU utilization can increase unnecessarily if the heap size is too low, resulting in the JVM constantly garbage collecting. You can check for this issue by doubling the heap size to see if performance improves. - Do not increase the heap size past the amount of physical memory. Some memory must be left to run the OS and other processes. As a general guideline for most installations, don’t exceed 50-75% of physical memory. The more memory you have, the higher percentage you can use. 
-To modify other logstash settings, rename the provided example file for JVM Options and tweak the settings as desired: - -```sh -cp userConfig/jvm.options_example userConfig/jvm.options -``` - -Also update the docker-compose.override.xml file to uncomment lines in the logstash section. It should look something like this: - -```yaml -logstash: - image: netsage/pipeline_logstash:latest - volumes: - - ./userConfig/jvm.options:/usr/share/logstash/config/jvm.options -``` ## To Bring up Kibana and Elasticsearch Containers diff --git a/website/docs/deploy/docker_install_simple.md b/website/docs/deploy/docker_install_simple.md index c4216138..f17f2dc3 100644 --- a/website/docs/deploy/docker_install_simple.md +++ b/website/docs/deploy/docker_install_simple.md @@ -6,38 +6,44 @@ sidebar_label: Docker Installation In this deployment guide, you will learn how to deploy a basic Netsage setup that includes one sflow and/or one netflow collector. If you have more than one collector of either type, or other special situations, see the Docker Advanced guide. The Docker containers included in the installation are + - sfacctd_1 (sflow collector - receives sflow data and writes it to a rabbit queue) + - nfacctd_1 (netflow collector - receives netflow data and writes it to a rabbit queue) - rabbit (the local RabbitMQ server) - - sflow-collector (receives sflow data and writes nfcapd files) - - netflow-collector (receives netflow data and writes nfcapd files) - - importer (reads nfcapd files and puts flows into a local rabbit queue) - - logstash (logstash pipeline that processes flows and sends them to their final destination, by default a local rabbit queue) - - ofelia (cron-like downloading of files used by the logstash pipeline) + - logstash (logstash pipeline that pulls from the rabbit queue, processes flows, and sends to the final destination) -The code and configs for the importer and logstash pipeline can be viewed in the netsage-project/netsage-pipeline github repo. See netsage-project/docker-nfdump-collector for code related to the collectors. +### 1. Set up a Pipeline Host +Decide where to run the Docker Pipeline and get it set up. The default java heap size for logstash is 4GB so have at least 8GB of memory. Little disk space should be needed. -### 1. Set up Data Sources +Install Docker Engine (docker-ce, docker-ce-cli, containerd.io) - see instructions at [https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/). + +Start docker +``` +sudo systemctl docker start +``` + +Install Docker Compose from Docker's GitHub repository - see [https://docs.docker.com/compose/install/](https://docs.docker.com/compose/install/). You need to **specify version 1.29.2** (or newer) in the curl command. + +Check default file permissions. If the *logstash* user is not able to access the logstash config files in the git checkout, you'll get an error from logstash saying there are no .conf files found even though they are there. Various components also need to be able to read and write to the data/ directory in the checkout. Defaults of 775 (u=rwx, g=rwx, o=rx) should work. + +### 2. Set up Data Sources The data processing pipeline needs data to ingest in order to do anything, of course. There are three types of data that can be consumed. - sflow - netflow - tstat -At least one of these must be set up on a *sensor* (i.e., flow *exporter* / router), to provide the incoming flow data. -You can do this step later, but it will helpful to have it working first. 
+At least one of these must be set up on a **sensor** (i.e., flow **exporter** / router), to provide the incoming flow data. +You can do this step later, but it will helpful to have it working first. Check it with tcpdump on the pipeline host. -Sflow and netflow data should be exported to the pipeline host where there will be *collectors* (nfcapd and/or sfcapd processes) ready to receive it (see below). To use the default settings, send sflow to port 9998 and netflow/IPFIX to port 9999. On the pipeline host, allow incoming traffic from the flow exporters, of course. +Configure sflow and netflow to send flow data to the pipeline host. Each sensor/router should send to a different port. +You will list the port numbers in the .env file (see below). +Usually default settings are ok. For netflow/IPFIX, the active timeout is typically 1 minute; also have it send templates every minute or so. -Tstat data should be sent directly to the logstash input rabbit queue "netsage_deidentifier_raw" on the pipeline host. No collector is needed for tstat data. See the netsage-project/tstat-transport repo. (From there, logstash will grab the data and process it the same way as it processes sflow/netflow data. (See the Docker Advanced guide.) +On the pipeline host, configure the firewall to allow incoming traffic from the flow exporters, of course. -### 2. Set up a Pipeline Host -Decide where to run the Docker Pipeline and get it set up. Adjust iptables to allow the flow exporters (routers) to send flow data to the host. +Tstat data should be sent directly to the logstash input rabbit queue "netsage_deidentifier_raw" on the pipeline host. No collector is needed for tstat data. See the netsage-project/tstat-transport repo. (From there, logstash will grab the data and process it the same way as it processes sflow/netflow data. (See the Docker Advanced guide.) -Install Docker Engine (docker-ce, docker-ce-cli, containerd.io) - see instructions at [https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/). - -Install Docker Compose from Docker's GitHub repository - see [https://docs.docker.com/compose/install/](https://docs.docker.com/compose/install/). You need to **specify version 1.29.2** (or newer) in the curl command. - -Check default file permissions. If the *logstash* user is not able to access the logstash config files in the git checkout, you'll get an error from logstash saying there are no .conf files found even though they are there. Various components also need to be able to read and write to the data/ directory in the checkout. Defaults of 775 (u=rwx, g=rwx, o=rx) should work. ### 3. Clone the Netsage Pipeline Project @@ -46,76 +52,93 @@ Clone the netsage-pipeline project from github. git clone https://github.com/netsage-project/netsage-pipeline.git ``` -When the pipeline runs, it uses the logstash conf files that are in the git checkout (in conf-logstash/), as well as a couple other files like docker-compose.yml, so it is important to checkout the correct version. - -Move into the netsage-pipeline/ directory (**all git and docker commands must be run from inside this directory!**), then checkout the most recent version of the code. It will say you are in 'detached HEAD' state if you don't include -b. +When the pipeline runs, it uses some of the files that are in the git checkout, so it is important to checkout the correct version. 
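+
+If you are not sure which release tags exist, standard git commands will list them (a convenience sketch; the tag names themselves will vary):
+
+```sh
+cd netsage-pipeline
+git fetch --tags     # make sure all release tags are present locally
+git tag -l           # list the available release versions
+```
+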
+Move into the netsage-pipeline/ directory (**all git, docker, and other commands below must be run from inside this directory!**), then checkout the most recent version of the pipeline (the most recent tag). It will say you are in 'detached HEAD' state. ```sh +cd netsage-pipeline git checkout {tag} ``` -Replace "{tag}" with the release version you intend to use, e.g., "v1.2.11". ("Master" is the development version and is not intended for general use!) -`git status` will confirm which branch you are on, e.g., master or v1.2.11. +Replace "{tag}" with the release version you intend to use, e.g., "v2.0.0". ("Master" is the development version and is not intended for general use!) +`git status` will confirm which branch you are on, e.g., master or v2.0.0. -### 4. Create Docker-compose.override.yml +### 4. Create Environment File -Information in the `docker-compose.yml` file tells docker which containers (processes) to run and sets various parameters for them. -Settings in the `docker-compose.override.yml` file will overrule and add to those. Note that docker-compose.yml should not be edited since upgrades will replace it. Put all customizations in the override file, since override files will not be overwritten. +{@import ../components/docker_env.md} -Collector settings may need to be edited by the user, so the information that docker uses to run the collectors is specified (only) in the override file. Therefore, docker-compose_override.example.yml must always be copied to docker-compose_override.yml. +### 5. Run the pmacct setup script ```sh -cp docker-compose.override_example.yml docker-compose.override.yml +./setup-pmacct.sh ``` -By default docker will bring up a single sflow collector and a single netflow collector that listen to udp traffic on ports localhost:9998 and 9999. If this matches your case, you don't need to make any changes to the docker-compose.override_example.yml. +This script will use settings in the .env file to create pmacct (ie, nfacctd and sfacctd) config files in conf-pmacct/ from the .ORIG files in the same directory. -- If you have only one collector, remove or comment out the section for the one not needed so the collector doesn't run and simply create empty nfcapd files. -- If the collectors need to listen to different ports, make the appropriate changes here in both the "command:" and "ports:" lines. -- By default, the collectors will save flows to nfcapd files in sflow/ and netflow/ subdirectories in `./data/input_data/` (i.e., the data/ directory in the git checkout). If you need to save the data files to a different location, see the Docker Advanced section. +It will also create docker-compose.override.yml from docker-compose.override_example.yml, or update it if it exists, filling in ${var} values from the .env file. (This is needed since pmacct can't use environment variables directly, like logstash can.) -Other lines in this file you can ignore for now. +Information in the `docker-compose.yml` file tells docker which containers (processes) to run and sets various parameters for them. +Settings in the `docker-compose.override.yml` file will overrule and add to those. Note that docker-compose.yml should not be edited since upgrades will replace it. Put all customizations in the override file, which will not be overwritten. -:::note -If you run into issues, try removing all the comments in the override file as they may conflict with the parsing done by docker-compose, though we have not found this to be a problem. -::: +### 6. Check the override file -### 5. 
Choose Pipeline Version +Be sure it looks ok and is consistent with the new config files in conf-pmacct/. All environment variables (${x}) should be filled in. -Once you've created the docker-compose.override.xml file and finished adjusting it for any customizations, you're ready to select which image versions Docker should run. +### 7. Run the cron setup script ```sh -./scripts/docker_select_version.sh +./setup-cron.sh ``` -When prompted, select the **same version** you checked out earlier. - -This script will replace the version numbers of docker images in docker-compose.override.yml and docker-compose.yml with the correct values. - -### 6. Create Environment File - -{@import ../components/docker_env.md} -## Testing the Collectors +This script will create docker-netsage-downloads.cron and .sh and restart-logstash-container.cron and .sh files in cron.d/ and bin/ from .ORIG files in the same directories, filling in required information. -At this point, you can start the two flow collectors by themselves by running the following line. If you only need one of the collectors, remove the other from this command. +The downloads cron job runs the downloads shell script, which will get various files required by the pipeline from scienceregistry.grnoc.iu.edu on a weekly basis. +The restart cron job runs the restart shell script, which restarts the logstash container once a day. Logstash must be restarted to pick up any changes in the downloaded files. -(See the next section for how to start all the containers, including the collectors.) +Note that you need to manually check and then copy the .cron files to /etc/cron.d/. ```sh -docker-compose up -d sflow-collector netflow-collector +sudo cp cron.d/docker-netsage-downloads.cron /etc/cron.d/ +sudo cp cron.d/restart-logstash-container.cron /etc/cron.d/ ``` -Subdirectories for sflow/netflow, year, month, and day are created automatically under `data/input_data/`. File names contain dates and times. -These are not text files; to view the contents, use an [nfdump command](http://www.linuxcertif.com/man/1/nfdump/) (you will need to install nfdump). -Files will be deleted automatically by the importer as they age out (the default is to keep 3 days). +Also, manually run the downloads script to immediately download the required external files. -If the collector(s) are running properly, you should see nfcapd files being written every 5 minutes and they should have sizes of more than a few hundred bytes. (Empty files still have header and footer lines.) -See Troubleshooting if you have problems. - -To stop the collectors ```sh -docker-compose down +bin/docker-netsage-downloads.sh ``` -## Running the Collectors and Pipeline +Check to be sure files are in downloads/. + +>Files located in the git checkout that are used by the docker services and cron: +>- the .env file +>- docker-compose.yml and docker-compose.override.yml +>- files in conf-logstash/ +>- non-ORIG files in conf-pmacct/ +>- cron jobs use non-ORIG files in bin/ and cron.d/ and write to logstash-downloads/ +>- logstash may write to or read from logstash-temp/ +> On upgrade, docker-compose.yml, files in conf-logstash, ORIG and example files will be overwritten. + +### 8. Start up the Docker Containers {@import ../components/docker_pipeline.md} +>Note that if the pipeline host is rebooted, the containers will not restart automatically. 
+> +>If this will be a regular occurance on your host, you can add `restart:always` to each service in the docker-compose.override file (you may need to add any missing services to that file). + +### 9. Check the rabbitMQ UI + +The rabbitMQ user interface can be used to see if there are incoming flows from pmacct processes and if those flows are being comsumed by logstash. + +In your browser, go to ``` https:///rabbit, login with username guest, password guest``` + +### 10. Check for processed flows + +- Ask your contact at IU to check for flows and/or look at dashboards in your grafana portal. Flows should appear after 10-15 minutes. +- Check to be sure the sensor name(s) are correct. +- Check flow sizes and rates to be sure they are reasonable. (If sampling rate corrections are not being done properly, you may have too few flows and flows which are too small.) You IU contact can check to see whether flows have @sampling_corrected=yes (a handful from the startup of netflow collection may not) and to check for unusal tags on the flows. + +If you are not seeing flows, see the Troubleshooting section of the documentation. + + + + diff --git a/website/docs/deploy/docker_troubleshooting.md b/website/docs/deploy/docker_troubleshooting.md index 7cfc2690..0006c10c 100644 --- a/website/docs/deploy/docker_troubleshooting.md +++ b/website/docs/deploy/docker_troubleshooting.md @@ -4,32 +4,53 @@ title: Docker Troubleshooting sidebar_label: Troubleshooting --- -## Troubleshooting - ### If you are not seeing flows after installation **Troubleshooting checklist:** -- Use `docker-compose ps` to be sure the collectors (and other containers) are running. -- Make sure you configured your routers to point to the correct address/port where the collector is running.  +- Use `docker-compose ps` to be sure the all the containers are (still) running. + (If there are no sflow/netflow sensors, the command should be "echo No Sflow/Netflow sensor" and the container state should be Exit 0.) +- Check the logs of the various containers to see if anything jumps out as being invalid.  +- Make sure you configured your routers to point to the correct host and port. - Check iptables on your pipeline host to be sure incoming traffic from the routers is allowed. -- Check to see if nfcapd files are being written. There should be a directory for the year, month, and day in netsage-pipeline/data/input_data/netflow/ or sflow/, and files should be larger than a few hundred bytes. If the files exist but are too small, the collector is running but there are no incoming flows. "nfdump -r filename" will show the flows in a file (you may need to install nfdump). -- Make sure you created .env and docker-compose.override.yml files and updated the settings accordingly, sensorName especially since that identifies the source of the data. -- Check the logs of the various containers to see if anything jumps out as being invalid.  `docker-compose logs $service`, where $service is logstash, importer, rabbit, etc. +- Use tcpdump to be sure there are flows coming into the expected port. - If the final rabbit queue is on an external host, check the credentials you are using and whether iptables on that host allows incoming traffic from your pipeline host. +- Did you create and edit .env? + - Are the numbers of sensors, sensor names, and port numbers correct? + - Make sure you don't have sflows going to a nfacctd process or vise versa. + - Are there names and port numbers for each sensor? 
+ - Are the environment variable names for sensors like *_1, *_2, *_3, etc. with one sequence for sflow and one for netflow? +- Did you run setup-pmacct.sh? +- In docker-compose.override.yml, make sure the ports are set correctly. You will see *port on host : port in container*. (Docker uses its own port numbers internally.) *Port on host* should match what is in .env (the port the router is sending to on the pipeline host). *Port in container* should match what is in the corresponding pmacct config. +- In pmacct config files, make sure amqp_host is set to rabbit (for docker installs) or localhost (for bare metal) +- In 'docker-compose ps' output, be sure the command for the sfacctd_1 container is /usr/local/sbin/sfacctd, similarly for nfacctd. +- In docker-compose.yml and docker-compose.override.yml, make sure "command:"s specify config files with the right _n's (these are actually just the parameters for the commands). + +### If there are too few flows and flow sizes and rates are too low + +The router may not be sending the sampling rate. This should show up as @sampling_corrected = no. +You may need to apply sampling corrections using an advanced logstash option. ### If flow collection stops **Errors:** - See if any of the containers has died using `docker ps` - Check the logs of the various containers to see if anything jumps out as being invalid. Eg, `docker-compose logs logstash`. -- If logstash dies with an error about not finding \*.conf files, make sure conf-logstash/ and directories and files within are readable by everyone (and directories are executable by everyone). The data/ directory and subdirectories need to be readable and writable by everyone, as well. +- If logstash logs say things like *OutOfMemoryError: Java heap space* or *An unexpected connection driver error occured (Exception message: Connection reset)* and the rabbit container is also down... We've seen this before, but are not sure why it occurs. Try stopped everything, restarting docker for good measure, and starting everything up again. (If problems are continuing, it might be a memory issue.) + ``` + docker-compose down + sudo systemctl restart docker + docker-compose up -d + ``` +- If logstash dies with an error about not finding \*.conf files, make sure conf-logstash/, and directories and files within, are readable by everyone (and directories are executable by everyone). +- logstash-downlaods/ and conf-pmacct/ files need to be readable. +- logstash-temp/ needs to be owned (readable and writable) by the logstash user (uid 1000, regardless of whether there is different username associated with uid 1000 on the host). + + +**Memory:** +- If you are running a lot of data, sometimes docker may need to be allocated more memory. The most likely culprit is logstash (java) which is only allocated 4GB of RAM by default. Please see the Docker Advanced guide for how to change. **Disk space:** - If the pipeline suddenly fails, check to see if the disk is full. If it is, first try getting rid of old docker images and containers to free up space: `docker image prune -a` and `docker container prune`. -- Also check to see how much space the nfcapd files are consuming. You may need to add more disk space. You could also try automatically deleting nfcapd files after a fewer number of days (see Docker Advanced). -**Memory:** -- If you are running a lot of data, sometimes docker may need to be allocated more memory. The most -likely culprit is logstash (java) which is only allocated 2GB of RAM by default. 
Please see the Docker Advanced guide for how to change. diff --git a/website/docs/deploy/docker_upgrade.md b/website/docs/deploy/docker_upgrade.md index 9fac01af..c207ed53 100644 --- a/website/docs/deploy/docker_upgrade.md +++ b/website/docs/deploy/docker_upgrade.md @@ -6,17 +6,17 @@ sidebar_label: Docker - Upgrading To upgrade a previous installment of the Dockerized pipeline, perform the following steps. -### Shut things down +### 1. Shut things down ```sh cd {netsage-pipeline directory} docker-compose down ``` -This will stop and remove all the docker containers, including the importer, logstash, and any collectors. Note that incoming flow data will not be saved during the time the collectors are down. +This will stop and remove all the docker containers. Note that incoming flow data will not be saved during the time the collectors are down. -### Update Source Code +### 2. Update source code -To upgrade to a new release, pull new tags/code from github and docker images from dockerhub. Your customized .env and override files will not be overwritten, nor will data files, cache files, or downloaded support files. +To upgrade to a new release, first pull new code/tags from github. Your customized .env and override files will not be overwritten, nor will files created by startup scripts, cache files, or downloaded support files, though it's always good to make backup copies. ```sh git reset --hard @@ -27,44 +27,35 @@ git pull origin master git reset --hard will obliterate any changes you have made to non-override files, eg, logstash conf files. If necessary, please make sure you commit and save to a feature branch before continuing. ::: -Run these three commands to select the new release you want to run. In the first, replace "{tag}" by the version to run (eg, v1.2.11). When asked by the third, select the same version as the tag you checked out. +Checkout the version of the pipeline you want to run (replace "{tag}" by the version number, eg, v1.2.11) and make sure it's up to date. ```sh git checkout -b {tag} git pull -./scripts/docker_select_version.sh ``` -The docker-compose.yml and docker-compose.override.yml should both now have the version number you selected for pipeline_importer and pipeline_logstash. -### Check/Update Customization Files -Occasionally, something may change which will necessitate editing your override and/or env file. +### 3. Recreate and check custom files -- Compare the new `docker-compose.override_example.yml` file to your `docker-compose.override.yml`. Be sure to check to see if the version of nfdump has changed. Look for lines like `image: netsage/nfdump-collector:`. Make sure the version in your override file matches what is the example file. (You do not need to actually perform any upgrade yourself. This will ensure the correct version is pulled from Docker Hub.) - -- Also, look for`version: "x.x"` at the top. If the version number is different, change it in your docker-compose.override.yml file. (This is the Compose file format version.) - - -- Compare your `.env` file with the new `env.example` file to see if any new lines or sections have been added. If there have been any changes relevant to your deployment, eg, new options you want to use, copy the changes into your .env file. - -- If you used the Docker Advanced guide to make a `netsage_override.xml` file, compare it to `netsage_shared.xml` to see if there are any changes. This is unlikely. 
- - -### Update Docker Containers - -This should be done automatically when you start up the containers, but you can also pull new images from Docker Hub now. +Compare .env and docker-compose.override.yml to their example files to see if any changes have been made. (Expect the example files to have environment variables that have gotten filled in in the non-example files.) Copy in any updates, particularly any relevant ones or just recreate them as you did during installation. +Run the pmacct setup script to recreate the pmacct config files, in case there have been any changes. This might also update the override file. +```sh +./setup-pmacct.sh ``` -docker-compose pull + +Rerun the cron setup script to recreate the non-ORIG files in bin/ and cron.d/. Compare the resulting .cron files in the cron.d/ directory to those in /etc/cron.d/. If any have changed, copy them to /etc/cron.d/. +```sh +./setup-cron.sh ``` -### Restart all the Docker Containers +### 4. Restart all the Docker Containers ``` docker-compose up -d ``` -This will start all the services/containers listed in the docker-compose.yml and docker-compose.override.yml files, including the importer, logstash pipeline, and collectors. +This will start all the services/containers listed in the docker-compose.yml and docker-compose.override.yml files, pulling down any new docker images that are required. -### Delete old images and containers +### 5. Delete old images and containers To keep things tidy, delete any old images and containers that are not being used. diff --git a/website/docs/pipeline/elasticsearch.md b/website/docs/pipeline/elasticsearch.md index 05decdef..0b816bc0 100644 --- a/website/docs/pipeline/elasticsearch.md +++ b/website/docs/pipeline/elasticsearch.md @@ -16,10 +16,10 @@ Flow data is ultimately saved to Elasticsearch. Following are the fields that ar |es_doc_id |4f46bef884... |Hash of meta.id and start time. May be used as doc id in ES to prevent duplicates, but see Notes elsewhere.| |meta.flow_type |sflow |'sflow', 'netflow', or 'tstat'| |meta.protocol |tcp |Protocol used| -|meta.sensor_id |snvl2-pw-sw-1-mgmt-2.cenic.net|Sensor name (set in importer config, may not always be a hostname) | -|meta.sensor_group |CENIC |Sensor group, usually the network | -|meta.sensor_type |Regional Network |Sensor type ('Circuit', 'Regional Network', etc) | -|meta.country_scope |Domestic |'Domestic', 'International', or 'Mixed', depending on countries of src and dst| +|meta.sensor_id |GEANT NY to Paris |Assigned sensor name | +|meta.sensor_group |GEANT |Sensor group, usually the network | +|meta.sensor_type |Circuit |Sensor type ('Circuit', 'Regional Network', etc) | +|meta.country_scope |International |'Domestic', 'International', or 'Mixed', depending on countries of src and dst (Domestic = src and dst in USA)| |meta.is_network_testing |no |'yes' if discipline is 'CS.Network Testing and Monitoring' or port is one used for PerfSonar: 5001, 5101, or 5201| ### Source Fields (Destination Fields similarly with "dst") diff --git a/website/docs/pipeline/intro.md b/website/docs/pipeline/intro.md index 5534cf58..17b90a10 100644 --- a/website/docs/pipeline/intro.md +++ b/website/docs/pipeline/intro.md @@ -5,36 +5,41 @@ sidebar_label: Intro --- ## Network Flows -A flow is defined as a series of packets with the same source IP and port, destination IP and port, and protocal (the "5-tuple"). 
+As is well known, communication between two computers is accomplished by breaking up the information to be sent into packets which are forwarded through routers and switches from the source to the destination. A **flow** is defined as a series of packets with common characteristics. Normally these are the source IP and port, the destination IP and port, and the protocal (the **5-tuple**). These flows can be detected and analyzed to learn about the traffic going over a certain circuit, for example. -## The NetSage Pipeline +> Note that when there is a "conversation" between two hosts, there will be two flows, one in each direction. Note also that determining when the flow ends is somewhat problematic. A flow ends when no more matching packets have been detected for some time, but exactly how much time? A router may declare a flow over after waiting just 15 seconds, but if one is interested in whole "conversations," a much longer time might make more sense. The source port of flows is normally ephemeral and a particular value is unlikely to be reused in a short time unless the packets are part of the same flow, but what if packets with the same 5-tuple show up after 5 or 10 or 30 minutes? Are they part of the same flow? + +## Flow Export + +Network devices such as routers can function as **flow exporters** by simply configuring and enabling flow collection. All or nearly all come with this capability. -The Netsage Flow Processing Pipeline is composed of several components for processing network flow data, including collection, deidentification, metadata tagging, flow stitching, etc. -There are many ways the components can be combined, configured, and run. These documents will describe the standard "simple" set up and provide information for more complex configurations. +There are three main types of flow exporters: **[sflow](https://www.rfc-editor.org/info/rfc3176)**, **[netflow/IPFIX](https://www.cisco.com/c/en/us/products/collateral/ios-nx-os-software/ios-netflow/prod_white_paper0900aecd80406232.html))** and **[tstat](http://tstat.polito.it/)**. Sflow data is composed of sampled packets, while netflow (the newest version of which is IPFIX) and tstat consist of information about series of packets. These are described further in the following sections. -### Flow Export +For Netsage, flow exporters, also referred to as **sensors**, are configured to send the flow data to a **Netsage Pipeline host** for processing. -In Netsage, "sensor(s)" are "flow exporters," i.e., network devices such as routers that are configured to collect flow data ([tstat](http://tstat.polito.it/), [sflow](https://www.rfc-editor.org/info/rfc3176), or [netflow/IPFIX](https://www.cisco.com/c/en/us/products/collateral/ios-nx-os-software/ios-netflow/prod_white_paper0900aecd80406232.html)) and send it to a "Netsage pipeline" on a "pipeline host" for processing. +## The NetSage Pipeline + +The **Netsage Flow Processing Pipeline** processes network flow data. It is comprised of several components that collect the flows, add metadata, stitch them into longer flows, etc. ### Pipeline Components The Netsage Flow Processing Pipeline is made of the following components - - [Pmacct](https://github.com/pmacct/pmacct): the pmacct package includes sfacctd and nfacctd daemons which receive sflow and netflow/IPFIX flows, respectively. They can also do some processing and filtering, but we use these options very minimally. They send the flows to a rabbitmq queue. 
- - [RabbitMQ](https://www.rabbitmq.com/): Used for message queueing and passing at a couple of points in the full pipeline. - - [Logstash](https://www.elastic.co/logstash): A logstash pipeline performs a variety of operations on the flow data to transform it and add additional information. ([Doc](logstash.md)) - - [Elasticsearch](https://www.elastic.co/what-is/elasticsearch): Used for storing the final flow data. - -Sflow and netflow should be configured to send data to ports on the pipeline host (a different port for each sensor). Pmacct processes will be listening on those ports. + - **[Pmacct](https://github.com/pmacct/pmacct)**: The pmacct ("p-m-account") package includes sfacctd and nfacctd daemons which receive sflow and netflow/IPFIX flows, respectively. They can also do some processing and filtering, but we use these options very minimally. (Pmacct includes other daemons, as well, but we do not use them. Here, "pmacct" will refer to sfacctd and nfacctd in general.) These daemons send the flows to a rabbitmq queue. + - **[RabbitMQ](https://www.rabbitmq.com/)**: Rabbitmq is used for message queueing and passing at a couple of points in the full pipeline. + - **[Logstash](https://www.elastic.co/logstash)**: A logstash pipeline performs a variety of operations on the flow data to transform it and add additional information. ([Doc](logstash.md)) + - **[Elasticsearch](https://www.elastic.co/what-is/elasticsearch)**: Elasticsearch is used for storing the final flow data. -Tstat flow data can be sent directly to the ingest RabbitMQ queue on the pipeline host using the Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) tool. This can be installed as usual or via Docker. +> Sflow and netflow exporters should be configured to send data to ports on the pipeline host (a different port for each sensor). Pmacct processes will be configured to listen on those ports. +> +> Tstat flow data can be sent directly to the ingest RabbitMQ queue on the pipeline host using the Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) tool. This can be installed as an rpm or via Docker. ### Pipeline Installation -Originally, the pipeline was deployed by installing all of the components individually on one or more servers (the "BareMetal" or "Manual" Install). More recently, we've also added a Docker deployment option. For simple scenerios having just one sflow and/or one netflow sensor (and any number of tstat sensors), the basic "Docker Installation" should suffice. The "Docker Advanced Options" guide will help when there are more sensors and/or other customizations required. +Originally, the pipeline was deployed by installing all of the components individually on one or more servers (the "Bare Metal" or "Manual" Install). We still use this deployment method at IU. More recently, we've also added a Docker deployment option. For simple scenerios having just one sflow and/or one netflow sensor (and any number of tstat sensors), the basic "Docker Installation" should suffice. The "Docker Advanced Options" guide will help when there are more sensors and/or other customizations required. ## Visualization -[Grafana](https://grafana.com/oss/grafana/) or [Kibana](https://www.elastic.co/kibana) (with appropriate credentials) can be used to visualize the data stored in elasticsearch. Netsage grafana dashboards are available in github [here](https://github.com/netsage-project/netsage-grafana-configs). 
+[Grafana](https://grafana.com/oss/grafana/) or [Kibana](https://www.elastic.co/kibana) (with appropriate credentials) can be used to visualize the data stored in elasticsearch. Netsage grafana dashboards or **portals** are set up by the IU team. These are saved in github [here](https://github.com/netsage-project/netsage-grafana-configs). diff --git a/website/docs/pipeline/logstash.md b/website/docs/pipeline/logstash.md index 76a7398f..ea8bcee8 100644 --- a/website/docs/pipeline/logstash.md +++ b/website/docs/pipeline/logstash.md @@ -4,15 +4,15 @@ title: Logstash Pipeline sidebar_label: Logstash --- -The Logstash portion of the Netsage Pipeline reads flows from a RabbitMQ queue, performs various transformations and adds additional information to them, then sends them to a location specified in the output logstash config, eventually ending up in an Elasticsearch instance. +The Logstash portion of the Netsage Pipeline reads flows from a RabbitMQ queue, performs various transformations and adds additional information to them, then sends them to a rabbitMQ queue on a different host, and eventually the data ends up in an Elasticsearch instance. -Logstash config files invoke various logstash "filters" and actions. In the bare metal installation, these conf files are located in /etc/logstash/conf.d/. In a docker installation, the *.conf files in the git checkout, in conf-logstash/, are used. See below for a brief description of what each does and check the files for comments. +Logstash .conf files invoke various "filters" and actions. In the bare metal installation, these conf files are located in /etc/logstash/conf.d/. In a docker installation, the *.conf files in the git checkout, in conf-logstash/, are used. See below for a brief description of what each does and check the files for comments. -Notes: - - All \*.conf files in conf.d/ or conf-logstash/ are executed in alphabetical order, as if they were one huge file. Those ending in .disabled will not be executed (assuming 'path.config: "/etc/logstash/conf.d/*.conf"' in /etc/logstash/pipelines.yml). - - If actions in a particular .conf file are not needed in your particular case, they can be removed or the file disabled, but check carefully for effects on downstream configs. - - MaxMind, CAIDA, and Science Registry database files required by the geoip and aggregate filters are downloaded from scienceregistry.netsage.global via cron jobs weekly or daily. (MaxMind data can change weekly, CAIDA quarterly, Science Registry information randomly.) **NOTE that new versions won't be used in the pipeline until logstash is restarted.** There is a cron file to do this also, though it's not running in Docker deployments. Similarly for other support files, eg, those used in 90-additional-fields.conf. - - Lookup tables for 55-member-orgs.conf that we have compiled are available from sciencregistry.grnoc.iu.edu. See the cron files provided. These will not be updated often, so you may run the cron jobs or not. You will need to provide lists for other networks yourself or ask us. +>Notes: +> - All \*.conf files in conf.d/ or conf-logstash/ are executed in alphabetical order, as if they were one huge file. Those ending in .disabled will not be executed (assuming 'path.config: "/etc/logstash/conf.d/*.conf"'). +> - If you are not running a standard Netsage pipeline and actions in a particular .conf file are not needed in your particular case, they or the whole .conf file can be removed, but check carefully for downstream effects. 
+> - MaxMind, CAIDA, and Science Registry database files required by the geoip and aggregate filters are downloaded from scienceregistry.netsage.global via cron jobs on a weekly or daily basis. (MaxMind data can change weekly, CAIDA quarterly, Science Registry information randomly.) **NOTE that new versions won't be used in the pipeline until logstash is restarted.** There is a cron file to do this also. Similarly for other support files, eg, those used in 90-additional-fields.conf. +> - Lookup tables for 55-member-orgs.conf that we have compiled are available from sciencregistry.grnoc.iu.edu. See the cron files provided. These will not be updated often, so you may run the cron jobs or not. You will need to provide lists for other networks yourself or ask us. ## Logstash Sequence diff --git a/website/docs/pipeline/pmacct.md b/website/docs/pipeline/pmacct.md index a896dd6e..3b67fd74 100644 --- a/website/docs/pipeline/pmacct.md +++ b/website/docs/pipeline/pmacct.md @@ -5,7 +5,7 @@ sidebar_label: Pmacct --- As flow data comes into the pipeline host, it is received by nfacctd and sfacctd processes which are listening on the proper ports. These do sampling corrections, add sensor name information, and send the flows to a rabbitmq queue. -Netsage also uses sfacctd to do some preliminary aggregation for sflow, to cut down on the work that logstash needs to do. By default, all samples, with the same 5-tuple, within each 5 minute window are aggregated into one incoming flow. +Netsage also uses sfacctd to do some preliminary aggregation for sflow, to cut down on the work that logstash needs to do. By default, all samples, with the same 5-tuple, within each 5 minute window are aggregated into one incoming raw flow. ### Configuration For netsage, pretag.map files are required, one for each nfacctd or sfacctd process. In the bare-metal installation, these are in /etc/pmacct/. For the default docker deployment, we have one for sflow, one for netflow: sfacct-pretag.map and nfacct-pretag.map. These specify the sensor names which are added to the flows. See the comments in the files and the Deployment pages in these docs. diff --git a/website/docs/pipeline/sensors.md b/website/docs/pipeline/sensors.md index 45770f6e..f1b6f84c 100644 --- a/website/docs/pipeline/sensors.md +++ b/website/docs/pipeline/sensors.md @@ -4,11 +4,11 @@ title: Sflow/Netflow Data Export sidebar_label: Sflow/Netflow Data --- -Sflow and Netflow (including IPFIX) export can be configured on appropriate network devices. Routers and switches have flow export capabililties built in, although they can somtimes be buggy. -We have assumed that each sensor sends flow data to a different port on the pipeline host. Certainly if different sensors use different sampling rates, this needs to be adhered to. +Sflow and netflow/IPFIX export can be configured on appropriate network devices. Routers and switches will have at least one of these capabililties built in, although it can somtimes be buggy. -Sflow collects samples of packets passing through the device and sends them to a collector. The sampling rate can be configured, eg, 1 out of every 100 packets. It is assumed that, in our example, each observed packet represents 100 similar packets. To approximately correct for sampling, the number of bytes in the packet is multiplied by 100. The sampling rate compared to the number of packets per second flowing through the device determines how accurate this approximation is. It is of course, least accurate for very short flows. 
+We have assumed that each exporter/sensor will send flow data to a different port on the pipeline host. Certainly if different sensors use different sampling rates, this needs to be adhered to. The pipeline uses the port number to recognize which sensor the flows are coming from and tag them with the name of that sensor. -Netflow may also sample packets, and the same sampling corrections apply, but it also keeps track of the flows and aggregates by the so-called 5-tuple (source and destination IPs, ports, and protocol). The "active timeout" determines how often netflow sends out an "update" on the flows it is aggregating. The "inactive timeout" determines how long to wait for another matching packet, that is when to declare that a flow has ended. -Typically, the active timeout is 1 minute and the inactive timeout 15 seconds. For flows longer than 1 minute, an "update" is sent out every minute. The tricky thing is that these updates all have the same start time (the time the first packet was observed), although the end time (the time the last packet was observed) and duration change, and the number of bytes and packets reported corresponds only to the period since the last update. -The netsage pipeline attempts to combine the updates to aggregate (and also break up) long flows correctly. +Sflow exporters simply collect individual samples of packets passing through the device and send them to a collector (pmacct in our case). The sampling rate can be configured, eg, 1 out of every 100 packets. To approximately correct for the fact that most packets are not detected, one assumes that each sampled packet represents N others and multiplies the number of bytes in the sampled packet by the sampling rate N, eg, 100. The sampling rate compared to the number of packets per second flowing through the device determines how accurate this approximation is. Sampling is, of course, least accurate for shorter flows since their packets will be more likely to be missed and the correction applied may overestimate the number of packets and bytes. Discussions of accuracy and sampling rates can be found online. The netsage pipeline then looks for matching packets to aggregate into flows. + +Netflow also commonly samples packets, and the same sampling corrections apply, but it also keeps track of the flows and aggregates by the 5-tuple (source and destination IPs, ports, and protocol) on the router. The **active timeout** determines how often netflow sends out an "update" on the flows it is aggregating. The **inactive timeout** determines how long to wait for another matching packet before declaring that a flow has ended. +Typically, the active timeout is 1 minute and the inactive timeout 15 seconds. This means that for flows longer than 1 minute, a "netflow update" is sent out every minute. The tricky thing is that these update-flows all have the same start time (the time the first packet was observed). The end time (the time the last packet was observed) and duration change, but the number of bytes and packets reported corresponds only to the period since the last update. The netsage pipeline attempts to combine these updates to aggregate long flows correctly. diff --git a/website/docs/pipeline/tstat.md b/website/docs/pipeline/tstat.md index dc16a413..0d537671 100644 --- a/website/docs/pipeline/tstat.md +++ b/website/docs/pipeline/tstat.md @@ -4,9 +4,9 @@ title: Tstat Data Export sidebar_label: Tstat Data --- -[Tstat](http://tstat.polito.it/) is a passive sniffer that provides insights into traffic patterns. 
+**[Tstat](http://tstat.polito.it/)** is a passive sniffer that provides insights into traffic patterns. -The Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) project provides client programs to parse the captured data and send it to a rabbitmq host where it can then be processed by the [logstash pipeline](logstash), stored in elasticsearch, and finally displayed in our Grafana [dashboards](https://github.com/netsage-project/netsage-grafana-configs). +The **Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) project** provides client programs to parse the captured data and send it to a rabbitmq host where it can then be processed by the [logstash pipeline](logstash), stored in elasticsearch, and finally displayed in our Grafana [dashboards](https://github.com/netsage-project/netsage-grafana-configs). Docker images exist on Docker Hub for tstat and tstat_transport. This is still in a beta state and is in development. The initial documentation is available [here](https://github.com/netsage-project/tstat-transport/blob/master/docs/docker.md). From d00d09b64b55986250f457c23de1c41e5a254bdb Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 11 Aug 2022 16:11:37 +0000 Subject: [PATCH 104/126] adding a .gitignore file --- logstash-temp/.gitignore | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 logstash-temp/.gitignore diff --git a/logstash-temp/.gitignore b/logstash-temp/.gitignore new file mode 100644 index 00000000..5e7d2734 --- /dev/null +++ b/logstash-temp/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore From c115338e1dcccd6fe67208e34ceeee75c4ffdc02 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Mon, 15 Aug 2022 21:58:47 +0000 Subject: [PATCH 105/126] Documentation updates for 2.0 --- website/docs/components/docker_env.md | 23 ---- website/docs/components/docker_first_steps.md | 26 ----- website/docs/components/docker_pipeline.md | 33 ------ .../docs/deploy/docker_install_advanced.md | 41 ++++--- website/docs/deploy/docker_install_simple.md | 81 ++++++++++--- website/docs/deploy/docker_troubleshooting.md | 65 ++++++----- website/docs/deploy/docker_upgrade.md | 17 ++- website/docs/devel/docker.md | 110 ++++++++++-------- website/docs/devel/pipeline_dataset.md | 8 +- website/docs/pipeline/intro.md | 12 +- website/docs/pipeline/logstash.md | 8 +- website/docs/pipeline/pmacct.md | 11 +- website/docs/pipeline/sensors.md | 11 +- website/docs/pipeline/tstat.md | 4 + 14 files changed, 229 insertions(+), 221 deletions(-) delete mode 100644 website/docs/components/docker_env.md delete mode 100644 website/docs/components/docker_first_steps.md delete mode 100644 website/docs/components/docker_pipeline.md diff --git a/website/docs/components/docker_env.md b/website/docs/components/docker_env.md deleted file mode 100644 index ca69aaaa..00000000 --- a/website/docs/components/docker_env.md +++ /dev/null @@ -1,23 +0,0 @@ -Next, copy `env.example` to `.env` then edit the .env file to set the sensor names, ports, and where to send processed flows. - -```sh -cp env.example .env -``` - -:::note -Sensor names uniquely identify the source of the data and will be shown in the Grafana dashboards so they should be understandable by a general audience. For example, your sensor names might be "MyNet New York Sflow" or "MyNet New York to London". (Running your proposed names by a Netsage admin would be helpful.) 
-::: - -- By default, the number of sflowSensors and netflowSensors is set to 1 at the top. If you know from the start that you will have only 1 sensor, set either sflowSensors or netflowSensors to 0 and comment out the sensor name and port below. - - If you will have more than 1 of one type of sensor, see the Docker Advanced Options documentation. - -- In the next section of the .env file, declare the name of sflow sensor 1 and the port to which the exporter is sending the flows. Similarly for netflow sensor 1. - -- You will also want to edit the **rabbit_output** variables. This section defines where the final data will land after going through the pipeline. By default, it will be written to a rabbitmq queue on `rabbit`, ie, the local rabbitMQ server running in the docker container, but there is nothing provided to do anything further with it. - - :::note - To send processed flow data to Indiana University, you will need to obtain settings for this section from your contact. A new queue may need to be set up at IU, as well as allowing traffic from your pipeline host. (At IU, data from the this final rabbit queue will be moved into an Elasticsearch instance for storage and viewing in Netsage Portals.) - ::: - - diff --git a/website/docs/components/docker_first_steps.md b/website/docs/components/docker_first_steps.md deleted file mode 100644 index 9a75fb05..00000000 --- a/website/docs/components/docker_first_steps.md +++ /dev/null @@ -1,26 +0,0 @@ -#### saving this for now in case I need to put it back ####### - -Then checkout the latest version of the code. If you are a developer you'll want the latest version from master, otherwise please use make sure -you've checked out the latest tagged version. - -For example, -```sh -## Normal Deployment, eg, checkout version 1.2.8 -$ git fetch -$ git checkout v1.2.8 -b v1.2.8 - -## Developers -$ git fetch -$ git reset --hard origin/master -``` - -:::warning -git reset --hard will obliterate any changes. On initial installation, you should not have any, but if you do wish to save any state, please make sure you commit and backup to a feature branch before continuing - -Example: -```git commit -a -m "Saving local state"; git checkout -b feature/backup; git checkout master``` -::: - - -All instructions that follow assume these first steps were performed succesfully. If not, you'll likely run into errors down the line if the code doesn't match up with the instructions provided. - diff --git a/website/docs/components/docker_pipeline.md b/website/docs/components/docker_pipeline.md deleted file mode 100644 index efe8c944..00000000 --- a/website/docs/components/docker_pipeline.md +++ /dev/null @@ -1,33 +0,0 @@ -Start up the pipeline (all containers) using - -```sh -docker-compose up -d -``` - -This command will pull down all required docker images and start all the services/containers as listed in the docker-compose.yml and docker-compose.override.yml files. -In general, it will also restart any containers/processes that have died. "-d" runs containers in the background. - -You can see the status of the containers and whether any have died (exited) using these commands -```sh -docker-compose ps -docker container ls -``` - -To check the logs for each of the containers, run - -```sh -docker-compose logs logstash -docker-compose logs rabbit -docker-compose logs sfacctd_1 -docker-compose logs nfacctd_1 -``` - -Add `-f`, e.g. `-f logstash` to see new log messages as they arrive. 
`--timestamps`, `--tail`, and `--since` are also useful -- look up details in Docker documentation. - -To shut down the pipeline (all containers) use - -```sh -# docker-compose down -``` - -Run all commands from the netsage-pipeline/ directory. diff --git a/website/docs/deploy/docker_install_advanced.md b/website/docs/deploy/docker_install_advanced.md index c8758d33..179ef890 100644 --- a/website/docs/deploy/docker_install_advanced.md +++ b/website/docs/deploy/docker_install_advanced.md @@ -16,7 +16,7 @@ Any number of sensors can be accomodated, although if there are more than a few #### a. Edit environment file -As an example, say we have three netflow sensors. In the .env file, first set `netflowSensors=3`. Then, in the next section, add the names and ports for the additional sensors using variable names ending with _2 and _3. Set the port numbers to those you have used. +As an example, say we have three netflow sensors. In the .env file, first set `netflowSensors=3`. Then, in the next section, add the actual sensor names and ports for the additional sensors using variable names ending with _2 and _3. An example: ``` netflowSensorName_1=The 1st Netflow Sensor Name @@ -31,7 +31,7 @@ netflowPort_3=9002 #### b. Edit docker-composeoverride_example.yml -Add more nfacctd services to the example override file. When copying and pasting, replace _1 with _2 or _3 in three places! +Add more nfacctd services to the example override file. When copying and pasting, replace _1 with _2 or _3 in three places! Your file should look look something like this (remember you'll need to do this again after an upgrade! We need to fix the script to do this automatically): ``` nfacctd_1: @@ -63,7 +63,7 @@ Check docker-compose.override.yml and files in conf-pmacct/ for consistency. #### d. Start new containers -If you are simply adding new collectors nfacctd_2 and nfacctd_3, and there are no changes to nfacctd_1, you can simply start up the new containers with +If you are simply adding new collectors nfacctd_2 and nfacctd_3, and there are no changes to nfacctd_1, you should be able to start up the additional containers with ```sh docker-compose up -d @@ -72,11 +72,13 @@ docker-compose up -d Otherwise, or to be safe, bring everything down first, then back up. ## To Filter Flows by Interface -If your sensors are exporting all flows, but only those using particular interfaces are relevant, use this option in the .env file. All incoming flows will be read in, but the logstash pipeline will drop those that do not have src_ifindex OR dst_inindex equal to one of those listed. (This may create a lot of extra work and overwhelm logstash, so if at all possible, try to limit the flows at the router level or using iptables.) +If your sensors are exporting all flows, but only those using particular interfaces are relevant, use this option in the .env file. All incoming flows will be read in, but the logstash pipeline will drop those that do not have src_ifindex OR dst_inindex equal to one of those listed. (Processing a large number of unecessary flows may overwhelm logstash, so if at all possible, try to limit the flows at the router level or using iptables.) In the .env file, uncomment lines in the appropriate section and enter the information required. "ALL" can refer to all sensors or all interfaces of a sensor. If a sensor is not referenced at all, all of its flows will be kept. 
Be sure `ifindex_filter_flag=True` with "True" capitalized as shown, any sensor names are spelled exactly right, and list all the ifindex values of flows that should be kept and processed. Use semicolons to separate sensors. Some examples (use just one!): ```sh +ifindex_filter_flag=True +*examples:* ifindex_filter_keep=ALL:123 ifindex_filter_keep=Sensor 1: 123 ifindex_filter_keep=Sensor 1: 456, 789 @@ -104,7 +106,7 @@ subnet_filter_keep=Sensor A Name: 123.45.6.0/16; Sensor B Name: 123.33.33.0/24, ## To Change a Sensor Name Depending on the Interface Used In some cases, users want to keep all flows from a certain sensor but differentiate between those that enter or exit through a specific interface by using a different sensor name. -In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 4 fields are set properly! For example, +In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all four fields are set properly! For example, ```sh ifindex_sensor_rename_flag=True @@ -132,34 +134,47 @@ sampling_correction_factor=512 In this example, all flows from sensors "IU Bloomington Sflow" and "IU Indy Sflow" will have a correction factor of 512 applied by logstash. Any other sensors will not have a correction applied by logstash (presumably pmacct would apply the correction automatically). -Note that if pmacct has made a sampling correction already, no additional manual correction will be applied, even if these options are set, -so this can be used *to be sure* a sampling correction is applied. +>Note that if pmacct has made a sampling correction already, no additional manual correction will be applied, even if these options are set, +>so this can be used *to be sure* a sampling correction is applied. -## To NOT deidentify flows +## To NOT Deidentify Flows -Normally all flows are deidentified before being saved to elasticsearch by dropping by truncating the src and dst IP addresses. If you do NOT want to do this, set full_IPs_flag to True. (You will most likely want to request access control on the grafana portal, as well.) +Normally all flows are deidentified before being saved to elasticsearch by truncating the src and dst IP addresses. If you do NOT want to do this, set full_IPs_flag to True. (You will most likely want to request access control on the grafana portal, as well.) ``` # To keep full IP addresses, set this parameter to True. full_IPs_flag=True ``` -## To Customize Java Settings / Increase Memory Available for Lostash +## To Increase Memory Available for Lostash -If cpu or memory use seems to be a problem, try increasing the java JVM heap size for logstash from 4GB to no more than 8. +If cpu or memory usage seems to be a problem, try increasing the java JVM heap size for logstash from 4GB to no more than 8. -To do this, edit LS_JAVA_OPTS in the .env file. +To do this, edit LS_JAVA_OPTS in the .env file. E.g., ```yaml LS_JAVA_OPTS=-Xmx8g -Xms8g ``` -Here are some tips for adjusting the JVM heap size (https://www.elastic.co/guide/en/logstash/current/jvm-settings.html): +Here are some tips for adjusting the JVM heap size (see https://www.elastic.co/guide/en/logstash/current/jvm-settings.html): - Set the minimum (Xms) and maximum (Xmx) heap allocation size to the same value to prevent the heap from resizing at runtime, which is a very costly process. 
- CPU utilization can increase unnecessarily if the heap size is too low, resulting in the JVM constantly garbage collecting. You can check for this issue by doubling the heap size to see if performance improves. - Do not increase the heap size past the amount of physical memory. Some memory must be left to run the OS and other processes. As a general guideline for most installations, don’t exceed 50-75% of physical memory. The more memory you have, the higher percentage you can use. +## To Overwrite Organization Names When an ASN is Shared +Source and destination organization names come from lookups by ASN or IP in databases provided by CAIDA or MaxMind. (The former is preferred, the latter acts as a backup.) +Sometimes an organization that owns an AS and a large block of IPs will allow members or subentities to use certain IP ranges within the same AS. +In this case, all flows to and from the members will have src or dst organization set to the parent organization's name. If desired, the member organizations' names can be substituted. To do requires the use of a "member list" which specifies the ASN(s) that is being shared and the IP ranges for each member. + +See **conf-logstash/support/networkA-members-list.rb.example** for an example. + +## To Tag Flows with Science Discipline Information + +At https://scienceregistry.netsage.global, you can see a hand-curated list of resources (IP blocks) which are linked to the organizations, sciences, and projects that use them. This information is used by the Netsage pipeline to tag science-related flows. If you would like to see your resources or projects included, please contact us to have them added to the Registry. + + + ## To Bring up Kibana and Elasticsearch Containers The file docker-compose.develop.yaml can be used in conjunction with docker-compose.yaml to bring up the optional Kibana and Elastic Search components. diff --git a/website/docs/deploy/docker_install_simple.md b/website/docs/deploy/docker_install_simple.md index f17f2dc3..6b85b4ef 100644 --- a/website/docs/deploy/docker_install_simple.md +++ b/website/docs/deploy/docker_install_simple.md @@ -24,7 +24,7 @@ sudo systemctl docker start Install Docker Compose from Docker's GitHub repository - see [https://docs.docker.com/compose/install/](https://docs.docker.com/compose/install/). You need to **specify version 1.29.2** (or newer) in the curl command. -Check default file permissions. If the *logstash* user is not able to access the logstash config files in the git checkout, you'll get an error from logstash saying there are no .conf files found even though they are there. Various components also need to be able to read and write to the data/ directory in the checkout. Defaults of 775 (u=rwx, g=rwx, o=rx) should work. +Check which file permissions new files are created with. If the *logstash* user is not able to access the logstash config files in the git checkout, you'll get an error from logstash saying there are no .conf files found even though they are there. Defaults of 775 (u=rwx, g=rwx, o=rx) should work. ### 2. Set up Data Sources The data processing pipeline needs data to ingest in order to do anything, of course. There are three types of data that can be consumed. @@ -38,11 +38,11 @@ You can do this step later, but it will helpful to have it working first. Check Configure sflow and netflow to send flow data to the pipeline host. Each sensor/router should send to a different port. You will list the port numbers in the .env file (see below). -Usually default settings are ok. 
For netflow/IPFIX, the active timeout is typically 1 minute; also have it send templates every minute or so. +Usually default settings are ok. (Please share your settings with us.) On the pipeline host, configure the firewall to allow incoming traffic from the flow exporters, of course. -Tstat data should be sent directly to the logstash input rabbit queue "netsage_deidentifier_raw" on the pipeline host. No collector is needed for tstat data. See the netsage-project/tstat-transport repo. (From there, logstash will grab the data and process it the same way as it processes sflow/netflow data. (See the Docker Advanced guide.) +Tstat data should be sent directly to the logstash input rabbit queue "netsage_deidentifier_raw" on the pipeline host. No collector is needed for tstat data. See the netsage-project/tstat-transport repo. (From there, logstash will grab the data and process it the same way as it processes sflow/netflow data. ### 3. Clone the Netsage Pipeline Project @@ -61,9 +61,28 @@ git checkout {tag} Replace "{tag}" with the release version you intend to use, e.g., "v2.0.0". ("Master" is the development version and is not intended for general use!) `git status` will confirm which branch you are on, e.g., master or v2.0.0. -### 4. Create Environment File +### 4. Create the Environment File -{@import ../components/docker_env.md} +Next, copy `env.example` to `.env` then edit the .env file to set the sensor names, ports, and where to send processed flows. + +```sh +cp env.example .env +``` + +1. By default, the number of sflowSensors and netflowSensors is set to 1 at the top. If you know from the start that you will have only 1 sensor, set either sflowSensors or netflowSensors to 0 and comment out the sensor name and port below. + + If you will have more than 1 of one type of sensor, see the Docker Advanced Options documentation. + +2. In the next section of the .env file, declare the name of sflow sensor 1 and the port to which the exporter is sending the flows. Similarly for netflow sensor 1. + +3. You will also want to edit the **rabbit_output** variables. This section defines where the final data will land after going through the pipeline. By default, it will be written to a rabbitmq queue on `rabbit`, ie, the local rabbitMQ server running in the docker container, but there is nothing provided to do anything further with it. + + To send processed flow data to Indiana University, you will need to obtain settings for this section from your contact. A new queue may need to be set up at IU, as well as allowing traffic from your pipeline host. (At IU, data from the this final rabbit queue will be moved into an Elasticsearch instance for storage and viewing in Netsage Portals.) + + +:::note +Sensor names uniquely identify the source of the data and will be shown in the Grafana dashboards so they should be understandable by a general audience. For example, your sensor names might be "MyNet New York Sflow" or "MyNet New York to London". (Running your proposed names by a Netsage admin would be helpful.) +::: ### 5. Run the pmacct setup script @@ -73,16 +92,14 @@ Replace "{tag}" with the release version you intend to use, e.g., "v2.0.0". ("M This script will use settings in the .env file to create pmacct (ie, nfacctd and sfacctd) config files in conf-pmacct/ from the .ORIG files in the same directory. -It will also create docker-compose.override.yml from docker-compose.override_example.yml, or update it if it exists, filling in ${var} values from the .env file. 
(This is needed since pmacct can't use environment variables directly, like logstash can.) - -Information in the `docker-compose.yml` file tells docker which containers (processes) to run and sets various parameters for them. -Settings in the `docker-compose.override.yml` file will overrule and add to those. Note that docker-compose.yml should not be edited since upgrades will replace it. Put all customizations in the override file, which will not be overwritten. +It will also create **docker-compose.override.yml** from docker-compose.override_example.yml, or update it if it exists, filling in ${var} values from the .env file. (This is needed since pmacct can't use environment variables directly, like logstash can.) -### 6. Check the override file +Information in the docker-compose.yml file tells docker which containers (processes) to run and sets various parameters for them. +Settings in the docker-compose.override.yml file will overrule and add to those. Note that docker-compose.yml should not be edited since upgrades will replace it. All customizations go in the override file, which will not be overwritten. -Be sure it looks ok and is consistent with the new config files in conf-pmacct/. All environment variables (${x}) should be filled in. +Check the override file to be sure it looks ok and is consistent with the new config files in conf-pmacct/. All environment variables (${x}) should be filled in. -### 7. Run the cron setup script +### 6. Run the cron setup script ```sh ./setup-cron.sh @@ -119,22 +136,54 @@ Check to be sure files are in downloads/. ### 8. Start up the Docker Containers -{@import ../components/docker_pipeline.md} +Start up the pipeline (all containers) using + +```sh +docker-compose up -d +``` + +This command will pull down all required docker images and start all the services/containers as listed in the docker-compose.yml and docker-compose.override.yml files. +"-d" runs the containers in the background. + +You can see the status of the containers and whether any have died (exited) using these commands +```sh +docker-compose ps +docker container ls +``` + +To check the logs for each of the containers, run + +```sh +docker-compose logs logstash +docker-compose logs rabbit +docker-compose logs sfacctd_1 +docker-compose logs nfacctd_1 +``` + +Add `-f`, e.g. `-f logstash` to see new log messages as they arrive. `--timestamps`, `--tail`, and `--since` are also useful -- look up details in Docker documentation. + +To shut down the pipeline (all containers) use + +```sh +# docker-compose down +``` + +**Run all commands from the netsage-pipeline/ directory.** >Note that if the pipeline host is rebooted, the containers will not restart automatically. > >If this will be a regular occurance on your host, you can add `restart:always` to each service in the docker-compose.override file (you may need to add any missing services to that file). -### 9. Check the rabbitMQ UI +### 9. Check the RabbitMQ User Interface The rabbitMQ user interface can be used to see if there are incoming flows from pmacct processes and if those flows are being comsumed by logstash. -In your browser, go to ``` https:///rabbit, login with username guest, password guest``` +In your browser, go to ``` https:///rabbit ``` Login with username guest, password guest. Look at the small graph showing rates for incoming messages, acks, etc. ### 10. Check for processed flows - Ask your contact at IU to check for flows and/or look at dashboards in your grafana portal. Flows should appear after 10-15 minutes. 
-- Check to be sure the sensor name(s) are correct. +- Check to be sure the sensor name(s) are correct in the portal. - Check flow sizes and rates to be sure they are reasonable. (If sampling rate corrections are not being done properly, you may have too few flows and flows which are too small.) You IU contact can check to see whether flows have @sampling_corrected=yes (a handful from the startup of netflow collection may not) and to check for unusal tags on the flows. If you are not seeing flows, see the Troubleshooting section of the documentation. diff --git a/website/docs/deploy/docker_troubleshooting.md b/website/docs/deploy/docker_troubleshooting.md index 0006c10c..e4d21d6e 100644 --- a/website/docs/deploy/docker_troubleshooting.md +++ b/website/docs/deploy/docker_troubleshooting.md @@ -4,17 +4,37 @@ title: Docker Troubleshooting sidebar_label: Troubleshooting --- -### If you are not seeing flows after installation +### If you are not seeing flows -**Troubleshooting checklist:** - -- Use `docker-compose ps` to be sure the all the containers are (still) running. +- Be sure allow time for the first flows to timeout in the logstash aggregation - wait at least 10-15 minutes after starting up containers. +- Use `docker-compose ps` to see if all the containers are (still) running. (If there are no sflow/netflow sensors, the command should be "echo No Sflow/Netflow sensor" and the container state should be Exit 0.) -- Check the logs of the various containers to see if anything jumps out as being invalid.  -- Make sure you configured your routers to point to the correct host and port. -- Check iptables on your pipeline host to be sure incoming traffic from the routers is allowed. + +- Check the logs of the various containers to see if anything jumps out as being a problem. +- If logstash logs say things like *OutOfMemoryError: Java heap space* or *An unexpected connection driver error occured (Exception message: Connection reset)* and the rabbit container is also down... We've seen this before, but are not sure why it occurs. Try stopping everything, restarting docker for good measure, and starting all the containers up again. (If problems are continuing, it might be a memory issue.) + ``` + docker-compose down + sudo systemctl restart docker + docker-compose up -d + ``` + +- Check flow export on the network device to be sure it is (still) configured and running correctly. + +- Make sure there really is traffic to be detected (with flows over 10 MB). A circuit outage or simple lack of large flows might be occurring. + + +## Problems most likely to occur at installation: + +- Be sure conf-logstash/ files and dirs are readable by the logstash user (uid 1000, regardless of whether there is different username associated with uid 1000 on the host). A logstash error about not being able to find *.conf files could be caused by a permissions problem. +- Files in logstash-downloads/ and conf-pmacct/ also need to be readable by the logstash user. +- Logstash-temp/ needs to be readable and also writable by the logstash user. + +- Ensure routers are configured to send to the correct host and port and flow export is functioning. +- Check iptables on the pipeline host to be sure incoming traffic from the routers is allowed. - Use tcpdump to be sure there are flows coming into the expected port. -- If the final rabbit queue is on an external host, check the credentials you are using and whether iptables on that host allows incoming traffic from your pipeline host. 
+ +- If the final rabbit queue is on a remote host, eg, at IU, check the credentials you are using and iptables on the remote host. + - Did you create and edit .env? - Are the numbers of sensors, sensor names, and port numbers correct? - Make sure you don't have sflows going to a nfacctd process or vise versa. @@ -26,31 +46,12 @@ sidebar_label: Troubleshooting - In 'docker-compose ps' output, be sure the command for the sfacctd_1 container is /usr/local/sbin/sfacctd, similarly for nfacctd. - In docker-compose.yml and docker-compose.override.yml, make sure "command:"s specify config files with the right _n's (these are actually just the parameters for the commands). -### If there are too few flows and flow sizes and rates are too low - -The router may not be sending the sampling rate. This should show up as @sampling_corrected = no. -You may need to apply sampling corrections using an advanced logstash option. - -### If flow collection stops - -**Errors:** -- See if any of the containers has died using `docker ps` -- Check the logs of the various containers to see if anything jumps out as being invalid. Eg, `docker-compose logs logstash`. -- If logstash logs say things like *OutOfMemoryError: Java heap space* or *An unexpected connection driver error occured (Exception message: Connection reset)* and the rabbit container is also down... We've seen this before, but are not sure why it occurs. Try stopped everything, restarting docker for good measure, and starting everything up again. (If problems are continuing, it might be a memory issue.) - ``` - docker-compose down - sudo systemctl restart docker - docker-compose up -d - ``` -- If logstash dies with an error about not finding \*.conf files, make sure conf-logstash/, and directories and files within, are readable by everyone (and directories are executable by everyone). -- logstash-downlaods/ and conf-pmacct/ files need to be readable. -- logstash-temp/ needs to be owned (readable and writable) by the logstash user (uid 1000, regardless of whether there is different username associated with uid 1000 on the host). - -**Memory:** -- If you are running a lot of data, sometimes docker may need to be allocated more memory. The most likely culprit is logstash (java) which is only allocated 4GB of RAM by default. Please see the Docker Advanced guide for how to change. +##Memory: +- If you are running a lot of data, sometimes docker may need to be allocated more memory. The most likely culprit is logstash (java) which is only allocated 4GB of RAM by default. Please see the Docker Advanced Options guide for how to change. -**Disk space:** -- If the pipeline suddenly fails, check to see if the disk is full. If it is, first try getting rid of old docker images and containers to free up space: `docker image prune -a` and `docker container prune`. +### If there are too few flows and flow sizes and rates are smaller than expected: +The router may not be sending the sampling rate with the flow data. +You may need to apply sampling corrections - see the Docker Advanced Options guide. diff --git a/website/docs/deploy/docker_upgrade.md b/website/docs/deploy/docker_upgrade.md index c207ed53..6dd60a16 100644 --- a/website/docs/deploy/docker_upgrade.md +++ b/website/docs/deploy/docker_upgrade.md @@ -12,11 +12,11 @@ To upgrade a previous installment of the Dockerized pipeline, perform the follow cd {netsage-pipeline directory} docker-compose down ``` -This will stop and remove all the docker containers. 
Note that incoming flow data will not be saved during the time the collectors are down. +This will stop and remove all the docker containers. Note that incoming flow data will be lost during the time the collector and rabbit containers are down. ### 2. Update source code -To upgrade to a new release, first pull new code/tags from github. Your customized .env and override files will not be overwritten, nor will files created by startup scripts, cache files, or downloaded support files, though it's always good to make backup copies. +To upgrade to a new release, first pull updates from github. Your customized .env and override files will not be overwritten, nor will files created by startup scripts, cache files, or downloaded support files, though it's always good to make backup copies. ```sh git reset --hard @@ -35,18 +35,25 @@ git pull ### 3. Recreate and check custom files -Compare .env and docker-compose.override.yml to their example files to see if any changes have been made. (Expect the example files to have environment variables that have gotten filled in in the non-example files.) Copy in any updates, particularly any relevant ones or just recreate them as you did during installation. +- Compare the .env to env.example to see if any changes have been made. + Copy in any updates, particularly any relevant ones, or just recreate the .env file as you did during installation. + +- Run the pmacct setup script to recreate the pmacct config files, in case there have been any changes. This might also update the override file. -Run the pmacct setup script to recreate the pmacct config files, in case there have been any changes. This might also update the override file. ```sh ./setup-pmacct.sh ``` -Rerun the cron setup script to recreate the non-ORIG files in bin/ and cron.d/. Compare the resulting .cron files in the cron.d/ directory to those in /etc/cron.d/. If any have changed, copy them to /etc/cron.d/. +- Compare the docker-compose.override.yml file to the example. (Expect the example file to have environment variables that have gotten filled in in the non-example file.) If there are new lines or sections that are missing, copy them in. The setup script is not able to handle much in the way of changes. + +- Rerun the cron setup script to recreate the non-ORIG files in bin/ and cron.d/. + ```sh ./setup-cron.sh ``` +- Compare the resulting .cron files in the cron.d/ directory to those in /etc/cron.d/. If any have changed, copy them to /etc/cron.d/. + ### 4. Restart all the Docker Containers ``` diff --git a/website/docs/devel/docker.md b/website/docs/devel/docker.md index 21cb7d5c..1daedc2e 100644 --- a/website/docs/devel/docker.md +++ b/website/docs/devel/docker.md @@ -3,38 +3,86 @@ id: docker_dev_guide title: Docker Dev Guide sidebar_label: Docker Dev Guide --- +## Handy Docker Commands + +### Start the Containers + +``` sh +docker-compose up -d +``` + +### Stop the Containers + +``` sh +docker-compose down +docker-compose stop && docker-compose rm +``` + +### Enter a Container Shell + +``` sh +docker-compose exec logstash bash # run bash shell in logstash container +``` -## Selecting a Version +### View Container Logs -You can use the "master" version or a tagged version. -To select a released version use the docker_select_version.sh script (see the Deployment Guide). -If you wish to use the development version (master branch) simply skip the docker_select_version.sh step. 
+``` sh +docker-compose logs -f # view logs for all containers +docker-compose logs -f # view logs for container, eg logstash +``` -## Installing +## To Build Docker Images -See the Deployment Guide to learn how to set up collectors, your environment and override files, etc. +We will normally use official images for rabbitMQ, logstash, nfacctd, and sfacctd, so no building of images is required. -## Importer +However, in case there is not an offical image of nfacctd or sfacctd that includes required commits, you may need to build images from master. -The importer "shared" config that Docker uses is defined in compose/netsage_shared.xml. ** NOTE: If you want to make changes to this file, you will need to rebuild the container** +Below are the steps used to build the Docker images for v2.0. In the future, you may not have to apply a patch. (Without the patch, when bringing up the nfacctd or sfacctd container, we got *error while loading shared libraries: libndpi.so.4: cannot open shared object file: No such file or directory*.) -## Build Images +The nfacctd and sfacctd images are just the base image plus specific commands to run. -The images are published on Docker Hub, but if you'd like to incorporate local changes please follow the process below. +``` +You may need to add dns servers from /etc/resolv.conf to /etc/docker/daemon.json and restart docker +$ git clone https://github.com/pmacct/pmacct.git +$ mv pmacct pmacct-30June2022+patch +$ cd pmacct-30June2022/ +$ git checkout 865a81e1f6c444aab32110a87d72005145fd6f74 +$ git submodule update --init --recursive +$ git am -3 0001-ci-docker-fix-docker-multi-stage-build.patch +$ sudo docker build -f docker/base/Dockerfile -t pmacct:base . +$ sudo docker tag pmacct:base base:_build +$ sudo docker build -f docker/nfacctd/Dockerfile -t nfacctd:7Jun2022 . +$ sudo docker build -f docker/nfacctd/Dockerfile -t sfacctd:7Jun2022 . + +$ sudo docker-compose up -d +$ sudo docker-cmopose down +``` -### Build Using Source Code +These steps checkout the code from the desired point in time, get files for submodules, apply the patch that was emailed and saved to ~/lensman/GIT/pmacct-30June2022+patch/0001-ci-docker-fix-docker-multi-stage-build.patch on netsage-pipeline-dev2.bldc, build the base image, rename the base image, build nfacctd and sfacctd images. After building, do a test run (of course, first make the .env file, etc.). When ready, push to the Github Container Registry. -If you would like to build the *importer* container using the version of the pipeline scripts found in the GitHub repo then run the following: -```sh -docker-compose -f docker-compose.build.yml build +## To push images to the GitHub Container Registry +You need to have a personal access token and (presumably) be part of the Netsage Project. The personal access token needs at least the following scopes: repo, read/write/delete:packages. 
+As an example, here is how lisaens pushed the images for 2.0: +``` +$ sudo docker login ghcr.io -u lisaens +$ sudo docker images (to get the id) + REPOSITORY TAG IMAGE ID CREATED SIZE + sfacctd 7Jun2022 f62b1c6cddbd 5 weeks ago 346MB + nfacctd 7Jun2022 5833977f6dd0 5 weeks ago 346MB +$ sudo docker tag f62b1c6cddbd ghcr.io/netsage-project/sfacctd:7Jun2022 +$ sudo docker push ghcr.io/netsage-project/sfacctd:7Jun2022 +$ sudo docker tag 5833977f6dd0 ghcr.io/netsage-project/nfacctd:7Jun2022 +$ sudo docker push ghcr.io/netsage-project/nfacctd:7Jun2022 +Go to the Netsage Project in github (netsage-project), click on Packages, click on an image, click on Connect to Repository and select Netsage Pipeline, +then go to Package Settings (lower right). In the Danger Zone, click on Change Visibility and choose Public. ``` -NOTE: The importer container includes the config files for the logstash pipeline. +NOTE that the docker-compose.yml file must refer to the images using the registry location, eg, for sfacctd `ghcr.io/netsage-project/sfacctd:7jun2022`. -## Optional: ElasticSearch and Kibana +## Run ElasticSearch and Kibana Containers You can optionally store flow data locally in an ElasticSearch container and view the data with Kibana. Local storage can be enabled with the following steps: @@ -51,33 +99,3 @@ elasticsearch { 3. Run the containers using the following line: ` ` ` docker-compose -f docker-compose.yml -f docker-compose.develop.yml up -d ` ` ` -## Handy Docker Commands - -### Start the Containers - -``` sh -docker-compose up -d -``` - -### Stop the Containers - -``` sh -docker-compose stop && docker-compose rm -``` - -### Enter a Container Shell - -``` sh -docker-compose exec logstash bash #bash shell in logstash container -docker-compose exec importer bash #bash shell in importer container -docker-compose exec rabbit bash #bash shell in rabbit container -``` - -### View Container Logs - -``` sh -docker-compose logs -f #view logs for all containers -docker-compose logs -f logstash #view logs for logstash container -docker-compose logs -f importer #view logs for importer container -docker-compose logs -f rabbit #view logs for rabbit container -``` diff --git a/website/docs/devel/pipeline_dataset.md b/website/docs/devel/pipeline_dataset.md index a061957d..0319cc27 100644 --- a/website/docs/devel/pipeline_dataset.md +++ b/website/docs/devel/pipeline_dataset.md @@ -14,13 +14,9 @@ You can download the files from [here](https://drive.google.com/drive/folders/19 Please take note of which ports the collectors are listing on. Check your docker-compose.override.yml file. If you are using default ports, they should match this [example](https://github.com/netsage-project/netsage-pipeline/blob/master/docker-compose.override_example.yml). -Currently the default ports are: - - 9998/udp for sflow - - 9999/udp for netflow - Naturally the collectors have to be running in order for any of this to be usable. You can read more on how to get them running in the [Docker Simple Deployment Guide](../deploy/docker_install_simple.md#running-the-collectors) -In order to replay the data, use the following commands for netflow and sflow respectively: +In order to replay the data, use nfreplay which is part of the nfdump package. Eg, ### Netflow @@ -30,5 +26,5 @@ nfreplay -H 127.0.0.1 -p 9999 -r nfcapd-ilight-anon-20200114 -v 9 -d 1000 ### Sflow -Coming soon. nfreplay will not work with sflow data type. +nfreplay will not work with sflow data type. 
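+To verify that the replayed flows are actually arriving, you can watch the collector port on the pipeline host while nfreplay is running. This is only a minimal sketch: it assumes the default netflow port 9999 and the loopback interface used in the example above; adjust the port and interface to match your own setup.
+
+```sh
+# Show a few of the replayed UDP packets arriving on the netflow collector port
+sudo tcpdump -i lo -c 5 udp port 9999
+```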
diff --git a/website/docs/pipeline/intro.md b/website/docs/pipeline/intro.md index 17b90a10..2067c89d 100644 --- a/website/docs/pipeline/intro.md +++ b/website/docs/pipeline/intro.md @@ -13,7 +13,7 @@ As is well known, communication between two computers is accomplished by breakin Network devices such as routers can function as **flow exporters** by simply configuring and enabling flow collection. All or nearly all come with this capability. -There are three main types of flow exporters: **[sflow](https://www.rfc-editor.org/info/rfc3176)**, **[netflow/IPFIX](https://www.cisco.com/c/en/us/products/collateral/ios-nx-os-software/ios-netflow/prod_white_paper0900aecd80406232.html))** and **[tstat](http://tstat.polito.it/)**. Sflow data is composed of sampled packets, while netflow (the newest version of which is IPFIX) and tstat consist of information about series of packets. These are described further in the following sections. +There are three main types of flow exporters: **[sflow](https://www.rfc-editor.org/info/rfc3176)**, **[netflow/IPFIX](https://www.cisco.com/c/en/us/products/collateral/ios-nx-os-software/ios-netflow/prod_white_paper0900aecd80406232.html))** and **[tstat](http://tstat.polito.it/)**. Sflow data is composed of sampled packets, while netflow (the newest version of which is IPFIX) and tstat data consists of information about series of packets (ie whole flows, or what they consider whole flows). These are described further in the following sections. For Netsage, flow exporters, also referred to as **sensors**, are configured to send the flow data to a **Netsage Pipeline host** for processing. @@ -25,21 +25,17 @@ The **Netsage Flow Processing Pipeline** processes network flow data. It is comp The Netsage Flow Processing Pipeline is made of the following components - - **[Pmacct](https://github.com/pmacct/pmacct)**: The pmacct ("p-m-account") package includes sfacctd and nfacctd daemons which receive sflow and netflow/IPFIX flows, respectively. They can also do some processing and filtering, but we use these options very minimally. (Pmacct includes other daemons, as well, but we do not use them. Here, "pmacct" will refer to sfacctd and nfacctd in general.) These daemons send the flows to a rabbitmq queue. + - **[Pmacct](https://github.com/pmacct/pmacct)**: The pmacct package includes sfacctd and nfacctd daemons which receive sflow and netflow/IPFIX flows, respectively, and send them to a rabbitmq queue. - **[RabbitMQ](https://www.rabbitmq.com/)**: Rabbitmq is used for message queueing and passing at a couple of points in the full pipeline. - - **[Logstash](https://www.elastic.co/logstash)**: A logstash pipeline performs a variety of operations on the flow data to transform it and add additional information. ([Doc](logstash.md)) + - **[Logstash](https://www.elastic.co/logstash)**: A logstash pipeline pulls flow data from a rabbit queue and performs a variety of operations to transform it and add additional information. - **[Elasticsearch](https://www.elastic.co/what-is/elasticsearch)**: Elasticsearch is used for storing the final flow data. -> Sflow and netflow exporters should be configured to send data to ports on the pipeline host (a different port for each sensor). Pmacct processes will be configured to listen on those ports. -> -> Tstat flow data can be sent directly to the ingest RabbitMQ queue on the pipeline host using the Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) tool. This can be installed as an rpm or via Docker. 
- ### Pipeline Installation Originally, the pipeline was deployed by installing all of the components individually on one or more servers (the "Bare Metal" or "Manual" Install). We still use this deployment method at IU. More recently, we've also added a Docker deployment option. For simple scenerios having just one sflow and/or one netflow sensor (and any number of tstat sensors), the basic "Docker Installation" should suffice. The "Docker Advanced Options" guide will help when there are more sensors and/or other customizations required. ## Visualization -[Grafana](https://grafana.com/oss/grafana/) or [Kibana](https://www.elastic.co/kibana) (with appropriate credentials) can be used to visualize the data stored in elasticsearch. Netsage grafana dashboards or **portals** are set up by the IU team. These are saved in github [here](https://github.com/netsage-project/netsage-grafana-configs). +[Grafana](https://grafana.com/oss/grafana/) or [Kibana](https://www.elastic.co/kibana) (with appropriate credentials) can be used to visualize the data stored in elasticsearch. Netsage grafana dashboards or **portals** are set up by the IU team. The dashboards are saved in github [HERE](https://github.com/netsage-project/netsage-grafana-configs). diff --git a/website/docs/pipeline/logstash.md b/website/docs/pipeline/logstash.md index ea8bcee8..410a4146 100644 --- a/website/docs/pipeline/logstash.md +++ b/website/docs/pipeline/logstash.md @@ -4,19 +4,19 @@ title: Logstash Pipeline sidebar_label: Logstash --- -The Logstash portion of the Netsage Pipeline reads flows from a RabbitMQ queue, performs various transformations and adds additional information to them, then sends them to a rabbitMQ queue on a different host, and eventually the data ends up in an Elasticsearch instance. +The Logstash portion of the Netsage Pipeline reads flows from a RabbitMQ queue, performs various transformations and adds additional information, then sends them to a rabbitMQ queue on a different host. Eventually the data ends up in an Elasticsearch data store. -Logstash .conf files invoke various "filters" and actions. In the bare metal installation, these conf files are located in /etc/logstash/conf.d/. In a docker installation, the *.conf files in the git checkout, in conf-logstash/, are used. See below for a brief description of what each does and check the files for comments. +Logstash .conf files invoke various "filters" and actions. In the bare metal installation, these conf files are located in /etc/logstash/conf.d/. In a docker installation, they are located in the conf-logstash/ directory of the git checkout of the pipeline. See below for a brief description of what each does and check the files for comments. >Notes: > - All \*.conf files in conf.d/ or conf-logstash/ are executed in alphabetical order, as if they were one huge file. Those ending in .disabled will not be executed (assuming 'path.config: "/etc/logstash/conf.d/*.conf"'). > - If you are not running a standard Netsage pipeline and actions in a particular .conf file are not needed in your particular case, they or the whole .conf file can be removed, but check carefully for downstream effects. > - MaxMind, CAIDA, and Science Registry database files required by the geoip and aggregate filters are downloaded from scienceregistry.netsage.global via cron jobs on a weekly or daily basis. (MaxMind data can change weekly, CAIDA quarterly, Science Registry information randomly.) 
**NOTE that new versions won't be used in the pipeline until logstash is restarted.** There is a cron file to do this also. Similarly for other support files, eg, those used in 90-additional-fields.conf.
-> - Lookup tables for 55-member-orgs.conf that we have compiled are available from sciencregistry.grnoc.iu.edu. See the cron files provided. These will not be updated often, so you may run the cron jobs or not. You will need to provide lists for other networks yourself or ask us.
+> - "Member organization" lists that we have stored are available to download from scienceregistry.grnoc.iu.edu. See the cron files provided. These will not be updated often. You will need to provide lists for other networks yourself or ask us. (See Docker Advanced Options.)
 
 ## Logstash Sequence
 
-The main things done in each conf file are as follows.
+The main things done in each conf file are as follows. (Please double check the comments in the files themselves, as well, in case this documentation fails to keep up with changes.)
 
 ### 01-input-rabbit.conf
 
diff --git a/website/docs/pipeline/pmacct.md b/website/docs/pipeline/pmacct.md
index 3b67fd74..5e17ed10 100644
--- a/website/docs/pipeline/pmacct.md
+++ b/website/docs/pipeline/pmacct.md
@@ -3,12 +3,15 @@ id: pmacct
 title: Pmacct
 sidebar_label: Pmacct
 ---
-As flow data comes into the pipeline host, it is received by nfacctd and sfacctd processes which are listening on the proper ports.
-These do sampling corrections, add sensor name information, and send the flows to a rabbitmq queue.
+The pmacct ("p-m-account") package includes sfacctd and nfacctd daemons which receive sflow and netflow/IPFIX flows, respectively. They can also do some processing and filtering, but we use these options very minimally. (Pmacct includes other daemons, as well, but we do not use them. Here, "pmacct" will refer to sfacctd and nfacctd in general.)
+
+As flow data comes into the pipeline host, it is received by nfacctd and sfacctd processes which are listening on the proper ports (one process per port).
+These processes do sampling corrections, add sensor name information, and send the flows to a rabbitmq queue.
 
 Netsage also uses sfacctd to do some preliminary aggregation for sflow, to cut down on the work that logstash needs to do. By default, all samples, with the same 5-tuple, within each 5 minute window are aggregated into one incoming raw flow.
 
 ### Configuration
-For netsage, pretag.map files are required, one for each nfacctd or sfacctd process. In the bare-metal installation, these are in /etc/pmacct/. For the default docker deployment, we have one for sflow, one for netflow: sfacct-pretag.map and nfacct-pretag.map. These specify the sensor names which are added to the flows. See the comments in the files and the Deployment pages in these docs.
+Each nfacctd and sfacctd process requires a main config file. In the bare-metal installation, these are in /etc/pmacct/. For the default docker deployment, they are in {pipeline checkout directory}/conf-pmacct/. There are two basic versions - sfacctd.conf.ORIG and nfacctd.conf.ORIG. See comments within the files. Sensor-specific versions are created from these via a setup script.
+
+For Netsage, pretag.map files are also required to assign a sensor name, one for each nfacctd or sfacctd process. With the docker deployment, these files are also created by a setup script. By default, these are found in the same directory as the main config files.
 
-Configuration files are also required for each nfacctd or sfacctd process. 
In the bare-metal installation, these are also in /etc/pmacct/. For the default docker deployment, we have just two files - sfacctd.conf and nfacctd.conf. See comments within the files.
diff --git a/website/docs/pipeline/sensors.md b/website/docs/pipeline/sensors.md
index f1b6f84c..128d93d4 100644
--- a/website/docs/pipeline/sensors.md
+++ b/website/docs/pipeline/sensors.md
@@ -4,11 +4,12 @@ title: Sflow/Netflow Data Export
sidebar_label: Sflow/Netflow Data
---
-Sflow and netflow/IPFIX export can be configured on appropriate network devices. Routers and switches will have at least one of these capabililties built in, although it can somtimes be buggy.
+Export of sflow and netflow/IPFIX data can be configured on appropriate network devices. Routers and switches will have at least one of these capabilities built in, although implementations can sometimes be buggy.
-We have assumed that each exporter/sensor will send flow data to a different port on the pipeline host. Certainly if different sensors use different sampling rates, this needs to be adhered to. The pipeline uses the port number to recognize which sensor the flows are coming from and tag them with the name of that sensor.
+We have assumed that each exporter/sensor is configured to send flow data to a different port on the pipeline host. Certainly if different sensors use different sampling rates, this needs to be adhered to. The pipeline uses the port number to recognize which sensor the flows are coming from and tag them with the name of that sensor.
-Sflow exporters simply collect individual samples of packets passing through the device and send them to a collector (pmacct in our case). The sampling rate can be configured, eg, 1 out of every 100 packets. To approximately correct for the fact that most packets are not detected, one assumes that each sampled packet represents N others and multiplies the number of bytes in the sampled packet by the sampling rate N, eg, 100. The sampling rate compared to the number of packets per second flowing through the device determines how accurate this approximation is. Sampling is, of course, least accurate for shorter flows since their packets will be more likely to be missed and the correction applied may overestimate the number of packets and bytes. Discussions of accuracy and sampling rates can be found online. The netsage pipeline then looks for matching packets to aggregate into flows.
+Sflow exporters simply collect individual **samples** of packets passing through the device and send them to a collector (pmacct in our case). The netsage pipeline then looks for matching packets to aggregate into flows. The sampling rate can be configured, eg, 1 out of every 100 packets. To approximately correct for the fact that most packets are not detected, one assumes that each sampled packet represents N others and multiplies the number of bytes in the sampled packet by the sampling rate N, eg, 100. The sampling rate compared to the number of packets per second flowing through the device determines how accurate this approximation is. Sampling is least accurate for shorter flows since their packets will be more likely to be missed and the correction applied may overestimate the number of bytes in the flow. Discussions of accuracy and sampling rates can be found online.
-Netflow also commonly samples packets, and the same sampling corrections apply, but it also keeps track of the flows and aggregates by the 5-tuple (source and destination IPs, ports, and protocol) on the router. 
The **active timeout** determines how often netflow sends out an "update" on the flows it is aggregating. The **inactive timeout** determines how long to wait for another matching packet before declaring that a flow has ended.
-Typically, the active timeout is 1 minute and the inactive timeout 15 seconds. This means that for flows longer than 1 minute, a "netflow update" is sent out every minute. The tricky thing is that these update-flows all have the same start time (the time the first packet was observed). The end time (the time the last packet was observed) and duration change, but the number of bytes and packets reported corresponds only to the period since the last update. The netsage pipeline attempts to combine these updates to aggregate long flows correctly.
+Netflow also commonly samples packets, and the same sampling corrections must be applied, but it also keeps track of the flows and aggregates by the 5-tuple (source and destination IPs, ports, and protocol) *on the router*. The **active timeout** determines how often netflow sends out an "update" on the flows it is aggregating. The **inactive timeout** determines how long to wait for another matching packet before declaring that a flow has ended.
+
+Typically, the active timeout is set to 1 minute and the inactive timeout to 15 seconds. This means that for flows longer than 1 minute, a "netflow update" is sent out every minute. The tricky thing is that these update-flows all have the same start time (the time the first packet was observed). The end time (the time the last packet was observed) and duration change, but the number of bytes and packets reported corresponds only to the period since the last update. The netsage pipeline attempts to combine these updates to aggregate long flows correctly.
diff --git a/website/docs/pipeline/tstat.md b/website/docs/pipeline/tstat.md
index 0d537671..e6d47724 100644
--- a/website/docs/pipeline/tstat.md
+++ b/website/docs/pipeline/tstat.md
@@ -8,6 +8,10 @@ sidebar_label: Tstat Data
The **Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) project** provides client programs to parse the captured data and send it to a rabbitmq host where it can then be processed by the [logstash pipeline](logstash), stored in elasticsearch, and finally displayed in our Grafana [dashboards](https://github.com/netsage-project/netsage-grafana-configs).
+Tstat is only appropriate for certain situations, eg, tracking traffic into and out of data archives. It does not do sampling and exports only complete flows. It also provides additional information beyond what sflow and netflow provide.
+
+In the Netsage Pipeline, tstat data is treated the same as sflow and netflow data, but the logstash aggregation step is skipped since it is not needed.
+
Docker images exist on Docker Hub for tstat and tstat_transport. This is still in a beta state and is in development. The initial documentation is available [here](https://github.com/netsage-project/tstat-transport/blob/master/docs/docker.md).
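As an aside, to make the netflow "update" behavior described in the sensors section above concrete, here is a minimal sketch with made-up numbers (this is not the pipeline's actual logstash aggregation code): the byte counts of the updates are summed, the shared start time is kept, and the final update supplies the true end time.

```sh
#!/bin/bash
# Three hypothetical netflow updates for one 5-tuple (epoch seconds, bytes).
# All report the same start time; each reports only the bytes seen since
# the previous update. The numbers are invented for illustration.
start=1650000000
ends=(1650000060 1650000120 1650000155)
bytes=(40000000 42000000 9000000)

# Sum the per-update byte counts.
total_bytes=0
for b in "${bytes[@]}"; do
  total_bytes=$(( total_bytes + b ))
done

# The last update carries the true end time of the flow.
last=$(( ${#ends[@]} - 1 ))
end=${ends[$last]}
duration=$(( end - start ))

echo "combined flow: duration=${duration}s, bytes=${total_bytes}"
```

Any sampling corrections (multiplying by the sampling rate N) are applied by the collector before this combining step happens in logstash.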
From a78a53cb16fa973c9d11602a85b0fb2e2e38d094 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Tue, 16 Aug 2022 18:47:52 +0000 Subject: [PATCH 106/126] More doc changes --- .../docs/deploy/docker_install_advanced.md | 19 +++++++------ website/docs/deploy/docker_install_simple.md | 28 +++++++++---------- website/docs/deploy/docker_troubleshooting.md | 12 ++++---- website/docs/deploy/docker_upgrade.md | 18 ++++++------ website/docs/devel/docker.md | 24 +++++++++------- website/docs/pipeline/intro.md | 6 ++-- website/docs/pipeline/logstash.md | 5 ++-- website/docs/pipeline/pmacct.md | 4 +-- website/docs/pipeline/sensors.md | 8 ++++-- 9 files changed, 65 insertions(+), 59 deletions(-) diff --git a/website/docs/deploy/docker_install_advanced.md b/website/docs/deploy/docker_install_advanced.md index 179ef890..d15525b8 100644 --- a/website/docs/deploy/docker_install_advanced.md +++ b/website/docs/deploy/docker_install_advanced.md @@ -31,7 +31,7 @@ netflowPort_3=9002 #### b. Edit docker-composeoverride_example.yml -Add more nfacctd services to the example override file. When copying and pasting, replace _1 with _2 or _3 in three places! Your file should look look something like this (remember you'll need to do this again after an upgrade! We need to fix the script to do this automatically): +Add more nfacctd services to the **example** override file. When copying and pasting, replace _1 with _2 or _3 in three places! Your file should look look something like this (remember you'll need to do this again after an upgrade! We need to fix the script to do this automatically): ``` nfacctd_1: @@ -78,7 +78,7 @@ In the .env file, uncomment lines in the appropriate section and enter the infor ```sh ifindex_filter_flag=True -*examples:* +## examples (include only 1 such line): ifindex_filter_keep=ALL:123 ifindex_filter_keep=Sensor 1: 123 ifindex_filter_keep=Sensor 1: 456, 789 @@ -94,9 +94,10 @@ Spaces don't matter except within the sensor names. Punctuation is required as s ## To Filter Flows by Subnet -With this option, flows from specified sensors will be dropped unless src or dst is in the list of subnets to keep. -"ALL" can refer to all sensors. -If a sensor is not referenced at all, all of its flows will be kept. +With this option, flows from specified sensors will be dropped unless src or dst is in the list of subnets to keep. It works similarly to the option to filter by interface. "ALL" can refer to all sensors. +If a sensor is not referenced at all, all of its flows will be kept. + +For example, ``` subnet_filter_flag=True @@ -124,11 +125,11 @@ Please notify the devs at IU in advance, if you need to modify a sensor name, be ## To Do Sampling Rate Corrections in Logstash When flow sampling is done, corrections have to be applied to the number of packets and bytes. For example, if you are sampling 1 out of 100 flows, for each flow measured, it is assumed that in reality there would be 100 flows of that size with that src and dst, so the number of bits (and the number of packets, bits/s and packets/s) is multiplied by 100. Usually the collector (nfacctd or sfacctd process) gets the sampling rate from the incoming data and applies the correction, but in some cases, the sensor may not send the sampling rate, or there may be a complex set-up that requires a manual correction. -In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 3 fields are set properly! 
The same correction can be applied to multiple sensors by using a comma-separed list. The same correction applies to all listed sensors. For example, +In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 3 fields are set properly! The same correction can be applied to multiple sensors by using a semicolon-separated list. The same correction applies to all listed sensors. For example, ```sh sampling_correction_flag=True -sampling_correction_sensors=IU Bloomington Sflow, IU Indy Sflow +sampling_correction_sensors=IU Bloomington Sflow; IU Indy Sflow sampling_correction_factor=512 ``` @@ -148,7 +149,7 @@ full_IPs_flag=True ## To Increase Memory Available for Lostash -If cpu or memory usage seems to be a problem, try increasing the java JVM heap size for logstash from 4GB to no more than 8. +If cpu or memory usage seems to be a problem, try increasing the java JVM heap size for logstash from 4GB to 8GB. To do this, edit LS_JAVA_OPTS in the .env file. E.g., ```yaml @@ -165,7 +166,7 @@ Here are some tips for adjusting the JVM heap size (see https://www.elastic.co/g Source and destination organization names come from lookups by ASN or IP in databases provided by CAIDA or MaxMind. (The former is preferred, the latter acts as a backup.) Sometimes an organization that owns an AS and a large block of IPs will allow members or subentities to use certain IP ranges within the same AS. -In this case, all flows to and from the members will have src or dst organization set to the parent organization's name. If desired, the member organizations' names can be substituted. To do requires the use of a "member list" which specifies the ASN(s) that is being shared and the IP ranges for each member. +In this case, all flows to and from the members will have src or dst organization set to the parent organization's name. If desired, the member organizations' names can be substituted. To do so requires the use of a "member list" which specifies the ASN(s) being shared and the IP ranges for each member. See **conf-logstash/support/networkA-members-list.rb.example** for an example. diff --git a/website/docs/deploy/docker_install_simple.md b/website/docs/deploy/docker_install_simple.md index 6b85b4ef..30486161 100644 --- a/website/docs/deploy/docker_install_simple.md +++ b/website/docs/deploy/docker_install_simple.md @@ -3,7 +3,7 @@ id: docker_install_simple title: Docker Installation Guide sidebar_label: Docker Installation --- -In this deployment guide, you will learn how to deploy a basic Netsage setup that includes one sflow and/or one netflow collector. If you have more than one collector of either type, or other special situations, see the Docker Advanced guide. +This deployment guide describes how to deploy a basic Netsage setup that includes one sflow and/or one netflow collector. If you have more than one collector of either type, or other special situations, see the Docker Advanced guide. The Docker containers included in the installation are - sfacctd_1 (sflow collector - receives sflow data and writes it to a rabbit queue) @@ -17,7 +17,7 @@ Decide where to run the Docker Pipeline and get it set up. The default java heap Install Docker Engine (docker-ce, docker-ce-cli, containerd.io) - see instructions at [https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/). 
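For reference, on a RHEL/CentOS-style host the install of those packages might look like the sketch below. This is only an illustration of the step described above; follow the linked Docker instructions for the current steps for your OS.

```sh
# Add the Docker CE yum repository (CentOS shown; adjust for your distribution).
sudo yum install -y yum-utils
sudo yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo

# Install the engine packages named above.
sudo yum install -y docker-ce docker-ce-cli containerd.io

# Optionally have docker start automatically at boot.
sudo systemctl enable docker
```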
-Start docker
+Start docker:
```
sudo systemctl start docker
```
@@ -61,6 +61,15 @@ git checkout {tag}
Replace "{tag}" with the release version you intend to use, e.g., "v2.0.0". ("Master" is the development version and is not intended for general use!) `git status` will confirm which branch you are on, e.g., master or v2.0.0.
+>Files located in the git checkout that are used by the docker services and cron:
+>- the .env file
+>- docker-compose.yml and docker-compose.override.yml
+>- files in conf-logstash/
+>- non-ORIG files in conf-pmacct/
+>- cron jobs use non-ORIG files in bin/ and cron.d/ and write to logstash-downloads/
+>- logstash may write to or read from logstash-temp/
+> On upgrade, docker-compose.yml, files in conf-logstash, ORIG and example files will be overwritten.
+
### 4. Create the Environment File
Next, copy `env.example` to `.env` then edit the .env file to set the sensor names, ports, and where to send processed flows.
@@ -90,7 +99,7 @@ Sensor names uniquely identify the source of the data and will be shown in the G
./setup-pmacct.sh
```
-This script will use settings in the .env file to create pmacct (ie, nfacctd and sfacctd) config files in conf-pmacct/ from the .ORIG files in the same directory.
+This script will use settings in the .env file to create pmacct (ie, nfacctd and sfacctd) config files in **conf-pmacct/** from the .ORIG files in the same directory.
It will also create **docker-compose.override.yml** from docker-compose.override_example.yml, or update it if it exists, filling in ${var} values from the .env file. (This is needed since pmacct can't use environment variables directly, like logstash can.)
@@ -105,7 +114,7 @@ Check the override file to be sure it looks ok and is consistent with the new co
./setup-cron.sh
```
-This script will create docker-netsage-downloads.cron and .sh and restart-logstash-container.cron and .sh files in cron.d/ and bin/ from .ORIG files in the same directories, filling in required information.
+This script will create docker-netsage-downloads.cron and .sh and restart-logstash-container.cron and .sh files in **cron.d/** and **bin/** from .ORIG files in the same directories, filling in required information.
The downloads cron job runs the downloads shell script, which will get various files required by the pipeline from scienceregistry.grnoc.iu.edu on a weekly basis.
The restart cron job runs the restart shell script, which restarts the logstash container once a day. Logstash must be restarted to pick up any changes in the downloaded files.
@@ -125,15 +134,6 @@ bin/docker-netsage-downloads.sh
Check to be sure files are in downloads/.
->Files located in the git checkout that are used by the docker services and cron:
->- the .env file
->- docker-compose.yml and docker-compose.override.yml
->- files in conf-logstash/
->- non-ORIG files in conf-pmacct/
->- cron jobs use non-ORIG files in bin/ and cron.d/ and write to logstash-downloads/
->- logstash may write to or read from logstash-temp/
-> On upgrade, docker-compose.yml, files in conf-logstash, ORIG and example files will be overwritten.
-
### 8. Start up the Docker Containers
Start up the pipeline (all containers) using
@@ -178,7 +178,7 @@ To shut down the pipeline (all containers) use
The rabbitMQ user interface can be used to see if there are incoming flows from pmacct processes and if those flows are being consumed by logstash.
-In your browser, go to ``` https:///rabbit ``` Login with username guest, password guest. 
Look at the small graph showing rates for incoming messages, acks, etc. +In your browser, go to ``` https:///rabbit ``` Login with username *guest*, password *guest*. Look at the small graph showing rates for incoming messages, acks, etc. You should see bursts of incoming messages and no longterm buildup of messages in the other graph. ### 10. Check for processed flows diff --git a/website/docs/deploy/docker_troubleshooting.md b/website/docs/deploy/docker_troubleshooting.md index e4d21d6e..7afe1dd9 100644 --- a/website/docs/deploy/docker_troubleshooting.md +++ b/website/docs/deploy/docker_troubleshooting.md @@ -8,8 +8,6 @@ sidebar_label: Troubleshooting - Be sure allow time for the first flows to timeout in the logstash aggregation - wait at least 10-15 minutes after starting up containers. - Use `docker-compose ps` to see if all the containers are (still) running. - (If there are no sflow/netflow sensors, the command should be "echo No Sflow/Netflow sensor" and the container state should be Exit 0.) - - Check the logs of the various containers to see if anything jumps out as being a problem. - If logstash logs say things like *OutOfMemoryError: Java heap space* or *An unexpected connection driver error occured (Exception message: Connection reset)* and the rabbit container is also down... We've seen this before, but are not sure why it occurs. Try stopping everything, restarting docker for good measure, and starting all the containers up again. (If problems are continuing, it might be a memory issue.) ``` @@ -17,9 +15,8 @@ sidebar_label: Troubleshooting sudo systemctl restart docker docker-compose up -d ``` - +- If there is only an *OutOfMemoryError* for java, perhaps you need to increase the java heap size. - Check flow export on the network device to be sure it is (still) configured and running correctly. - - Make sure there really is traffic to be detected (with flows over 10 MB). A circuit outage or simple lack of large flows might be occurring. @@ -44,11 +41,12 @@ sidebar_label: Troubleshooting - In docker-compose.override.yml, make sure the ports are set correctly. You will see *port on host : port in container*. (Docker uses its own port numbers internally.) *Port on host* should match what is in .env (the port the router is sending to on the pipeline host). *Port in container* should match what is in the corresponding pmacct config. - In pmacct config files, make sure amqp_host is set to rabbit (for docker installs) or localhost (for bare metal) - In 'docker-compose ps' output, be sure the command for the sfacctd_1 container is /usr/local/sbin/sfacctd, similarly for nfacctd. -- In docker-compose.yml and docker-compose.override.yml, make sure "command:"s specify config files with the right _n's (these are actually just the parameters for the commands). + (If there are 0 Xflow sensors, the command should be *echo No Xflow sensor* and the container state should be Exit 0.) +- In docker-compose.yml and docker-compose.override.yml, make sure *command:*s specify config files with the right _n's (these are actually just the parameters for the commands). -##Memory: -- If you are running a lot of data, sometimes docker may need to be allocated more memory. The most likely culprit is logstash (java) which is only allocated 4GB of RAM by default. Please see the Docker Advanced Options guide for how to change. +## Memory: +- If you are processing a lot of flows and encountering Out of Memory erros, docker may need to be allocated more memory. 
The most likely culprit is logstash (java) which is only allocated 4GB of RAM by default (in previous versions, only 2GB). Please see the Docker Advanced Options guide for how to change. ### If there are too few flows and flow sizes and rates are smaller than expected: diff --git a/website/docs/deploy/docker_upgrade.md b/website/docs/deploy/docker_upgrade.md index 6dd60a16..28084035 100644 --- a/website/docs/deploy/docker_upgrade.md +++ b/website/docs/deploy/docker_upgrade.md @@ -29,7 +29,7 @@ git reset --hard will obliterate any changes you have made to non-override files Checkout the version of the pipeline you want to run (replace "{tag}" by the version number, eg, v1.2.11) and make sure it's up to date. ```sh -git checkout -b {tag} +git checkout {tag} git pull ``` @@ -38,19 +38,19 @@ git pull - Compare the .env to env.example to see if any changes have been made. Copy in any updates, particularly any relevant ones, or just recreate the .env file as you did during installation. -- Run the pmacct setup script to recreate the pmacct config files, in case there have been any changes. This might also update the override file. +- Rerun the pmacct setup script to recreate the pmacct config files, in case there have been any changes. This might also update the override file. -```sh -./setup-pmacct.sh -``` + ```sh + ./setup-pmacct.sh + ``` - Compare the docker-compose.override.yml file to the example. (Expect the example file to have environment variables that have gotten filled in in the non-example file.) If there are new lines or sections that are missing, copy them in. The setup script is not able to handle much in the way of changes. -- Rerun the cron setup script to recreate the non-ORIG files in bin/ and cron.d/. +- Rerun the cron setup script to recreate the non-ORIG files in bin/ and cron.d/: -```sh -./setup-cron.sh -``` + ```sh + ./setup-cron.sh + ``` - Compare the resulting .cron files in the cron.d/ directory to those in /etc/cron.d/. If any have changed, copy them to /etc/cron.d/. diff --git a/website/docs/devel/docker.md b/website/docs/devel/docker.md index 1daedc2e..e8764038 100644 --- a/website/docs/devel/docker.md +++ b/website/docs/devel/docker.md @@ -37,12 +37,11 @@ We will normally use official images for rabbitMQ, logstash, nfacctd, and sfacct However, in case there is not an offical image of nfacctd or sfacctd that includes required commits, you may need to build images from master. -Below are the steps used to build the Docker images for v2.0. In the future, you may not have to apply a patch. (Without the patch, when bringing up the nfacctd or sfacctd container, we got *error while loading shared libraries: libndpi.so.4: cannot open shared object file: No such file or directory*.) +Below are the steps used to build the pmacct Docker images for v2.0. In the future, you may not have to apply a patch. (Without the patch, when bringing up the nfacctd or sfacctd container, we got *error while loading shared libraries: libndpi.so.4: cannot open shared object file: No such file or directory*.) -The nfacctd and sfacctd images are just the base image plus specific commands to run. +You may need to first add dns servers from /etc/resolv.conf to /etc/docker/daemon.json and restart docker. 
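For example, one way to do that might look like the sketch below. The nameserver addresses are placeholders, not values from this project; use the ones from your own /etc/resolv.conf, and merge the "dns" key into any existing daemon.json rather than overwriting other settings.

```sh
# See which nameservers the host is currently using.
grep ^nameserver /etc/resolv.conf

# Put them into the Docker daemon config (placeholder addresses shown).
sudo tee /etc/docker/daemon.json > /dev/null <<'EOF'
{
  "dns": ["10.0.0.2", "8.8.8.8"]
}
EOF

# Restart docker so builds pick up the new DNS settings.
sudo systemctl restart docker
```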
``` -You may need to add dns servers from /etc/resolv.conf to /etc/docker/daemon.json and restart docker $ git clone https://github.com/pmacct/pmacct.git $ mv pmacct pmacct-30June2022+patch $ cd pmacct-30June2022/ @@ -58,7 +57,9 @@ $ sudo docker-compose up -d $ sudo docker-cmopose down ``` -These steps checkout the code from the desired point in time, get files for submodules, apply the patch that was emailed and saved to ~/lensman/GIT/pmacct-30June2022+patch/0001-ci-docker-fix-docker-multi-stage-build.patch on netsage-pipeline-dev2.bldc, build the base image, rename the base image, build nfacctd and sfacctd images. After building, do a test run (of course, first make the .env file, etc.). When ready, push to the Github Container Registry. +These steps clone pmacct, change the name of the directory, checkout the code from the desired point in time, get files for submodules, apply the patch that was emailed and saved to ~/lensman/GIT/pmacct-30June2022+patch/0001-ci-docker-fix-docker-multi-stage-build.patch on netsage-pipeline-dev2.bldc, build the base image, rename the base image, build nfacctd and sfacctd images. After building, do a test run (of course, first make the .env file, etc.). When ready, push to the Github Container Registry. + +The nfacctd and sfacctd images are just the base image plus specific commands to run. ## To push images to the GitHub Container Registry @@ -67,16 +68,19 @@ You need to have a personal access token and (presumably) be part of the Netsage As an example, here is how lisaens pushed the images for 2.0: ``` $ sudo docker login ghcr.io -u lisaens -$ sudo docker images (to get the id) - REPOSITORY TAG IMAGE ID CREATED SIZE - sfacctd 7Jun2022 f62b1c6cddbd 5 weeks ago 346MB - nfacctd 7Jun2022 5833977f6dd0 5 weeks ago 346MB +$ sudo docker images + REPOSITORY TAG IMAGE ID CREATED SIZE + sfacctd 7Jun2022 f62b1c6cddbd 5 weeks ago 346MB + nfacctd 7Jun2022 5833977f6dd0 5 weeks ago 346MB + ... $ sudo docker tag f62b1c6cddbd ghcr.io/netsage-project/sfacctd:7Jun2022 $ sudo docker push ghcr.io/netsage-project/sfacctd:7Jun2022 $ sudo docker tag 5833977f6dd0 ghcr.io/netsage-project/nfacctd:7Jun2022 $ sudo docker push ghcr.io/netsage-project/nfacctd:7Jun2022 -Go to the Netsage Project in github (netsage-project), click on Packages, click on an image, click on Connect to Repository and select Netsage Pipeline, -then go to Package Settings (lower right). In the Danger Zone, click on Change Visibility and choose Public. + +Go to the Netsage Project in github (netsage-project), click on Packages, click on an image, +click on Connect to Repository and select Netsage Pipeline, then go to Package Settings +(lower right). In the Danger Zone, click on Change Visibility and choose Public. ``` NOTE that the docker-compose.yml file must refer to the images using the registry location, eg, for sfacctd `ghcr.io/netsage-project/sfacctd:7jun2022`. diff --git a/website/docs/pipeline/intro.md b/website/docs/pipeline/intro.md index 2067c89d..c2c8163d 100644 --- a/website/docs/pipeline/intro.md +++ b/website/docs/pipeline/intro.md @@ -25,9 +25,9 @@ The **Netsage Flow Processing Pipeline** processes network flow data. It is comp The Netsage Flow Processing Pipeline is made of the following components - - **[Pmacct](https://github.com/pmacct/pmacct)**: The pmacct package includes sfacctd and nfacctd daemons which receive sflow and netflow/IPFIX flows, respectively, and send them to a rabbitmq queue. 
- - **[RabbitMQ](https://www.rabbitmq.com/)**: Rabbitmq is used for message queueing and passing at a couple of points in the full pipeline. - - **[Logstash](https://www.elastic.co/logstash)**: A logstash pipeline pulls flow data from a rabbit queue and performs a variety of operations to transform it and add additional information. + - **[Pmacct](https://github.com/pmacct/pmacct)**: The pmacct package includes sfacctd and nfacctd daemons which receive sflow and netflow/IPFIX flows, respectively. They are configured to send the flows to a rabbitMQ queue. + - **[RabbitMQ](https://www.rabbitmq.com/)**: RabbitMQ is used for message queueing and passing at a couple of points in the full pipeline. + - **[Logstash](https://www.elastic.co/logstash)**: Our logstash pipeline pulls flow data from a rabbitMQ queue and performs a variety of operations to transform it and add additional information. - **[Elasticsearch](https://www.elastic.co/what-is/elasticsearch)**: Elasticsearch is used for storing the final flow data. ### Pipeline Installation diff --git a/website/docs/pipeline/logstash.md b/website/docs/pipeline/logstash.md index 410a4146..ee0deaf6 100644 --- a/website/docs/pipeline/logstash.md +++ b/website/docs/pipeline/logstash.md @@ -8,7 +8,6 @@ The Logstash portion of the Netsage Pipeline reads flows from a RabbitMQ queue, Logstash .conf files invoke various "filters" and actions. In the bare metal installation, these conf files are located in /etc/logstash/conf.d/. In a docker installation, they are located in the conf-logstash/ directory of the git checkout of the pipeline. See below for a brief description of what each does and check the files for comments. ->Notes: > - All \*.conf files in conf.d/ or conf-logstash/ are executed in alphabetical order, as if they were one huge file. Those ending in .disabled will not be executed (assuming 'path.config: "/etc/logstash/conf.d/*.conf"'). > - If you are not running a standard Netsage pipeline and actions in a particular .conf file are not needed in your particular case, they or the whole .conf file can be removed, but check carefully for downstream effects. > - MaxMind, CAIDA, and Science Registry database files required by the geoip and aggregate filters are downloaded from scienceregistry.netsage.global via cron jobs on a weekly or daily basis. (MaxMind data can change weekly, CAIDA quarterly, Science Registry information randomly.) **NOTE that new versions won't be used in the pipeline until logstash is restarted.** There is a cron file to do this also. Similarly for other support files, eg, those used in 90-additional-fields.conf. @@ -30,7 +29,7 @@ Renames fields provided by pmacct processes to match what the pipeline uses (fro Drops flows to or from private IP addresses; converts any timestamps in milliseconds to seconds; -drops events with timestamps more than a year in the past or (10 sec) in the future; +drops strange events with timestamps more than a year in the past or (10 sec) in the future; sets duration and rates to 0 if duration is <= 0.002 sec (because tiny durations/few samples lead to inaccurate rates) ### 15-sensor-specific-changes.conf @@ -116,7 +115,7 @@ Sets additional quick and easy fields. 
Supporting mapping or ruby files are use - sensor_type = Circuit, Archive, Exchange Point, Regional Network, Facility Edge, Campus (based on matching sensor names to regexes) - country_scope = Domestic, International, or Mixed (based on src and dst countries and possibly continents, where Domestic = US, Puerto Rico, or Guam) - is_network_testing = yes, no (yes if discipline from the science registry is 'CS.Network Testing and Monitoring' or if port = 5001, 5101, or 5201) - - es_doc_id = hash of meta.id and the start time of the flow. If this id is used as the document id in elasticsearch, flows that are mistakenly input more than once will update existing documents rather than be added as duplicates. (NOTE: due to how netflow works, use es_doc_id as the ES document id only for sflow!) + - es_doc_id = hash of meta.id and the start time of the flow. If this id is used as the document id in elasticsearch, flows that are mistakenly input more than once will update existing documents rather than be added as duplicates. (NOTE: due to how netflow works, use es_doc_id as the ES document id only for sflow!) This id may or may not be used for the document id in Elasticsearch. It may be used for other purposes in grafana dashboards, as well. ### 95-cleanup.conf diff --git a/website/docs/pipeline/pmacct.md b/website/docs/pipeline/pmacct.md index 5e17ed10..9484bfea 100644 --- a/website/docs/pipeline/pmacct.md +++ b/website/docs/pipeline/pmacct.md @@ -10,8 +10,8 @@ These proceses do sampling corrections, add sensor name information, and send th Netsage also uses sfacctd to do some preliminary aggregation for sflow, to cut down on the work that logstash needs to do. By default, all samples, with the same 5-tuple, within each 5 minute window are aggregated into one incoming raw flow. ### Configuration -Each nfacctd and sfacctd process requires a main config file. In the bare-metal installation, these are in /etc/pmacct/. For the default docker deployment, they are in {pipeline checkout directory}/conf-pmacct/. There are two basic versions - sfacctd.conf.ORIG and nfacctd.conf.ORIG. See comments within the files. Sensor-specific versions are created from these via a setup script. +Each nfacctd and sfacctd process requires a main config file. In the bare-metal installation, these are in /etc/pmacct/. For the default docker deployment, they are in {pipeline checkout directory}/conf-pmacct/. There are two basic versions - sfacctd.conf.ORIG and nfacctd.conf.ORIG. See comments within the files. Sensor-specific copies are created from these via a setup script. -For Netsage, pretag.map files are also required to assign a sensor name, one for each nfacctd or sfacctd process. With the docker deployment, these files are also created by a setup script. By default, these are found in the same directory as the main config files. +For Netsage, pretag.map files are also required to assign a sensor name, one file for each nfacctd or sfacctd process. With the docker deployment, these files are also created by a setup script. By default, these are found in the same directory as the main config files. diff --git a/website/docs/pipeline/sensors.md b/website/docs/pipeline/sensors.md index 128d93d4..45088816 100644 --- a/website/docs/pipeline/sensors.md +++ b/website/docs/pipeline/sensors.md @@ -8,8 +8,12 @@ Export of sflow and netflow/IPFIX data can be configured on appropriate network We have assumed that each exporter/sensor is configured to send flow data to a different port on the pipeline host. 
Certainly if different sensors use different sampling rates, this needs to be adhered to. The pipeline uses the port number to recognize which sensor the flows are coming from and tag them with the name of that sensor.
-Sflow exporters simply collect individual **samples** of packets passing through the device and send them to a collector (pmacct in our case). The netsage pipeline then looks for matching packets to aggregate into flows. The sampling rate can be configured, eg, 1 out of every 100 packets. To approximately correct for the fact that most packets are not detected, one assumes that each sampled packet represents N others and multiplies the number of bytes in the sampled packet by the sampling rate N, eg, 100. The sampling rate compared to the number of packets per second flowing through the device determines how accurate this approximation is. Sampling is least accurate for shorter flows since their packets will be more likely to be missed and the correction applied may overestimate the number of bytes in the flow. Discussions of accuracy and sampling rates can be found online.
+Sflow exporters simply collect individual **samples** of packets passing through the device and send them to a collector (pmacct in our case). The netsage pipeline then looks for matching packets to aggregate into flows. The sampling rate can be configured, eg, 1 out of every 100 packets.
+
+>To approximately correct for the fact that most packets are not detected, one assumes that each sampled packet represents N others and multiplies the number of bytes in the sampled packet by the sampling rate N, eg, 100. The sampling rate compared to the number of packets per second flowing through the device determines how accurate this approximation is. Sampling is least accurate for shorter flows since their packets will be more likely to be missed and the correction applied may overestimate the number of bytes in the flow. Discussions of accuracy and sampling rates can be found online.
Netflow also commonly samples packets, and the same sampling corrections must be applied, but it also keeps track of the flows and aggregates by the 5-tuple (source and destination IPs, ports, and protocol) *on the router*. The **active timeout** determines how often netflow sends out an "update" on the flows it is aggregating. The **inactive timeout** determines how long to wait for another matching packet before declaring that a flow has ended.
-Typically, the active timeout is set to 1 minute and the inactive timeout to 15 seconds. This means that for flows longer than 1 minute, a "netflow update" is sent out every minute. The tricky thing is that these update-flows all have the same start time (the time the first packet was observed). The end time (the time the last packet was observed) and duration change, but the number of bytes and packets reported corresponds only to the period since the last update. 
The netsage pipeline attempts to combine these updates to aggregate long flows correctly. +> +>Netflow exporters also periodically send "templates" which describe the contents of the flow data datagrams. Before the first template is sent, the flow collector won't know what the sampling rate is, so templates should be sent frequently, eg, every minute. From f72b603d2dc2995773bd745cea741a12a3548a75 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 24 Aug 2022 15:00:31 +0000 Subject: [PATCH 107/126] Set docker-compose to pull nfacctd and sfacctd images from Github Container Registry --- docker-compose.override_example.yml | 4 ++-- docker-compose.yml | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docker-compose.override_example.yml b/docker-compose.override_example.yml index 554703c8..b5b7bce4 100644 --- a/docker-compose.override_example.yml +++ b/docker-compose.override_example.yml @@ -25,10 +25,10 @@ services: # add a section to the .env file that uses *_2 variable names # and increase the number of sflowSensors or netflowSensors; # copy the whole nfacctd_1 or sfacctd_1 service section from docker-compose.yml to docker-compose.override.yml; -# change the ports to reference env variables instead of specifying example values: +# in the copied text, change the ports to reference env variables instead of specifying example values: # - "@{sflowPort_1}:@{sflowContainerPort_1}/udp" # or - "@{netflowPort_1}:@{netflowContainerPort_1}/udp" -# change the @'s to $'s in the above lines! (can't show $'s here or env var values will get stuck into the comment); +# (change the @'s to $'s in the above lines! Can't show $'s here or env var values will get stuck into the comment); # change all _1's to _2's (or _3's for third sensor of one type, etc) in the new section; # then run the setup_pmacct.sh script [again]. # Double check the changes made to the override file! diff --git a/docker-compose.yml b/docker-compose.yml index 49477bc1..11840d89 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,23 +3,23 @@ version: "3.7" # Default docker services and settings. # Do not make changes here; use the override file. -# Shared network for the containers. They will be able to communicate over default ports. +# Shared network for the containers. Processes will be able to communicate over default ports. 
networks: netsage-network: services: sfacctd_1: container_name: sfacctd_1 - image: sfacctd:7Jun2022 + image: ghcr.io/netsage-project/sfacctd:7Jun2022 env_file: .env ports: # port on host for incoming flow data : port in the container - "8000:8000/udp" volumes: - # location of our configs : default location : read-only + # location of configs on host : location in container : read-only - ./conf-pmacct:/etc/pmacct:ro command: - # override the default parameters (entrypoint is the actual command) + # default parameters for the sfacctd command - -f - /etc/pmacct/sfacctd_1.conf networks: @@ -29,16 +29,16 @@ services: nfacctd_1: container_name: nfacctd_1 - image: nfacctd:7Jun2022 + image: ghcr.io/netsage-project/nfacctd:7Jun2022 env_file: .env ports: # port on host for incoming flow data : port in the container - "9000:9000/udp" volumes: - # location of our configs : default location : read-only + # location of configs on host : location in container : read-only - ./conf-pmacct:/etc/pmacct:ro command: - # override the default parameters (entrypoint is the actual command) + # default parameters for the nfacctd command - -f - /etc/pmacct/nfacctd_1.conf networks: From a83e07179a2db6ff36435d9884c5b0bf30510b38 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 24 Aug 2022 15:14:52 +0000 Subject: [PATCH 108/126] Disabled 80-privatize-org.conf since it is no longer needed for AARNET --- CHANGES.md | 3 + ...rg.conf => 80-privatize-org.conf.disabled} | 55 +++++++------------ conf-logstash/95-cleanup.conf | 13 +++++ 3 files changed, 37 insertions(+), 34 deletions(-) rename conf-logstash/{80-privatize-org.conf => 80-privatize-org.conf.disabled} (70%) diff --git a/CHANGES.md b/CHANGES.md index 1868a6da..2021c253 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,7 @@ ------------------------------------------------------ ## GRNOC NetSage Pipeline 2.0.0 --, 2022 NEW PACKAGE NAME; USING PMACCT INSTEAD OF NFDUMP AND IMPORTER +Docker will pull 7Jun2022 images we made for nfacctd and sfacctd images from Github Container Registry ------------------------------------------------------ Features: * Renamed package to grnoc-netsage-pipeline @@ -37,6 +38,8 @@ Features: * Another cron file restarts the logstash container each day. * Docker-compose.yml ensures logstash runs with uid 1000, while setup-cron.sh sets the owner of logstash-temp/ to 1000, so logstash can write and read aggregation map files when it stops and starts. (User 1000 could be anyone on the host; name doesn't matter.) + * AARNET privatization is no longer needed, so added .disabled to 80-privatize-org.conf, leaving it there as an example. Moved lines making + the org name consistent to 95-cleanup.conf. * Documentation updates * Dependabot automatic remediations of vulnerabilites (for docusaurus) diff --git a/conf-logstash/80-privatize-org.conf b/conf-logstash/80-privatize-org.conf.disabled similarity index 70% rename from conf-logstash/80-privatize-org.conf rename to conf-logstash/80-privatize-org.conf.disabled index 19332cf7..3438119c 100644 --- a/conf-logstash/80-privatize-org.conf +++ b/conf-logstash/80-privatize-org.conf.disabled @@ -1,4 +1,5 @@ # Remove information about any organizations that have privacy rules that require us to not identify them. +### This example privatizes Australian sources and destinations filter { ruby { @@ -32,18 +33,17 @@ filter { tag_on_exception => '_rubyexception in 80-privatize-org' } - # Australian SRCs: Copy some info to [private] and replace sensitive info with AARnet values. 
- # (Copy then replace in the same mutate filter results in both private and public values being privatized, - # because copy is always done last, so use separate mutates. Update will not create a field if one doesn't exist.) + # Australian SRCs: replace sensitive info with AARnet values. if [@metadata][REDACT-SRC] == "YES" { - mutate { - id => "80-2" - copy => { "[meta][src_organization]" => "[PRIVATE][src_organization]" } - copy => { "[meta][src_asn]" => "[PRIVATE][src_asn]" } - copy => { "[meta][src_ip]" => "[PRIVATE][src_ip]" } - copy => { "[meta][scireg][src][org_name]" => "[PRIVATE][scireg_src_org_name]" } - copy => { "[meta][scireg][src][resource]" => "[PRIVATE][scireg_src_resource]" } - } + +## mutate { +## id => "80-2" +## copy => { "[meta][src_organization]" => "[PRIVATE][src_organization]" } +## copy => { "[meta][src_asn]" => "[PRIVATE][src_asn]" } +## copy => { "[meta][src_ip]" => "[PRIVATE][src_ip]" } +## copy => { "[meta][scireg][src][org_name]" => "[PRIVATE][scireg_src_org_name]" } +## copy => { "[meta][scireg][src][resource]" => "[PRIVATE][scireg_src_resource]" } +## } mutate { id => "80-3" @@ -65,16 +65,17 @@ filter { } # end SRC - # Australian DSTs: Copy some info to [private] and replace sensitive info with AARnet values + # Australian DSTs: replace sensitive info with AARnet values if [@metadata][REDACT-DST] == "YES" { - mutate { - id => "80-5" - copy => { "[meta][dst_organization]" => "[PRIVATE][dst_organization]" } - copy => { "[meta][dst_asn]" => "[PRIVATE][dst_asn]" } - copy => { "[meta][dst_ip]" => "[PRIVATE][dst_ip]" } - copy => { "[meta][scireg][dst][org_name]" => "[PRIVATE][scireg_dst_org_name]" } - copy => { "[meta][scireg][dst][resource]" => "[PRIVATE][scireg_dst_resource]" } - } + +## mutate { +## id => "80-5" +## copy => { "[meta][dst_organization]" => "[PRIVATE][dst_organization]" } +## copy => { "[meta][dst_asn]" => "[PRIVATE][dst_asn]" } +## copy => { "[meta][dst_ip]" => "[PRIVATE][dst_ip]" } +## copy => { "[meta][scireg][dst][org_name]" => "[PRIVATE][scireg_dst_org_name]" } +## copy => { "[meta][scireg][dst][resource]" => "[PRIVATE][scireg_dst_resource]" } +## } mutate { id => "80-6" @@ -97,18 +98,4 @@ filter { } # end DST - # Make sure we have consistent AARNET names even if no redaction (case insensitive) - if [meta][src_organization] =~ /(?i)Australian Academic and Research Network|AARNET/ { - mutate { - id => "80-8" - update => { "[meta][src_organization]" => "Australian Academic and Research Network (AARNet)" } - } - } - if [meta][dst_organization] =~ /(?i)Australian Academic and Research Network|AARNET/ { - mutate { - id => "80-9" - update => { "[meta][dst_organization]" => "Australian Academic and Research Network (AARNet)" } - } - } - } diff --git a/conf-logstash/95-cleanup.conf b/conf-logstash/95-cleanup.conf index 62babd2f..32e9bc22 100644 --- a/conf-logstash/95-cleanup.conf +++ b/conf-logstash/95-cleanup.conf @@ -37,4 +37,17 @@ filter { remove_field => "[type]" } + # Make sure we have consistent AARNET names (case insensitive) + if [meta][src_organization] =~ /(?i)Australian Academic and Research Network|AARNET/ { + mutate { + id => "80-8" + update => { "[meta][src_organization]" => "Australian Academic and Research Network (AARNet)" } + } + } + if [meta][dst_organization] =~ /(?i)Australian Academic and Research Network|AARNET/ { + mutate { + id => "80-9" + update => { "[meta][dst_organization]" => "Australian Academic and Research Network (AARNet)" } + } + } } From 19f9942bec181497ce8711f99882173e4edba2cf Mon Sep 17 00:00:00 2001 From: Lisa 
Ensman Date: Wed, 24 Aug 2022 16:32:38 +0000 Subject: [PATCH 109/126] moved discovery.type to elasticsearch env vars section --- env.example | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/env.example b/env.example index b227457d..47d02c11 100644 --- a/env.example +++ b/env.example @@ -76,7 +76,7 @@ subnet_filter_flag=False full_IPs_flag=False # LOGSTASH PROCESS SETTINGS: -# memory - java heap size +# memory - max java heap size LS_JAVA_OPTS=-Xmx4g -Xms4g # The aggregation filter requires there be only one logstash worker! Do not change. PIPELINE_WORKERS=1 @@ -84,14 +84,14 @@ PIPELINE_ORDERED=true # other PIPELINE_ECS_COMPATIBILITY=disabled -# RABBITMQ SERVER SETTINGS: +# LOCAL RABBITMQ SERVER SETTINGS: # (for the post-pmacct/pre-logstash queue) RABBIT_HOST=rabbit RABBITMQ_DEFAULT_USER=guest RABBITMQ_DEFAULT_PASS=guest RABBITMQ_ERLANG_COOKIE='secret cookie' -discovery.type=single-node # In case you run an elasticsearch container ELASTIC_HOSTNAME='elastic' +discovery.type=single-node XPACK_MONITORING_ENABLED=false From 0b573e472265d1d9035f214e31d4d9effef25c0e Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 24 Aug 2022 20:57:29 +0000 Subject: [PATCH 110/126] Made 80-provatize-org into an example file --- conf-logstash/80-privatize-org.conf.disabled | 52 +++++++++++--------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/conf-logstash/80-privatize-org.conf.disabled b/conf-logstash/80-privatize-org.conf.disabled index 3438119c..7bc6231e 100644 --- a/conf-logstash/80-privatize-org.conf.disabled +++ b/conf-logstash/80-privatize-org.conf.disabled @@ -1,14 +1,17 @@ # Remove information about any organizations that have privacy rules that require us to not identify them. -### This example privatizes Australian sources and destinations + +### This is a fictional example +# To use, set the following in the code below: ASNs to privatize (asn_array), country the ASNs are in (called "CountryA" below); +# what to use when overwriting org names ("NetworkA"), org abbreviations ("NetA"), latitude and longitude (set to -25 and 25 below), +# scireg resource names ("NetworkA member"), and scireg resource abbrevations ("NetA member") + filter { ruby { id => "80-1" code => ' - # Australian ASNs to privatize - # 7/11/19 - ASNs from "/usr/bin/whois AS7575:AS-RNO" and "/usr/bin/whois AS7575:AS-EDGE" - # (state based networks connected to AARNet and customers on AARNet using public AS numbers): - asn_array = ["AS4738", "AS7569", "AS7571", "AS7570", "AS7572", "AS7573", "AS7574", "AS1851", "AS4822", "AS6262", "AS7475", "AS7476", "AS7573", "AS7575", "AS7637", "AS7645", "AS9348", "AS4608", "AS9383", "AS9517", "AS10106", "AS10148", "AS17807", "AS20144", "AS22556", "AS23654", "AS23719", "AS23859", "AS23935", "AS24032", "AS24101", "AS24313", "AS24390", "AS24431", "AS24433", "AS24434", "AS24436", "AS24437", "AS24490", "AS24510", "AS37978", "AS38076", "AS38083", "AS38280", "AS38307", "AS38474", "AS38568", "AS38568", "AS38795", "AS38858", "AS45128", "AS45158", "AS45213", "AS45797", "AS45962", "AS55354", "AS55363", "AS55491", "AS55773", "AS55813", "AS56065", "AS56132", "AS56210", "AS56219", "AS56303", "AS58422", "AS58528", "AS58582", "AS58584", "AS58611", "AS58686", "AS58698", "AS58877", "AS59206", "AS64090", "AS131294", "AS137188", "AS132129", "AS132158", "AS132345", "AS132693", "AS132728", "AS132868", "AS133019", "AS134096", "AS134111", "AS134115", "AS134197", "AS134197", "AS134700", "AS134748", "AS137965", "AS135350", "AS135520", "AS135892", "AS135893", "AS136013", "AS136016", 
"AS136135", "AS136247", "AS136549", "AS136753", "AS136770", "AS136912", "AS136921", "AS136621", "AS137073", "AS137400", "AS138017", "AS137837", "AS137529", "AS138201", "AS138390", "AS138447", "AS138468", "AS138537", "AS137429"] + # ASNs to privatize + asn_array = ["AS0001", "AS0002", "AS0003"] # Convert array to hash with values of true asn_hash = asn_array.map {|x| [x,true]}.to_h @@ -20,11 +23,11 @@ filter { dst_country = event.get("[meta][dst_country_name]") # Are flow src or dst in the list? - # Redact only if src or dst is also physically IN Australia - if asn_hash[src_asn] and src_country == "Australia" + # Redact only if src or dst is also physically IN countryA + if asn_hash[src_asn] and src_country == "countryA" event.set( "[@metadata][REDACT-SRC]" , "YES" ) end - if asn_hash[dst_asn] and dst_country == "Australia" + if asn_hash[dst_asn] and dst_country == "countryA" event.set( "[@metadata][REDACT-DST]" , "YES" ) end @@ -33,9 +36,10 @@ filter { tag_on_exception => '_rubyexception in 80-privatize-org' } - # Australian SRCs: replace sensitive info with AARnet values. + # CountryA SRCs: replace sensitive info if [@metadata][REDACT-SRC] == "YES" { + # Save original values if needed or desired ## mutate { ## id => "80-2" ## copy => { "[meta][src_organization]" => "[PRIVATE][src_organization]" } @@ -47,25 +51,25 @@ filter { mutate { id => "80-3" - replace => { "[meta][src_organization]" => "Australian Academic and Research Network (AARNet)" } + replace => { "[meta][src_organization]" => "NetworkA" } replace => { "[meta][src_asn]" => -1 } replace => { "[meta][src_ip]" => "xx.xx.xx.xx" } replace => { "[meta][src_location][lat]" => -25 } - replace => { "[meta][src_location][lon]" => 135 } + replace => { "[meta][src_location][lon]" => 25 } - update => { "[meta][scireg][src][org_name]" => "Australian Academic and Research Network (AARNet)" } - update => { "[meta][scireg][src][org_abbr]" => "AARNet.au" } - update => { "[meta][scireg][src][resource]" => "AARNet member" } - update => { "[meta][scireg][src][resource_abbr]" => "AARNet" } + update => { "[meta][scireg][src][org_name]" => "NetworkA" } + update => { "[meta][scireg][src][org_abbr]" => "NetA" } + update => { "[meta][scireg][src][resource]" => "NetworkA member" } + update => { "[meta][scireg][src][resource_abbr]" => "NetA member" } update => { "[meta][scireg][src][latitude]" => "-25" } - update => { "[meta][scireg][src][longitude]" => "135" } + update => { "[meta][scireg][src][longitude]" => "25" } remove_field => [ "[meta][scireg][src][project_names]" ] } } # end SRC - # Australian DSTs: replace sensitive info with AARnet values + # CountryA DSTs: replace sensitive info if [@metadata][REDACT-DST] == "YES" { ## mutate { @@ -79,18 +83,18 @@ filter { mutate { id => "80-6" - replace => { "[meta][dst_organization]" => "Australian Academic and Research Network (AARNet)" } + replace => { "[meta][dst_organization]" => "NetworkA" } replace => { "[meta][dst_asn]" => -1 } replace => { "[meta][dst_ip]" => "xx.xx.xx.xx" } replace => { "[meta][dst_location][lat]" => -25 } - replace => { "[meta][dst_location][lon]" => 135 } + replace => { "[meta][dst_location][lon]" => 25 } - update => { "[meta][scireg][dst][org_name]" => "Australian Academic and Research Network (AARNet)" } - update => { "[meta][scireg][dst][org_abbr]" => "AARNet.au" } - update => { "[meta][scireg][dst][resource]" => "AARNet member" } - update => { "[meta][scireg][dst][resource_abbr]" => "AARNet" } + update => { "[meta][scireg][dst][org_name]" => "NetworkA" } + update => { 
"[meta][scireg][dst][org_abbr]" => "NetA" } + update => { "[meta][scireg][dst][resource]" => "NetworkA member" } + update => { "[meta][scireg][dst][resource_abbr]" => "NetA member" } update => { "[meta][scireg][dst][latitude]" => "-25" } - update => { "[meta][scireg][dst][longitude]" => "135" } + update => { "[meta][scireg][dst][longitude]" => "25" } remove_field => [ "[meta][scireg][dst][project_names]" ] } From b473eb8896b52aadee53bc76c5ac7e03acd0e2dc Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 7 Sep 2022 18:20:43 +0000 Subject: [PATCH 111/126] Changed to having script create docker-compose.yml based on example and env files. And added blocks of resusable default settings to compose file. --- .gitignore | 1 + ...-compose.yml => docker-compose.example.yml | 66 ++++++++----- docker-compose.override_example.yml | 23 +---- env.example | 3 + setup-pmacct.sh => setup-pmacct-compose.sh | 99 ++++++++++++------- 5 files changed, 110 insertions(+), 82 deletions(-) rename docker-compose.yml => docker-compose.example.yml (67%) rename setup-pmacct.sh => setup-pmacct-compose.sh (54%) diff --git a/.gitignore b/.gitignore index 5183c947..5171d55b 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ conf/systemd/deploy ~ .env +docker-compose.yml docker-compose.override.yml userConfig bin/docker-netsage-downloads.sh diff --git a/docker-compose.yml b/docker-compose.example.yml similarity index 67% rename from docker-compose.yml rename to docker-compose.example.yml index 11840d89..1216584e 100644 --- a/docker-compose.yml +++ b/docker-compose.example.yml @@ -1,50 +1,62 @@ version: "3.7" -# Default docker services and settings. -# Do not make changes here; use the override file. +# Docker services and settings. +# The non-example version of this file is created by setup-pmacct-compose.sh. +# Use the override file for any manual overrides. # Shared network for the containers. Processes will be able to communicate over default ports. 
networks: netsage-network: -services: - sfacctd_1: - container_name: sfacctd_1 - image: ghcr.io/netsage-project/sfacctd:7Jun2022 - env_file: .env - ports: - # port on host for incoming flow data : port in the container - - "8000:8000/udp" +# Reusable blocks of settings +x-default-pmacct-settings: + &pmacct-defaults + env_file: + - .env volumes: # location of configs on host : location in container : read-only - ./conf-pmacct:/etc/pmacct:ro - command: - # default parameters for the sfacctd command - - -f - - /etc/pmacct/sfacctd_1.conf networks: - netsage-network depends_on: - rabbit + +x-default-sfacct-settings: + &sflow-defaults + image: + - ghcr.io/netsage-project/sfacctd:7Jun2022 + +x-default-nfacct-settings: + &netflow-defaults + image: + - ghcr.io/netsage-project/nfacctd:7Jun2022 + +# The containers (setup script needs to have them in this order) +services: + + sfacctd_1: + container_name: sfacctd_1 + << : *pmacct-defaults + << : *sflow-defaults + command: + # parameters for the sfacctd command + - -f + - /etc/pmacct/sfacctd_1.conf + ports: + # port on host receiving flow data : port in the container + - "${sflowPort_1}:${sflowContainerPort_1}/udp" nfacctd_1: container_name: nfacctd_1 - image: ghcr.io/netsage-project/nfacctd:7Jun2022 - env_file: .env - ports: - # port on host for incoming flow data : port in the container - - "9000:9000/udp" - volumes: - # location of configs on host : location in container : read-only - - ./conf-pmacct:/etc/pmacct:ro + << : *pmacct-defaults + << : *netflow-defaults command: - # default parameters for the nfacctd command + # parameters for the nfacctd command - -f - /etc/pmacct/nfacctd_1.conf - networks: - - netsage-network - depends_on: - - rabbit + ports: + # port on host receiving flow data : port in the container + - "${netflowPort_1}:${netflowContainerPort_1}/udp" rabbit: container_name: rabbit diff --git a/docker-compose.override_example.yml b/docker-compose.override_example.yml index b5b7bce4..643c8998 100644 --- a/docker-compose.override_example.yml +++ b/docker-compose.override_example.yml @@ -1,13 +1,9 @@ version: "3.7" -# Settings in this file override or add to those in docker-compose.yml. Copy in anything that needs to be changed. -# It will not be overwritten on upgrade. - -# setup_pmacct.sh creates docker_compose.override.yml from docker-compose.override_example.yml, if it does not yet exist, and fills in env vars. -# (It also creates or re-creates pmacct config files, filling in env vars.) - -# For ports: In the final docker_compose.override.yml, the 'port on host' should match what is listed in the .env file, while -# the 'port in the container' should match what is in the *facctd_n conf file (the latter is port number is determined by the setup script). +# Settings in this file override or add to those in docker-compose.yml. +# Add anything that needs to be changed manually. (This is unusual). +# Docker-compose.yml will not be overwritten on upgrade. 
+# For example: services: @@ -21,14 +17,3 @@ services: # port on host receiving flow data : port in the container - "${netflowPort_1}:${netflowContainerPort_1}/udp" -# TO ADD A SECOND SENSOR OF THE SAME TYPE: -# add a section to the .env file that uses *_2 variable names -# and increase the number of sflowSensors or netflowSensors; -# copy the whole nfacctd_1 or sfacctd_1 service section from docker-compose.yml to docker-compose.override.yml; -# in the copied text, change the ports to reference env variables instead of specifying example values: -# - "@{sflowPort_1}:@{sflowContainerPort_1}/udp" -# or - "@{netflowPort_1}:@{netflowContainerPort_1}/udp" -# (change the @'s to $'s in the above lines! Can't show $'s here or env var values will get stuck into the comment); -# change all _1's to _2's (or _3's for third sensor of one type, etc) in the new section; -# then run the setup_pmacct.sh script [again]. -# Double check the changes made to the override file! diff --git a/env.example b/env.example index 47d02c11..f4761e81 100644 --- a/env.example +++ b/env.example @@ -10,9 +10,11 @@ netflowSensors=1 # The sensor name to assign to flows # The port on the pipeline host to which the router is sending flows #--- REPLACE EXAMPLE VALUES --- +# sflow sensors: sflowSensorName_1=The Sflow Sensor Name sflowPort_1=8000 +# netflow sensors: netflowSensorName_1=The Netflow Sensor Name netflowPort_1=9000 @@ -35,6 +37,7 @@ rabbitmq_output_key=processed_flows # Default inactivity_timeout is 6-minute. If no matching flows have come in for 6 minutes, end the aggregated flow. # Default max_flow_timeout is 1 hour. This is the maximum allowed flow duration; longer flows will be broken up. # Aggregation_maps_path is the file where flows undergoing aggregation are saved if logstash shuts down. The default is for Docker installs. +# These should not normally be changed. inactivity_timeout=360 max_flow_timeout=3600 aggregation_maps_path=/logstash-temp/logstash-aggregation-maps diff --git a/setup-pmacct.sh b/setup-pmacct-compose.sh similarity index 54% rename from setup-pmacct.sh rename to setup-pmacct-compose.sh index b5440973..46300906 100755 --- a/setup-pmacct.sh +++ b/setup-pmacct-compose.sh @@ -1,8 +1,10 @@ #!/bin/bash # This script reads pmacct env variables from the .env file, -# creates config files from the examples, and copies the env variable +# [re]creates pmacct config files from the examples, and copies the env variable # values into them. (Needed because pmacct doesn't support using env vars) +# It also [re]creates the docker-compose.yml file based on .env file entries. + echo "" # Get env variables from .env file @@ -30,13 +32,17 @@ do fi done < "$input" -# Loop over sflow sensors / create config files +# Create the docker-compose.yml file by copying the example (will overwrite any existing) +echo "Creating docker-compose.yml." +# Delete all the pmacct services, ie, everything between "services:" and "rabbit:" +# -0777 = treat the whole file as one string; -e code-to-run; .../s = interpret . as any char or newline. 
+perl -0777 -pe "s/services:.*rabbit:/services:\n\nINSERT-HERE\n\n rabbit:/s" < docker-compose.example.yml > docker-compose.yml + +# Loop over sflow sensors / create config files (will overwrite any existing) port=8000 for (( n=1; n<=${sflowSensors}; n++ )) do # assign the port the container will use - # (Note that it is important to have the same internal (container) port numbers used for the same services (eg, _1) - # every time this script is run, since an override file with hardcoded port numbers may already exist.) export sflowContainerPort_$n=$port # create temp config files cp conf-pmacct/sfacctd.conf.ORIG conf-pmacct/sfacctd_$n.conf.temp @@ -49,17 +55,37 @@ do envsubst < conf-pmacct/sfacctd-pretag_$n.map.temp > conf-pmacct/sfacctd-pretag_$n.map # remove temp files rm conf-pmacct/*.temp + + # service info for compose file; export so perl can see it. + export section=' sfacctd_1: + container_name: sfacctd_1 + << : *pmacct-defaults + << : *sflow-defaults + command: + # parameters for the sfacctd command + - -f + - /etc/pmacct/sfacctd_1.conf + ports: + # port on host receiving flow data : port in the container + - "${sflowPort_1}:${sflowContainerPort_1}/udp" + +INSERT-HERE' + + # substitute _$n for _1 in $section + section=$(sed 's/_1/_'"$n"'/g' <<< "$section") + + # write it into the compose file + perl -i -pe 's/INSERT-HERE/$ENV{section}/' docker-compose.yml + # next port number is 1 more port=$(($port+1)) done -# Loop over netflow sensors / create config files +# Loop over netflow sensors / create config files (will overwrite any existing) port=9000 for (( n=1; n<=${netflowSensors}; n++ )) do # assign the port the container will use - # (Note that it is important to have the same internal (container) port numbers used for the same services (eg, _1) - # every time this script is run, since an override file with hardcoded port numbers may already exist.) export netflowContainerPort_$n=$port # create temp config files cp conf-pmacct/nfacctd.conf.ORIG conf-pmacct/nfacctd_$n.conf.temp @@ -72,41 +98,42 @@ do envsubst < conf-pmacct/nfacctd-pretag_$n.map.temp > conf-pmacct/nfacctd-pretag_$n.map # remove temp files rm conf-pmacct/*.temp + + # service info for compose file; export so perl can see it. + export section=' nfacctd_1: + container_name: nfacctd_1 + << : *pmacct-defaults + << : *netflow-defaults + command: + # parameters for the nfacctd command + - -f + - /etc/pmacct/nfacctd_1.conf + ports: + # port on host receiving flow data : port in the container + - "${netflowPort_1}:${netflowContainerPort_1}/udp" + +INSERT-HERE' + + # substitute _$n for _1 in $section + section=$(sed 's/_1/_'"$n"'/g' <<< "$section") + + # write it into the compose file + perl -i -pe 's/INSERT-HERE/$ENV{section}/' docker-compose.yml + # next port number is 1 more port=$(($port+1)) done -# If the docker-compose.override file doesn't exist, make it by copying the example -if [[ ! -f "docker-compose.override.yml" ]] -then - echo "Creating docker-compose.override.yml." - cp docker-compose.override_example.yml docker-compose.override.yml -fi - -# If there are no sflow sensors, and we didn't already do it, override the sfacctd command so the container -# just echos a line and exits right away; and set the port env vars to defaults so docker-compose doesn't complain that either is unset -if [[ ${sflowSensors} -eq 0 ]] && ! grep -ql "No Sflow collector" "docker-compose.override.yml" -then - echo "Replacing entry_point for sflow collector since it is not needed." 
- sed -i "s/sfacctd_1:/sfacctd_1:\n entrypoint: echo 'No Sflow collector.'/" docker-compose.override.yml - export sflowPort_1=8000 - export sflowContainerPort_1=8000 -fi -# Same if no netflow sensors -if [[ ${netflowSensors} -eq 0 ]] && ! grep -ql "No Netflow collector" "docker-compose.override.yml" -then - echo "Replacing entry_point for netflow collector since it is not needed." - sed -i "s/nfacctd_1:/nfacctd_1:\n entrypoint: echo 'No Netflow collector.'/" docker-compose.override.yml - export netflowPort_1=9000 - export netflowContainerPort_1=9000 -fi - -# Replace any env variables in the override file. -envsubst < docker-compose.override.yml > docker-compose.override.yml.temp -mv docker-compose.override.yml.temp docker-compose.override.yml +# Get rid of any remaining "INSERT-HERE" lines + perl -i -pe 's/INSERT-HERE//' docker-compose.yml + + +# Replace any env variables in the compose file. +envsubst < docker-compose.yml > docker-compose.yml.temp +mv docker-compose.yml.temp docker-compose.yml echo " Pmacct config files have been created, based on the .env file." -echo " Please check the docker-compose.override.yml file to be sure it matches the .env file!" +echo " Docker-compose.yml has been created. Please check to be sure it matches the .env file!" echo "" From 02e32c8c2b0281c5161c64222cc22803fb619ca1 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 7 Sep 2022 18:54:31 +0000 Subject: [PATCH 112/126] Updated CHANGES file to include recent changes --- CHANGES.md | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 2021c253..7738d2e1 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -7,20 +7,21 @@ Features: * Renamed package to grnoc-netsage-pipeline * Got rid of old importer references, requirements, files, etc. * Used the %post section in the spec file to check to see if pmacct is installed. - * Added systemd unit files for sfacctd and nfacctd (default will be 1 sflow, 1 netflow source, for docker installs) + * Added systemd unit files for bare-metal sfacctd and nfacctd (default will be 1 sflow, 1 netflow source, for docker installs) * Revised docker-compose.yml file, etc. to work with pmacct containers. * Revised parts of the .env file, including adding variables for number of sflow and netflow sensors. - * Added default sfacct and nfacct config files in conf-pmacct/ (.ORIG files to be copied) - * Added setup-pmacct.sh script which the user runs to create pmacct config files and create or modify docker-compose.override.yml, - filling in environment variables set in the .env file. (pmacct configs cannot use env vars directly.) - * The number of sflow or netflow sensors can be 0. In this case, the setup script makes the container just run an echo command - after which it shuts down. + * Added example/default sfacct and nfacct config files in conf-pmacct/ (.ORIG files to be copied) + * Added setup-pmacct-compose.sh script which the user runs to create pmacct config files and docker-compose.yml, based on + docker-compose.example.yml and .env files. (pmacct configs cannot use env vars directly, so script fills them in.) + * The number of sflow or netflow sensors can be 0. In this case, the setup script does not include any of the unneeded services + in the docker-compose.yml file. * Added 05-translate-pmacct.conf logstash config to translate pmacct fields to ones the pipeline uses. * Revised 40-aggregation.conf to deal with pmacct; there are separate sections for sflow and netflow. 
* For netflow, in 40-aggregation.conf, the start time of incoming flows will be adjusted if duration is greater than the active timeout (ie, for "updates" to long lasting flows) * The default inactive timeout for logstash aggregation has been set to 6 minutes (to go with 5 minute sflow aggregation by sfacctd) - * Added 41-thresholds.conf - applies size threshold of 10 MB (drop smaller flows) and duration threshold of 0.1 sec (set rates to 0 if shorter) + * Added 41-thresholds.conf - applies size threshold of 10 MB (ie, drop smaller flows) and duration threshold of 0.1 sec (ie, set + duration and rates to 0 if shorter) after aggregation is finished. * Added new field: @sampling_corrected = yes/no. If sampling rate correction has been applied by pmacct or logstash, value will be yes. * Sampling rate corrections will be done in logstash when requested (ie, flag is set in the env file) but ONLY IF a correction has not yet been applied (by pmacct). @@ -28,18 +29,18 @@ Features: * Allowed "ALL" when specifying sensors for sampling rate corrections. * When a sampling rate correction is applied by logstash, add a tag with the rate. * Added CERN and Utah regexes to sensor type and group files. - * Added an env file option to skip de-identification. + * Added an option to skip de-identification. Set it in .env. * 0.0.0.x and 0.0.0.0 flows are tagged and dropped by default. (Unadvertised option to keep them is available in the env file.) * Changed to sensor_groups.json.example and sensor_types.json.example. From now on, our particular files/regexes will be downloaded from scienceregistry.grnoc. * Added setup-cron.sh script which copies .ORIG .cron and .sh files and writes in username and the location of the git checkout. The user must copy cron files to /etc/cron.d/. - * One cron file runs a script to download all files from scienceregistry.grnoc once/wk. + * One cron file runs a script to download all files (caida, maxmind, etc) from scienceregistry.grnoc once/wk. * Another cron file restarts the logstash container each day. * Docker-compose.yml ensures logstash runs with uid 1000, while setup-cron.sh sets the owner of logstash-temp/ to 1000, so logstash can write and read aggregation map files when it stops and starts. (User 1000 could be anyone on the host; name doesn't matter.) - * AARNET privatization is no longer needed, so added .disabled to 80-privatize-org.conf, leaving it there as an example. Moved lines making - the org name consistent to 95-cleanup.conf. + * AARNET privatization is no longer needed, so added .disabled to 80-privatize-org.conf, leaving a generalized version there as an + example. Moved lines making the org name consistent to 95-cleanup.conf. * Documentation updates * Dependabot automatic remediations of vulnerabilites (for docusaurus) From f05a0e5848724eee8a8a057307f7b0f61bf31ff6 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 7 Sep 2022 20:49:45 +0000 Subject: [PATCH 113/126] Fixed image lines in docker-compose. 05-Will write a tag if pmacct reports the actual sampling rate. 
--- conf-logstash/05-translate-pmacct.conf | 8 ++++++++ docker-compose.example.yml | 8 +++----- setup-pmacct-compose.sh | 4 ++-- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/conf-logstash/05-translate-pmacct.conf b/conf-logstash/05-translate-pmacct.conf index cf8fcb0b..c4c89a95 100644 --- a/conf-logstash/05-translate-pmacct.conf +++ b/conf-logstash/05-translate-pmacct.conf @@ -45,6 +45,14 @@ filter { add_field => { "@sampling_corrected" => "yes" } } } + # In case pmacct starts sending the actual sampling rate + if [sampling_rate] > 1 { + mutate { + id => "05-3.1" + add_tag => ["Pre-ingest sampling rate = %{sampling_rate}."] + } + } + # Get sensor name # Note: In the pmacct pretag file, label must be set to sfacct-- or nfacct-- # followed by the real sensor name with spaces replaced by #s. diff --git a/docker-compose.example.yml b/docker-compose.example.yml index 1216584e..b4720aca 100644 --- a/docker-compose.example.yml +++ b/docker-compose.example.yml @@ -23,13 +23,11 @@ x-default-pmacct-settings: x-default-sfacct-settings: &sflow-defaults - image: - - ghcr.io/netsage-project/sfacctd:7Jun2022 + image: ghcr.io/netsage-project/sfacctd:7Jun2022 x-default-nfacct-settings: &netflow-defaults - image: - - ghcr.io/netsage-project/nfacctd:7Jun2022 + image: ghcr.io/netsage-project/nfacctd:7Jun2022 # The containers (setup script needs to have them in this order) services: @@ -78,7 +76,7 @@ services: container_name: logstash image: docker.elastic.co/logstash/logstash:7.16.2 env_file: .env - # user uid + # user uid is expected to be 1000 user: 1000:1000 # Explicitly specify *.conf to be sure logstash doesn't use *.disabled configs. command: logstash -f /etc/logstash/conf.d/*.conf diff --git a/setup-pmacct-compose.sh b/setup-pmacct-compose.sh index 46300906..d83e74ed 100755 --- a/setup-pmacct-compose.sh +++ b/setup-pmacct-compose.sh @@ -132,8 +132,8 @@ done envsubst < docker-compose.yml > docker-compose.yml.temp mv docker-compose.yml.temp docker-compose.yml -echo " Pmacct config files have been created, based on the .env file." -echo " Docker-compose.yml has been created. Please check to be sure it matches the .env file!" +echo " Pmacct config and docker-compose.yml files have been created, based on the .env file." +echo " Please check to be sure Docker-compose.yml has the right number of sfacctd and pmacctd services with the right port numbers!" 
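As a rough illustration of how the reworked setup script might be exercised once this patch is applied (assumptions: a checkout containing env.example and setup-pmacct-compose.sh, the docker-compose v1 CLI available; the grep checks are only suggestions, not part of the project):

```sh
# Sketch only: create .env from the example, set sensor counts/names/ports, then
# let the setup script generate the pmacct configs and docker-compose.yml.
cp env.example .env
vi .env                          # set sflowSensors, netflowSensors, sensor names, ports
./setup-pmacct-compose.sh

# Sanity-check the generated compose file.
docker-compose config --quiet                            # non-zero exit if the YAML is invalid
grep -c 'container_name: sfacctd_' docker-compose.yml    # should match sflowSensors in .env
grep -c 'container_name: nfacctd_' docker-compose.yml    # should match netflowSensors in .env
grep '/udp' docker-compose.yml                           # host:container port mappings look right?
```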
echo "" From 85315312d04dd99dc6b898984c54391e12f074e0 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 8 Sep 2022 21:32:28 +0000 Subject: [PATCH 114/126] Some fixes to MANIFEST and spec files --- MANIFEST | 8 ++++---- grnoc-netsage-pipeline.spec | 18 ++++++++---------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/MANIFEST b/MANIFEST index 21d1142d..dc605acd 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,10 +1,10 @@ grnoc-netsage-pipeline.spec CHANGES.md bin/restart-logstash-service.sh -conf-pmacct/sfacctd.conf -conf-pmacct/nfacctd.conf -conf-pmacct/sfacct-pretag.map -conf-pmacct/nfacct-pretag.map +conf-pmacct/sfacctd.conf.ORIG +conf-pmacct/nfacctd.conf.ORIG +conf-pmacct/sfacctd-pretag.map.ORIG +conf-pmacct/nfacctd-pretag.map.ORIG conf-logstash/01-input-jsonfile.conf.disabled conf-logstash/01-input-multiline-json-file.conf.disabled conf-logstash/01-input-rabbit.conf diff --git a/grnoc-netsage-pipeline.spec b/grnoc-netsage-pipeline.spec index 82c16c1d..71118b84 100644 --- a/grnoc-netsage-pipeline.spec +++ b/grnoc-netsage-pipeline.spec @@ -115,10 +115,6 @@ rm -rf $RPM_BUILD_ROOT %config(noreplace) /etc/cron.d/netsage-logstash-restart.cron # Don't overwrite these .confs. Create .rpmnew files if needed. -%config(noreplace) /etc/pmacct/sfacctd.conf -%config(noreplace) /etc/pmacct/nfacctd.conf -%config(noreplace) /etc/pmacct/sfacct-pretag.map -%config(noreplace) /etc/pmacct/nfacct-pretag.map %config(noreplace) /etc/logstash/conf.d/01-input-rabbit.conf %config(noreplace) /etc/logstash/conf.d/15-sensor-specific-changes.conf %config(noreplace) /etc/logstash/conf.d/40-aggregation.conf @@ -151,6 +147,11 @@ rm -rf $RPM_BUILD_ROOT %config /etc/logstash/conf.d/support/sensor_types.json %config /etc/logstash/conf.d/support/networkA-members-list.rb.example +%config /etc/pmacct/sfacctd.conf.ORIG +%config /etc/pmacct/nfacctd.conf.ORIG +%config /etc/pmacct/sfacctd-pretag.map.ORIG +%config /etc/pmacct/nfacctd-pretag.map.ORIG + /usr/share/doc/grnoc/netsage-pipeline/CHANGES.md /usr/share/doc/grnoc/netsage-pipeline/INSTALL.md @@ -187,16 +188,13 @@ echo "-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*" echo "AFTER UPGRADING..." echo " " echo " * Check config and cron files with .rpmnew and .rpmsave versions to see if any need manual updates." -echo " * Pmacct configs: /etc/pmacct/. Logstash configs: /etc/logstash/conf.d/." -echo " * Pmacct configs and Logstash configs 01, 15, 40, and 99 are not replaced by updated versions, so check for changes. " -echo " * If using 55-member-orgs.conf, make sure you have the required files in support/. See comments in the conf file. " +echo " * Pmacct configs: You must create or copy to /etc/pmacct/. Examples are provided. See if any changes are required to existing files." +echo " * Logstash configs: /etc/logstash/conf.d/. 01, 15, 40, and 99 are not replaced by updated versions, so check for changes. " +echo " * Make sure you have any required member organization files in support/. See comments in the conf file. " echo " " echo " * Note that this rpm puts logstash config files in /etc/logstash/conf.d/ and doesn't manage multiple pipelines in pipelines.yml." echo " * Nor does it manage multiple pmacct processes." echo " " -echo " * IMPORTANT: Be sure the number of logstash pipeline workers is 1, or flow stitching (aggregation) won't work right. **" -echo " * and be sure logstash configs are specified by *.conf in the right directory." 
-echo " " echo " * [Re]start logstash and pmacct processes " echo "-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*" echo " " From c93533a0fe88935a381be02892626b37b3497fdd Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 8 Sep 2022 21:38:26 +0000 Subject: [PATCH 115/126] Set 80 to disabled in MANIFEST and spec files --- MANIFEST | 2 +- grnoc-netsage-pipeline.spec | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/MANIFEST b/MANIFEST index dc605acd..eff90011 100644 --- a/MANIFEST +++ b/MANIFEST @@ -20,7 +20,7 @@ conf-logstash/53-caida-org.conf conf-logstash/55-member-orgs.conf conf-logstash/60-scireg-tagging-fakegeoip.conf conf-logstash/70-deidentify.conf -conf-logstash/80-privatize-org.conf +conf-logstash/80-privatize-org.conf.disabled conf-logstash/88-preferred-location-org.conf conf-logstash/90-additional-fields.conf conf-logstash/95-cleanup.conf diff --git a/grnoc-netsage-pipeline.spec b/grnoc-netsage-pipeline.spec index 71118b84..1319feaa 100644 --- a/grnoc-netsage-pipeline.spec +++ b/grnoc-netsage-pipeline.spec @@ -135,7 +135,7 @@ rm -rf $RPM_BUILD_ROOT %config /etc/logstash/conf.d/55-member-orgs.conf %config /etc/logstash/conf.d/60-scireg-tagging-fakegeoip.conf %config /etc/logstash/conf.d/70-deidentify.conf -%config /etc/logstash/conf.d/80-privatize-org.conf +%config /etc/logstash/conf.d/80-privatize-org.conf.disabled %config /etc/logstash/conf.d/88-preferred-location-org.conf %config /etc/logstash/conf.d/90-additional-fields.conf %config /etc/logstash/conf.d/95-cleanup.conf From b138dabe6ee8f7a99db187cfd762a4cdb4b8fbae Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Tue, 13 Sep 2022 18:48:40 +0000 Subject: [PATCH 116/126] Combined some logstash 99-output confs. Fixed ids in 95-cleanup. Revised some comments. --- conf-logstash/95-cleanup.conf | 4 +-- conf-logstash/99-output-file.conf.disabled | 27 +++++++++++++++++++ conf-logstash/99-output-jsonlog.conf.disabled | 8 ------ .../99-output-multiline-json.conf.disabled | 17 ------------ conf-pmacct/nfacctd-pretag.map.ORIG | 10 ++++--- conf-pmacct/sfacctd-pretag.map.ORIG | 10 ++++--- docker-compose.example.yml | 5 ++-- env.example | 2 +- 8 files changed, 46 insertions(+), 37 deletions(-) create mode 100644 conf-logstash/99-output-file.conf.disabled delete mode 100644 conf-logstash/99-output-jsonlog.conf.disabled delete mode 100644 conf-logstash/99-output-multiline-json.conf.disabled diff --git a/conf-logstash/95-cleanup.conf b/conf-logstash/95-cleanup.conf index 32e9bc22..81e5ccc9 100644 --- a/conf-logstash/95-cleanup.conf +++ b/conf-logstash/95-cleanup.conf @@ -40,13 +40,13 @@ filter { # Make sure we have consistent AARNET names (case insensitive) if [meta][src_organization] =~ /(?i)Australian Academic and Research Network|AARNET/ { mutate { - id => "80-8" + id => "95-6" update => { "[meta][src_organization]" => "Australian Academic and Research Network (AARNet)" } } } if [meta][dst_organization] =~ /(?i)Australian Academic and Research Network|AARNET/ { mutate { - id => "80-9" + id => "95-7" update => { "[meta][dst_organization]" => "Australian Academic and Research Network (AARNet)" } } } diff --git a/conf-logstash/99-output-file.conf.disabled b/conf-logstash/99-output-file.conf.disabled new file mode 100644 index 00000000..040841ce --- /dev/null +++ b/conf-logstash/99-output-file.conf.disabled @@ -0,0 +1,27 @@ +output { + file { + path => "/logstash-temp/test-output" + + #codec => json + + # json with newlines (default) + #codec => json_lines + + # one line per field (x => v) + codec => rubydebug + + # one line per 
event (no field names?) + #codec => plain + + # formatted output + #codec => line {format => + #'{ "start":%{start}, "end":%{end}, "interval":%{interval}, + # "meta":{ "sensor_id":"%{[meta][sensor_id]}", "protocol":"%{[meta][protocol]}", "flow_type":"%{[meta][flow_type]}", + # "src_ip":"%{[meta][src_ip]}", "src_port":%{[meta][src_port]}, "src_asn":%{[meta][src_asn]}, "src_ifindex":%{[meta][src_ifindex]}, + # "dst_ip":"%{[meta][dst_ip]}", "dst_port":%{[meta][dst_port]}, "dst_asn":%{[meta][dst_asn]}, "dst_ifindex":%{[meta][dst_ifindex]} }, + # "values":%{values} } '} + + # append (default) or overwrite - use append to write all events, but delete file between runs + write_behavior => append + } +} diff --git a/conf-logstash/99-output-jsonlog.conf.disabled b/conf-logstash/99-output-jsonlog.conf.disabled deleted file mode 100644 index 98d9ce8e..00000000 --- a/conf-logstash/99-output-jsonlog.conf.disabled +++ /dev/null @@ -1,8 +0,0 @@ -output { - file { - path => "/data/all.json" - codec => json_lines - #codec => json - # write_behavior => overwrite - } -} \ No newline at end of file diff --git a/conf-logstash/99-output-multiline-json.conf.disabled b/conf-logstash/99-output-multiline-json.conf.disabled deleted file mode 100644 index 6e6f8a18..00000000 --- a/conf-logstash/99-output-multiline-json.conf.disabled +++ /dev/null @@ -1,17 +0,0 @@ -# Mainly for dev work. -# Write events as readable json (ie, with new-lines; only fields as specified below between single quotes). -# The output file can be read with 01-input-multiline-json.conf. -# We need "append" to write all events to the file, but delete the file in-between runs or it'll keep appending. -# (The example output fields are to directly write out events read from the "raw" rabbit queue.) -output { - file { - path => "/testdir/test-data.json" - codec => line {format => - '{ "start":%{start}, "end":%{end}, "interval":%{interval}, - "meta":{ "sensor_id":"%{[meta][sensor_id]}", "protocol":"%{[meta][protocol]}", "flow_type":"%{[meta][flow_type]}", - "src_ip":"%{[meta][src_ip]}", "src_port":%{[meta][src_port]}, "src_asn":%{[meta][src_asn]}, "src_ifindex":%{[meta][src_ifindex]}, - "dst_ip":"%{[meta][dst_ip]}", "dst_port":%{[meta][dst_port]}, "dst_asn":%{[meta][dst_asn]}, "dst_ifindex":%{[meta][dst_ifindex]} }, - "values":%{values} } '} - write_behavior => append - } -} diff --git a/conf-pmacct/nfacctd-pretag.map.ORIG b/conf-pmacct/nfacctd-pretag.map.ORIG index b6d49300..65bce704 100644 --- a/conf-pmacct/nfacctd-pretag.map.ORIG +++ b/conf-pmacct/nfacctd-pretag.map.ORIG @@ -1,6 +1,10 @@ -! This file is referenced in a config file and used to set the "label" field to a sensor name. -! Label should be "sfacct--" or "nfacct--" (for sflow or netflow, respectively) -! followed by the sensor name with spaces replaced by #'s +! This file is referenced in the nfacctd config file and used to set the "label" field +! which is parsed to obtain and set the sensor name. +! +! After the setup script runs, label should be "nfacct--" for sflow +! followed by the sensor name with spaces replaced by #'s. +! No commas are allowed in the sensor name! ! eg, set_label=nfacct--Netflow#Sensor set_label=${netflowSensorName_1} + diff --git a/conf-pmacct/sfacctd-pretag.map.ORIG b/conf-pmacct/sfacctd-pretag.map.ORIG index 048dd54e..179aa22f 100644 --- a/conf-pmacct/sfacctd-pretag.map.ORIG +++ b/conf-pmacct/sfacctd-pretag.map.ORIG @@ -1,6 +1,10 @@ -! This file is referenced in a config file and used to set the "label" field to a sensor name. -! 
Label should be "sfacct--" or "nfacct--" (for sflow or netflow, respectively) -! followed by the sensor name with spaces replaced by #'s +! This file is referenced in the sfacctd config file and used to set the "label" field +! which is parsed to obtain and set the sensor name. +! +! After the setup script runs, label should be "sfacct--" for sflow +! followed by the sensor name with spaces replaced by #'s. +! No commas are allowed in the sensor name! ! eg, set_label=sfacct--Sflow#Sensor set_label=${sflowSensorName_1} + diff --git a/docker-compose.example.yml b/docker-compose.example.yml index b4720aca..ceae4865 100644 --- a/docker-compose.example.yml +++ b/docker-compose.example.yml @@ -11,8 +11,7 @@ networks: # Reusable blocks of settings x-default-pmacct-settings: &pmacct-defaults - env_file: - - .env + env_file: .env volumes: # location of configs on host : location in container : read-only - ./conf-pmacct:/etc/pmacct:ro @@ -29,7 +28,7 @@ x-default-nfacct-settings: &netflow-defaults image: ghcr.io/netsage-project/nfacctd:7Jun2022 -# The containers (setup script needs to have them in this order) +# The containers (the setup script will replace those for sfacctd and nfacctd) services: sfacctd_1: diff --git a/env.example b/env.example index f4761e81..5f9d4d4c 100644 --- a/env.example +++ b/env.example @@ -7,7 +7,7 @@ netflowSensors=1 # Env variables for one sensor should all end in the same suffix, # and there should be a sequence (_1, _2, etc) for sflow and a sequence for netflow. # For each sensor, list the following: -# The sensor name to assign to flows +# The sensor name to assign to flows (cannot contain commas with pmacct) # The port on the pipeline host to which the router is sending flows #--- REPLACE EXAMPLE VALUES --- # sflow sensors: From 95475955658953862db7785389a30ebfde7a2778 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 14 Sep 2022 18:21:55 +0000 Subject: [PATCH 117/126] spec file and manifest changes for new 99-output-file --- MANIFEST | 5 ++--- grnoc-netsage-pipeline.spec | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/MANIFEST b/MANIFEST index eff90011..9216d59c 100644 --- a/MANIFEST +++ b/MANIFEST @@ -25,11 +25,10 @@ conf-logstash/88-preferred-location-org.conf conf-logstash/90-additional-fields.conf conf-logstash/95-cleanup.conf conf-logstash/98-post-process.conf +conf-logstash/99-output-rabbit.conf +conf-logstash/99-output-file.conf conf-logstash/99-output-elastic.conf.disabled -conf-logstash/99-output-jsonlog.conf.disabled -conf-logstash/99-output-multiline-json.conf.disabled conf-logstash/99-output-stdout.conf.disabled -conf-logstash/99-output-rabbit.conf conf-logstash/ruby/anonymize_ipv6.rb conf-logstash/ruby/domestic.rb conf-logstash/support/sensor_groups.json diff --git a/grnoc-netsage-pipeline.spec b/grnoc-netsage-pipeline.spec index 1319feaa..29994f2b 100644 --- a/grnoc-netsage-pipeline.spec +++ b/grnoc-netsage-pipeline.spec @@ -122,8 +122,7 @@ rm -rf $RPM_BUILD_ROOT # logstash files that can be updated automatically (if there are updates, the old ver will be in .rpmsave) %config(noreplace) /etc/logstash/conf.d/01-input-jsonfile.conf.disabled %config(noreplace) /etc/logstash/conf.d/01-input-multiline-json-file.conf.disabled -%config(noreplace) /etc/logstash/conf.d/99-output-jsonlog.conf.disabled -%config(noreplace) /etc/logstash/conf.d/99-output-multiline-json.conf.disabled +%config(noreplace) /etc/logstash/conf.d/99-output-file.conf.disabled %config(noreplace) /etc/logstash/conf.d/99-output-elastic.conf.disabled %config 
/etc/logstash/conf.d/05-translate-pmacct.conf %config /etc/logstash/conf.d/10-preliminaries.conf From 6f1a15013d6e6eaa1ce967d149d700cb9481c4b2 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 14 Sep 2022 18:26:05 +0000 Subject: [PATCH 118/126] Fix in MANIFEST --- MANIFEST | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MANIFEST b/MANIFEST index 9216d59c..952825e3 100644 --- a/MANIFEST +++ b/MANIFEST @@ -26,7 +26,7 @@ conf-logstash/90-additional-fields.conf conf-logstash/95-cleanup.conf conf-logstash/98-post-process.conf conf-logstash/99-output-rabbit.conf -conf-logstash/99-output-file.conf +conf-logstash/99-output-file.conf.disabled conf-logstash/99-output-elastic.conf.disabled conf-logstash/99-output-stdout.conf.disabled conf-logstash/ruby/anonymize_ipv6.rb From 449c57621c29bfbd66b9debb97060f64c2f9d8cf Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 15 Sep 2022 19:30:08 +0000 Subject: [PATCH 119/126] Set duration threshold to 1 sec --- CHANGES.md | 2 +- conf-logstash/41-thresholds.conf | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 7738d2e1..bdc686fd 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -20,7 +20,7 @@ Features: * For netflow, in 40-aggregation.conf, the start time of incoming flows will be adjusted if duration is greater than the active timeout (ie, for "updates" to long lasting flows) * The default inactive timeout for logstash aggregation has been set to 6 minutes (to go with 5 minute sflow aggregation by sfacctd) - * Added 41-thresholds.conf - applies size threshold of 10 MB (ie, drop smaller flows) and duration threshold of 0.1 sec (ie, set + * Added 41-thresholds.conf - applies size threshold of 10 MB (will drop smaller flows) and duration threshold of 1.0 sec (will set duration and rates to 0 if shorter) after aggregation is finished. * Added new field: @sampling_corrected = yes/no. If sampling rate correction has been applied by pmacct or logstash, value will be yes. * Sampling rate corrections will be done in logstash when requested (ie, flag is set in the env file) but diff --git a/conf-logstash/41-thresholds.conf b/conf-logstash/41-thresholds.conf index f7eff9f4..80e63659 100644 --- a/conf-logstash/41-thresholds.conf +++ b/conf-logstash/41-thresholds.conf @@ -9,8 +9,9 @@ filter { } # If duration is too small, it's almost certainly inaccurate and it will make rates inaccurate. - # For durations under the threshold, set duration and rates to 0. Default is 0.1 sec. - if [values][duration] < 0.1 { + # Netsage is also not interested in very small flows and we don't want to see them listed as fastest. + # For durations under the threshold, set duration and rates to 0. 
+ if [values][duration] < 1.0 { mutate { id => "41-2" replace => {"[values][duration]" => 0.0} From fa71c96da035672e9ec9cc02cbc1c11b1656c351 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Fri, 23 Sep 2022 21:44:33 +0000 Subject: [PATCH 120/126] Moving to example sensor_groups and sensor_types files --- CHANGES.md | 9 +++--- MANIFEST | 4 +-- bin/docker-netsage-downloads.sh.ORIG | 2 +- conf-logstash/support/sensor_groups.json | 30 ------------------- .../support/sensor_groups.json.example | 4 +-- conf-logstash/support/sensor_types.json | 29 ------------------ .../support/sensor_types.json.example | 4 +-- grnoc-netsage-pipeline.spec | 4 +-- 8 files changed, 13 insertions(+), 73 deletions(-) delete mode 100644 conf-logstash/support/sensor_groups.json delete mode 100644 conf-logstash/support/sensor_types.json diff --git a/CHANGES.md b/CHANGES.md index bdc686fd..8e13aaf3 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -28,19 +28,18 @@ Features: * Sensor list for sampling rate corrections in the env file is now semicolon-delimited. * Allowed "ALL" when specifying sensors for sampling rate corrections. * When a sampling rate correction is applied by logstash, add a tag with the rate. - * Added CERN and Utah regexes to sensor type and group files. * Added an option to skip de-identification. Set it in .env. * 0.0.0.x and 0.0.0.0 flows are tagged and dropped by default. (Unadvertised option to keep them is available in the env file.) - * Changed to sensor_groups.json.example and sensor_types.json.example. From now on, our particular files/regexes will be downloaded from - scienceregistry.grnoc. * Added setup-cron.sh script which copies .ORIG .cron and .sh files and writes in username and the location of the git checkout. The user must copy cron files to /etc/cron.d/. * One cron file runs a script to download all files (caida, maxmind, etc) from scienceregistry.grnoc once/wk. * Another cron file restarts the logstash container each day. + * Changed sensor_groups.json and sensor_types.json in the git checkout to .example files. From now on, our particular files/regexes + will be downloaded from scienceregistry.grnoc by a cron job (different cron jobs for docker and bare-metal installations). * Docker-compose.yml ensures logstash runs with uid 1000, while setup-cron.sh sets the owner of logstash-temp/ to 1000, so logstash can write and read aggregation map files when it stops and starts. (User 1000 could be anyone on the host; name doesn't matter.) - * AARNET privatization is no longer needed, so added .disabled to 80-privatize-org.conf, leaving a generalized version there as an - example. Moved lines making the org name consistent to 95-cleanup.conf. + * AARNET privatization is no longer needed, so added .disabled to 80-privatize-org.conf, and made it into a generalized version as an + example. Moved lines making the AARNET org name consistent to 95-cleanup.conf. 
* Documentation updates * Dependabot automatic remediations of vulnerabilites (for docusaurus) diff --git a/MANIFEST b/MANIFEST index 952825e3..8cad8bdf 100644 --- a/MANIFEST +++ b/MANIFEST @@ -31,8 +31,8 @@ conf-logstash/99-output-elastic.conf.disabled conf-logstash/99-output-stdout.conf.disabled conf-logstash/ruby/anonymize_ipv6.rb conf-logstash/ruby/domestic.rb -conf-logstash/support/sensor_groups.json -conf-logstash/support/sensor_types.json +conf-logstash/support/sensor_groups.json.example +conf-logstash/support/sensor_types.json.example conf-logstash/support/networkA-members-list.rb.example cron.d/baremetal-netsage-downloads.cron cron.d/restart-logstash-service.cron diff --git a/bin/docker-netsage-downloads.sh.ORIG b/bin/docker-netsage-downloads.sh.ORIG index e5336cb5..98cb2e98 100755 --- a/bin/docker-netsage-downloads.sh.ORIG +++ b/bin/docker-netsage-downloads.sh.ORIG @@ -11,7 +11,7 @@ # DOWNLOAD_PATH="/var/lib/grnoc/netsage" # SUPPORT_PATH="/etc/logstash/conf.d/support" -DOWNLOAD_PATH="-PATH-TO-GIT-CHECKOUT-/downloads" +DOWNLOAD_PATH="-PATH-TO-GIT-CHECKOUT-/logstash-downloads" SUPPORT_PATH="-PATH-TO-GIT-CHECKOUT-/conf-logstash/support" # MAXMIND ASN diff --git a/conf-logstash/support/sensor_groups.json b/conf-logstash/support/sensor_groups.json deleted file mode 100644 index 3976d12a..00000000 --- a/conf-logstash/support/sensor_groups.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "^.*cenic.*": "CENIC", - "^CERN.*": "CERN", - "^FRGP.*": "FRGP", - "^GEANT.*": "GEANT", - "^gpn-.*": "GPN", - "^GPN-.*": "GPN", - "^GPN .*": "GPN", - "^.*Hawaii.*": "University of Hawaii", - "^i-Light.*": "I-Light", - "^GigaPOP.*": "I-Light", - "^NEAAR.*": "NEAAR", - "^NEA3R.*": "NEAAR", - "^NORDUnet.*": "NORDUnet", - "^.*nersc.*": "NERSC", - "^.*pacificwave.*": "PacWave", - "^.*pnw-gigapop\\.net$": "PacWave", - "^LEARN.*": "LEARN", - "^PennREN.*": "PennREN", - "^SANReN.*": "SANReN", - "^SingAREN.*": "SingAREN", - "^.*sox.*": "SOX", - "^.*SoX.*": "SOX", - "^Sun Corridor.*": "Sun Corridor", - "^TACC.*": "TACC", - "^tacc.*": "TACC", - "^TransPAC.*": "TransPAC", - "^.*UCAR.*": "UCAR", - "^Utah.*": "Utah" -} diff --git a/conf-logstash/support/sensor_groups.json.example b/conf-logstash/support/sensor_groups.json.example index d2470717..8706bcb5 100644 --- a/conf-logstash/support/sensor_groups.json.example +++ b/conf-logstash/support/sensor_groups.json.example @@ -1,4 +1,4 @@ { - "^A.*$": "sensor group A", - "^B.*$": "sensor group B" + "^networkA-.*": "group A", + "^networkB-.*": "group B" } diff --git a/conf-logstash/support/sensor_types.json b/conf-logstash/support/sensor_types.json deleted file mode 100644 index 355d1a40..00000000 --- a/conf-logstash/support/sensor_types.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "^.*Tstat$": "Data Archive", - "^.*nersc\\.gov$": "Data Archive", - "^GEANT.*$": "Circuit", - "^Hawaii.*netflow$": "Circuit", - "^NEAAR.*": "Circuit", - "^NEA3R.*": "Circuit", - "^NORDUnet.*$": "Circuit", - "^TransPAC.*": "Circuit", - "^.*pacificwave\\.net$": "Exchange Point", - "^.*pnw-gigapop\\.net$": "Exchange Point", - "^SingAREN.*$": "Exchange Point", - "^.*cenic.*$": "Regional Network", - "^FRGP.*$": "Regional Network", - "^GigaPOP.*$": "Regional Network", - "^gpn-.*$": "Regional Network", - "^GPN-.*$": "Regional Network", - "^GPN .*$": "Regional Network", - "^i-Light.*$": "Regional Network", - "^LEARN.*$": "Regional Network", - "^PennREN.*$": "Regional Network", - "^SANReN.*$": "Regional Network", - "^.*sox.*$": "Regional Network", - "^.*SoX.*$": "Regional Network", - "^Sun Corridor.*$": "Regional 
Network", - "^tacc_netflows$": "Regional Network", - "^CERN.*$": "Facility Edge", - "^Utah.*$": "Campus" -} diff --git a/conf-logstash/support/sensor_types.json.example b/conf-logstash/support/sensor_types.json.example index e17f8763..9337d60b 100644 --- a/conf-logstash/support/sensor_types.json.example +++ b/conf-logstash/support/sensor_types.json.example @@ -1,5 +1,5 @@ { "^.*Tstat$": "Data Archive", - "^Network A -.*$": "Circuit", - "^Network B -.*$": "Regional Network" + "^Network A -.*": "Circuit", + "^Network B -.*": "Regional Network" } diff --git a/grnoc-netsage-pipeline.spec b/grnoc-netsage-pipeline.spec index 29994f2b..5c2644a8 100644 --- a/grnoc-netsage-pipeline.spec +++ b/grnoc-netsage-pipeline.spec @@ -142,8 +142,8 @@ rm -rf $RPM_BUILD_ROOT %config /etc/logstash/conf.d/99-output-stdout.conf.disabled %config /etc/logstash/conf.d/ruby/anonymize_ipv6.rb %config /etc/logstash/conf.d/ruby/domestic.rb -%config /etc/logstash/conf.d/support/sensor_groups.json -%config /etc/logstash/conf.d/support/sensor_types.json +%config /etc/logstash/conf.d/support/sensor_groups.json.example +%config /etc/logstash/conf.d/support/sensor_types.json.example %config /etc/logstash/conf.d/support/networkA-members-list.rb.example %config /etc/pmacct/sfacctd.conf.ORIG From 63b22d9dc5ff235a2527402e997809ce390cf02c Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Fri, 23 Sep 2022 21:46:04 +0000 Subject: [PATCH 121/126] Fixed sensor renaming in 15-sensor-specifi-changes.conf --- conf-logstash/15-sensor-specific-changes.conf | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/conf-logstash/15-sensor-specific-changes.conf b/conf-logstash/15-sensor-specific-changes.conf index 8134069e..a89cf256 100644 --- a/conf-logstash/15-sensor-specific-changes.conf +++ b/conf-logstash/15-sensor-specific-changes.conf @@ -77,11 +77,18 @@ filter { add_field => { "[@metadata][ifindex_sensor_rename_ifindex]" => "${ifindex_sensor_rename_ifindex:1}" } id => "15-5" } + # src and dst ifindexes are integers, so we need to convert this field before comparing! + mutate { + convert => { '[@metadata][ifindex_sensor_rename_ifindex]' => 'integer' } + id => "15-6" + } + if [meta][sensor_id] == [@metadata][ifindex_sensor_rename_old_name] - and ( [meta][src_ifindex] == [@metadata][ifindex_sensor_rename_ifindex] or [meta][dst_ifindex] == [@metadata][ifindex_sensor_rename_ifindex] ) { + and ( [meta][src_ifindex] == [@metadata][ifindex_sensor_rename_ifindex] + or [meta][dst_ifindex] == [@metadata][ifindex_sensor_rename_ifindex] ) { mutate { replace => { "[meta][sensor_id]" => "%{[@metadata][ifindex_sensor_rename_new_name]}" } - id => "15-6" + id => "15-7" } } } @@ -93,22 +100,22 @@ filter { # ALL can be used to apply the same correction to all sensors. 
mutate { add_field => { "[@metadata][sampling_correction_flag]" => "${sampling_correction_flag:False}" } - id => "15-7" + id => "15-8" } if [@metadata][sampling_correction_flag] == "True" and [@sampling_corrected] == "no" { mutate { add_field => { "[@metadata][sampling_correction_sensors]" => "${sampling_correction_sensors:sensor1;sensor2}" } add_field => { "[@metadata][sampling_correction_factor]" => "${sampling_correction_factor:1}" } - id => "15-8" + id => "15-9" } # make the field into an array (see comments about split above) mutate { split => { "[@metadata][sampling_correction_sensors]" => ";" } add_field => { "[@metadata][sampling_correction_sensors]" => "dummy" } - id => "15-9" + id => "15-10" } ruby { - id => "15-10" + id => "15-11" tag_on_exception => "_rubyexception B in 15-sensor-specific-changes. " code => ' # strip any leading or trailing spaces from sensor names @@ -138,24 +145,24 @@ filter { # Separate subnets with commas and lists with semicolons. mutate { add_field => { "[@metadata][subnet_filter_flag]" => "${subnet_filter_flag:False}" } - id => "15-11" + id => "15-12" } if [@metadata][subnet_filter_flag] == "True" { mutate { add_field => { "[@metadata][subnet_filter_keep]" => "${subnet_filter_keep:Some-Sensor:134.456.78.0/24}" } - id => "15-12" + id => "15-13" } mutate { # Split the string on ';' into an array of the same name (in a separate mutate, since in mutate, split happens before all add_fields) # Add a dummy array element to force it to be an array, in case there is just 1 value in the env file. split => { "[@metadata][subnet_filter_keep]" => ";" } add_field => { "[@metadata][subnet_filter_keep]" => "dummy" } - id => "15-13" + id => "15-14" } # Each (non-dummy) array element should have 'sensor-name: list-of-approved-subnets' # Use Ruby to loop and test ruby { - id => "15-14" + id => "15-15" tag_on_exception => "_rubyexception C in 15-sensor-specific-changes. " code => ' require "ipaddr" From 353ba60a30ee339a37e084f63e944a08713027ed Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Fri, 23 Sep 2022 21:47:33 +0000 Subject: [PATCH 122/126] Removed named volume for rabbit. --- .gitignore | 2 ++ docker-compose.example.yml | 8 -------- env.example | 3 ++- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 5171d55b..910b9467 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,8 @@ bin/restart-logstash-container.sh cron.d/restart-logstash-container.cron conf-pmacct/*_1* conf-pmacct/*_2* +conf-logstash/support/sensor_groups.json +conf-logstash/support/sensor_types.json # Dependencies /website/node_modules diff --git a/docker-compose.example.yml b/docker-compose.example.yml index ceae4865..a258a43e 100644 --- a/docker-compose.example.yml +++ b/docker-compose.example.yml @@ -64,10 +64,6 @@ services: # The port for the UI needs to be mapped to that on the host # To view, go to https:///rabbit - "15672:15672" - volumes: - # Use a named Volume for the rabbitmq config and files for its own use. - # We want data (msgs in the queue) to persist but don't need to see it. 
- - rabbit_vol:/var/lib/rabbitmq networks: - netsage-network @@ -91,8 +87,4 @@ services: depends_on: - rabbit # restart is not included since if logstash dies, there may be an error and we dont' want it to keep restarting over and over - -# named volumes -volumes: - rabbit_vol: diff --git a/env.example b/env.example index 5f9d4d4c..2c4e34d6 100644 --- a/env.example +++ b/env.example @@ -79,7 +79,7 @@ subnet_filter_flag=False full_IPs_flag=False # LOGSTASH PROCESS SETTINGS: -# memory - max java heap size +# memory - java heap size. Keep Xmx=Xms! LS_JAVA_OPTS=-Xmx4g -Xms4g # The aggregation filter requires there be only one logstash worker! Do not change. PIPELINE_WORKERS=1 @@ -89,6 +89,7 @@ PIPELINE_ECS_COMPATIBILITY=disabled # LOCAL RABBITMQ SERVER SETTINGS: # (for the post-pmacct/pre-logstash queue) +# (when running outside of docker, input_host should be localhost) RABBIT_HOST=rabbit RABBITMQ_DEFAULT_USER=guest RABBITMQ_DEFAULT_PASS=guest From 2d97cb3d4037e83af38b5c4fc8a4731a3bcaa71d Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Wed, 28 Sep 2022 20:48:12 +0000 Subject: [PATCH 123/126] pmacct processes don't need env file so removed from docker-compose. Edited some comments. --- conf-logstash/15-sensor-specific-changes.conf | 10 ++++++---- docker-compose.example.yml | 9 ++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/conf-logstash/15-sensor-specific-changes.conf b/conf-logstash/15-sensor-specific-changes.conf index a89cf256..d6651401 100644 --- a/conf-logstash/15-sensor-specific-changes.conf +++ b/conf-logstash/15-sensor-specific-changes.conf @@ -1,9 +1,11 @@ # Make any desired changes for flows from specific sensors -# Values for ${variable-name:default-value} are obtained from an environment file. For docker, from the .env file; -# for bare-metal installations, /etc/logstash/logstash-env-vars - specified in the logstash systemd file) -# If values are not provided in an env file, the defaults/examples following the :'s are used. -# With a bare-metal installation, you may also just edit this file and fill in the values you want. +# Values for ${variable-name:default-value} are obtained from the environment. For docker, from the .env file; +# for bare-metal installations, /etc/logstash/logstash-env-vars is specified in the logstash systemd file) +# If values are not provided by the environment, the defaults/examples following the :'s are used. +# +# Note that in a bare-metal installation, all logstash-pipelines use the same version of this file, so be sure options will +# apply only to those intended. You may just replace this with hardcoded "if" statements and "code" for what you want to happen. # Using env vars in conditionals has been an open issue for logstash since 2016! Workaround is to add a "flag" field. # (@metadata fields are not saved to elasticsearch) diff --git a/docker-compose.example.yml b/docker-compose.example.yml index a258a43e..77855815 100644 --- a/docker-compose.example.yml +++ b/docker-compose.example.yml @@ -1,17 +1,16 @@ version: "3.7" # Docker services and settings. -# The non-example version of this file is created by setup-pmacct-compose.sh. -# Use the override file for any manual overrides. +# Setup-pmacct-compose.sh uses this file to create the non-example version docker-compose.yml. +# Optionally, use the docker-compose.override.yml file for any further manual overrides. -# Shared network for the containers. Processes will be able to communicate over default ports. +# Shared network for the containers. 
Processes will be able to communicate over their default ports. networks: netsage-network: # Reusable blocks of settings x-default-pmacct-settings: &pmacct-defaults - env_file: .env volumes: # location of configs on host : location in container : read-only - ./conf-pmacct:/etc/pmacct:ro @@ -28,7 +27,7 @@ x-default-nfacct-settings: &netflow-defaults image: ghcr.io/netsage-project/nfacctd:7Jun2022 -# The containers (the setup script will replace those for sfacctd and nfacctd) +# The containers (the setup script will replace those for sfacctd and nfacctd with the correct number and names) services: sfacctd_1: From a1254697a3e2dc850aa76fa4e839b3653eb21a63 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Thu, 29 Sep 2022 20:37:22 +0000 Subject: [PATCH 124/126] Documentation stages, including manual installation instructions for pmacct ver. --- conf-logstash/80-privatize-org.conf.disabled | 10 +- website/docs/deploy/bare_metal_install.md | 318 +++++++----------- .../docs/deploy/docker_install_advanced.md | 62 ++-- website/docs/deploy/docker_install_simple.md | 100 +++--- website/docs/deploy/docker_troubleshooting.md | 2 +- website/docs/deploy/docker_upgrade.md | 10 +- website/docs/devel/docker.md | 2 +- website/docs/devel/pipeline_dataset.md | 2 + website/docs/devel/tag.md | 113 ------- website/sidebars.js | 1 - 10 files changed, 222 insertions(+), 398 deletions(-) delete mode 100644 website/docs/devel/tag.md diff --git a/conf-logstash/80-privatize-org.conf.disabled b/conf-logstash/80-privatize-org.conf.disabled index 7bc6231e..450a60bc 100644 --- a/conf-logstash/80-privatize-org.conf.disabled +++ b/conf-logstash/80-privatize-org.conf.disabled @@ -1,9 +1,11 @@ # Remove information about any organizations that have privacy rules that require us to not identify them. -### This is a fictional example -# To use, set the following in the code below: ASNs to privatize (asn_array), country the ASNs are in (called "CountryA" below); -# what to use when overwriting org names ("NetworkA"), org abbreviations ("NetA"), latitude and longitude (set to -25 and 25 below), -# scireg resource names ("NetworkA member"), and scireg resource abbrevations ("NetA member") +### The .disabled file has a fictional example +# To enable, set the following in the code below and remove the .disabled suffix. +# List the ASNs to privatize (asn_array) and specify the country the ASNs are in (called "CountryA" below). +# Specify what to use when overwriting org names (eg, "NetworkA"), org abbreviations ("NetA"), +# latitudes and longitudes (set to -25 and 25 below), +# scireg resource names ("NetworkA member"), and scireg resource abbrevations ("NetA member"). filter { diff --git a/website/docs/deploy/bare_metal_install.md b/website/docs/deploy/bare_metal_install.md index c0c21510..202c7ce2 100644 --- a/website/docs/deploy/bare_metal_install.md +++ b/website/docs/deploy/bare_metal_install.md @@ -4,7 +4,7 @@ title: Manual Installation Guide sidebar_label: Manual Installation --- -This document covers installing the NetSage Flow Processing Pipeline manually on a new machine (without using Docker). Steps should be followed below in order unless you know for sure what you are doing. This document assumes a RedHat Linux environment or one of its derivatives. +This document covers installing and running the NetSage Flow Processing Pipeline manually (without using Docker). It assumes a RedHat Linux environment or one of its derivatives. 
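Before the step-by-step sections, a quick sketch of what a healthy bare-metal pipeline can look like from the shell may help orient the reader. The ports (8000/9000), the interface name, and the service-name patterns below are examples and assumptions; use whatever your pmacct configs and systemd unit files actually define.

```sh
# Are the collectors listening on the expected UDP ports? (8000/9000 are example values)
ss -ulnp | grep -E ':(8000|9000)\b'

# Is flow data actually arriving from the routers? (interface name is an example)
tcpdump -c 5 -ni eth0 'udp port 8000 or udp port 9000'

# Are flows accumulating in the local RabbitMQ queue that logstash reads from?
rabbitmqctl list_queues name messages

# Are the collector and logstash services running? (unit names depend on your systemd files)
systemctl list-units --type=service | grep -E 'facctd|logstash'
```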
## Data sources @@ -15,31 +15,52 @@ The Processing pipeline needs data to ingest in order to do anything. There are At least one of these must be set up on a sensor to provide the incoming flow data. -Sflow and netflow data should be sent to ports on the pipeline host where nfcapd and/or sfcapd are ready to receive it. +See the Docker Installation instuctions for more info. -Tstat data should be sent directly to the logstash input RabbitMQ queue (the same one that the Importer writes to, if it is used). From there, the data will be processed the same as sflow/netflow data. ## Installing the Prerequisites -### Installing nfdump -The nfdump package provides nfcapd and sfcapd processes which recieve flow data and write nfcapd files. -The Importer also uses nfdump. If you are only collecting tstat data, you do not need nfdump. +### Installing Pmacct +The pmacct package provides nfacctd and sfacctd processes which receive flow data and write it to a rabbitmq queue. -Nfdump is _not_ listed as a dependency of the Pipeline RPM package, as in a lot cases people are running special builds of nfdump -- but make sure you install it before you try running the Netflow Importer. If in doubt, `yum install nfdump` should work. -Flow data exported by some routers require a newer version of nfdump than the one in the CentOS repos; in these cases, it may be necessary to manually compile and install the lastest nfdump. - -:::note -It is recommended to check the version of nfdump used in the Docker installation and use the same or newer in order to be sure that any fixes for impactful issues are included. -::: +Since the pmacct devs have not released a tagged version (or docker containers) since 1.7.7, and we require some commits that fixed an issue for us on Oct 11, 2021, we need to build pmacct from master (or master from some time after Oct 11, 2021). +``` + 1. Go to the host where you want to install or upgrade nfacctd and sfacctd + 2. Get dependencies if they were not previously installed on the host (Netsage needs librabbitmq-devel and jansson-devel) + $ sudo yum install libpcap-devel pkgconfig libtool autoconf automake make + $ sudo yum install libstdc++-devel gcc-c++ librabbitmq-devel jansson-devel.x86_64 + 3. Clone the repo + $ git clone https://github.com/pmacct/pmacct.git + 4. Rename the dir to, eg, pmacct-02Jun2022/, using today's date or the date of the code you are going to check out. eg. + $ cd pmacct-02June2022 + 5. You should be in master at this point. + To build and install a specific release/tag/branch, just check out that tag/branch and proceed. + We have done testing (and made docker images) with this version: + $ git checkout 865a81e1f6c444aab32110a87d72005145fd6f74 + 6. Get ready to build sfacctd and nfacctd (the following options are needed for Netsage) + $ ./autogen.sh + $ ./configure --enable-rabbitmq --enable-jansson + 7. Build and install + $ make + $ sudo make install + $ make clean + $ make distclean + 8. Check the versions + $ sfacctd -V + $ nfacctd -V + These should give something like this where 20220602 is the date: + nfacctd 1.7.8-git [20220602-0 (5e4b0612)] +``` -If desired, you can also install nfsen, which has a UI for viewing flow data and can manage starting and stopping all the nfcapd/sfcapd processes for you.The nfsen.conf file has a section in which to configure all the sources. ### Installing RabbitMQ -The pipeline requires a RabbitMQ server. 
Typically, this runs on the same server as the pipeline itself, but if need be, you can separate them (for this reason, the Rabbit server is not automatically installed with the pipeline package). +A local rabbitmq instance is used to hold flow data until logstash can retreive and process it. + +Typically, the rabbitmq server runs on the same server as the pipeline itself, but if need be, you can separate them (for this reason, the Rabbit server is not automatically installed with the pipeline package). ```sh [root@host ~]# yum install rabbitmq-server @@ -53,34 +74,40 @@ Typically, the default configuration will work. Perform any desired Rabbit confi or # systemctl start rabbitmq-server.service ``` +Being able to view the user interface in a browser window is very useful. Look up how to enable it. + ### Installing Logstash -See the logstash documentation. We are currently using Version 7.10. +See the logstash documentation. We are currently using Version 7.16.2. -### Installing the EPEL repo +``` +Download and install the public signing key + sudo rpm --import https://artifacts.elastic.co/GPG-KEY-elasticsearch -Some of our dependencies come from the EPEL repo. To install this: +Create or edit /etc/yum.repos.d/ELK.repo + [logstash-7.x] + name=Elastic repository for 7.x packages + baseurl=https://artifacts.elastic.co/packages/7.x/yum + gpgcheck=1 + gpgkey=https://artifacts.elastic.co/GPG-KEY-elasticsearch + enabled=1 + autorefresh=1 + type=rpm-md -``` -[root@host ~]# yum install epel-release -``` +Install + sudo yum install logstash +``` -### Installing the GlobalNOC Open Source repo -The Pipeline package (and its dependencies that are not in EPEL) are in the GlobalNOC Open Source Repo. +### Installing the Pipeline -For Red Hat/CentOS 6, create `/etc/yum.repos.d/grnoc6.repo` with the following content. +Installing the Pipeline just copies config, cron, and systemd files to the correct locations. There are no longer any perl scripts to install. -``` -[grnoc6] -name=GlobalNOC Public el6 Packages - $basearch -baseurl=https://repo-public.grnoc.iu.edu/repo/6/$basearch -enabled=1 -gpgcheck=1 -gpgkey=https://repo-public.grnoc.iu.edu/repo/RPM-GPG-KEY-GRNOC6 -``` +The last Pipeline package released by GlobalNOC (**a non-pmacct version**) is in the GlobalNOC Open Source Repo. You can use that, if the version you want is there, or you can just build the rpm from scratch, or manually copy files to the correct locations (the .spec file indicates where). -For Red Hat/CentOS 7, create `/etc/yum.repos.d/grnoc7.repo` with the following content. +(At least formerly, some of our dependencies come from the EPEL repo. We probably don't need this repo anymore though.) + +a. To use the GlobalNOC Public repo, for Red Hat/CentOS 7, create `/etc/yum.repos.d/grnoc7.repo` with the following content. ``` [grnoc7] @@ -93,207 +120,106 @@ gpgkey=https://repo-public.grnoc.iu.edu/repo/RPM-GPG-KEY-GRNOC7 The first time you install packages from the repo, you will have to accept the GlobalNOC repo key. -## Installing the Pipeline (Importer and Logstash configs) - -Install it like this: - -``` -[root@host ~]# yum install grnoc-netsage-deidentifier -``` - -Pipeline components: - -1. Flow Filter - GlobalNOC uses this for Cenic data to filter out some flows. Not needed otherwise. -2. Netsage Netflow Importer - required to read nfcapd files from sflow and netflow importers. (If using tstat flow sensors only, this is not needed.) -3. 
Logstash - be sure the number of logstash pipeline workers in /etc/logstash/logstash.yml is set to 1 or flow stitching/aggregation will not work right! -4. Logstash configs - these are executed in alphabetical order. See the Logstash doc. At a minimum, the input, output, and aggregation configs have parameters that you will need to update or confirm. - -Nothing will automatically start after installation as we need to move on to configuration. - -## Importer Configuration - -Configuration files of interest are - - /etc/grnoc/netsage/deidentifier/netsage_shared.xml - Shared config file allowing configuration of collections, and Rabbit connection information - - /etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml - other settings - - /etc/grnoc/netsage/deidentifier/logging.conf - logging config - - /etc/grnoc/netsage/deidentifier/logging-debug.conf - logging config with debug enabled - -### Setting up the shared config file - -`/etc/grnoc/netsage/deidentifier/netsage_shared.xml` - -There used to be many perl-based pipeline components and daemons. At this point, only the importer is left, the rest having been replaced by logstash. The shared config file, which was formerly used by all the perl components, is read before reading the individual importer config file. - -The most important part of the shared configuration file is the definition of collections. Each sflow or netflow sensor will have its own collection stanza. Here is one such stanza, a netflow example. Instance and router-address can be left commented out. +Install the package using yum: ``` - - - /path/to/netflow-files/ - - - Netflow Sensor 1 - - - sflow - - - - - - - - +[root@host ~]# yum install grnoc-netsage-pipeline ``` -Having multiple collections in one importer can sometimes cause issues for aggregation, as looping through the collections one at a time adds to the time between the flows, affecting timeouts. You can also set up multiple Importers with differently named shared and importer config files and separate init.d files. - -There is also RabbitMQ connection information in the shared config, though queue names are set in the Importer config. (The Importer does not read from a rabbit queue, but other old components did, so both input and output are set.) - -Ideally, flows should be deidentified before they leave the host on which the data is stored. If flows that have not be deidentified need to be pushed to another node for some reason, the Rabbit connection must be encrypted with SSL. - -If you're running a default RabbitMQ config, which is open only to 'localhost' as guest/guest, you won't need to change anything here. +b. To build the rpm from a git checkout, ``` - - - 127.0.0.1 - 5672 - guest - guest - 0 - 100 - / - 1 - - - - 127.0.0.1 - 5672 - guest - guest - 0 - 100 - / - 1 - +git clone https://github.com/netsage-project/netsage-pipeline.git +git checkout master (or a branch) +cd netsage-pipeline +perl Makefile.PL +make rpm +sudo yum install //rpmbuild/RPMS/noarch/grnoc-netsage-pipeline-2.0.0-1.el7.noarch.rpm + (use "reinstall" if the version number has not changed) ``` -### Setting up the Importer config file - -`/etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml` - -This file has a few more setting specific to the Importer component which you may like to adjust. - - - Rabbit_output has the name of the output queue. This should be the same as that of the logstash input queue. - - (The Importer does not actually use an input rabbit queue, so we add a "fake" one here.) 
- - Min-bytes is a threshold applied to flows aggregated within one nfcapd file. Flows smaller than this will be discarded. - - Min-file-age is used to be sure files are complete before being read. - - Cull-enable and cull-ttl can be used to have nfcapd files older than some number of days automatically deleted. - - Pid-file is where the pid file should be written. Be sure this matches what is used in the init.d file. - - Keep num-processes set to 1. +c. You could also just move files manually to where they need to go. It should be fairly obvious. +- /etc/logstash/conf.d/ +- /etc/pmacct/ +- /etc/cron.d/ +- /usr/bin/ +- /etc/systemd/system/ +- /var/lib/grnoc/netsage/ and /etc/logstash/conf.d/support/ (cron downloads) -```xml - - +## Logstash Configuration Files - - - netsage_deidentifier_netflow_fake - 2 - +We normally use defaults in Logstash settings files, but for Netsage, which uses the Logstash Aggregation filter, it is **required to use only ONE logstash pipeline worker**. - - 3 - netsage_deidentifier_raw - +IMPORTANT: Be sure to set `pipeline.workers: 1` in /etc/logstash/logstash.yml and/or /etc/logstash/pipelines.yml. When running logstash on the command line, use `-w 1`. - - - 100 +The Logstash config files containing the "filters" that comprise the Pipeline are installed in /etc/logstash/conf.d/. Most should be used as-is, but the input (01-) and output (99-) configs may be modified for your use. The aggregation filter (40-) also has settings that may be changed - check the two timeouts and the aggregation maps path. - - 1 +> **When processing flows from multiple customers** +> +> - We use one logstash instance with multiple "logstash-pipelines". The logstash-pipelines are defined in /etc/logstash/pipelines.yml. +> - Each logstash-pipeline uses config files in a different directory under /etc/logstash/pipelines/. +> - Since most of the config files are the same for all logstash-pipelines, we use symlinks back to files in /etc/logstash/conf.d/. +> - The exceptions are the input, output, and aggregation files (01-, 99-, and 40-). These are customized so that each logstash-pipeline reads from a different rabbit queue, saves in-progress aggregations to a different file when logstash stops, and writes to a different rabbit queue after processing. +> - We normally use one input rabbit queue and logstash-pipeline per customer (where one customer may have multiple sensors), but if there are too many sensors, with too much data, we may split them up into 2 or 3 different input queues and pipelines. +> - The output rabbit queues for processed flows may be on a different host (for us, they are). There, additional independent logstash pipelines can grab the flows and stick them into elasticsearch. Various queues may connect to various ES indices. It's most convenient to put all flows from sensors that will show up in the same granfana portal together in one index (or set of dated indices). - - +Check the 15-sensor-specific-changes.conf file. When running without Docker, especially with multiple customers, it's much easier to replace the contents of that file, which reference environment file values, with hard-coded "if" stagements and clauses that do just what you need. - - /var/cache/netsage/netflow_importer.cache +ENV FILE: Our standard processing for Netsage uses the default values for environment variables. These are set directly in the logstash configs. If any of these need to be changed, you can use an environment file: `/etc/logstash/logstash-env-vars`. 
The systemd unit file for logstash is set to read this file if it exists. You could copy into any or all the logstash-related settings from the env.example file. - - - 100000000 +Note that this file will be read and used by all logstash-pipelines. - - 10m - - - +## Pmacct Configuration and Unit Files - - - - +Each sensor is assumed to send to a different port on the pipeline host, and each port must have a different collector listening for incoming flow data. With pmacct, these collectors are nfacctd and sfacctd processes. Each requires its own config files and systemd unit file. - - - /var/run/netsage-netflow-importer-daemon.pid - +The easiest way to make the config files is to use the .env file and the setup-pmacct-compose.sh script that were primarily written for use with docker installations. See the Docker Installation documentation for details. +Doing just a few sensors at a time, edit the .env file and run the script. After running the script, you will find files like nfacctd_1.conf and nfacctd_1-pretag.map in conf-pmacct/ (in the git checkout). - -``` - -## Logstash Setup Notes - -Standard logstash filter config files are provided with this package. Most should be used as-is, but the input and output configs may be modified for your use. +You will have to then make the following changes: +- Rename the newly created .conf and .map files, replacing _1 with _sensorName (some string that makes sense to humans). Similarly of _2, etc. +- Edit each .conf file and change the name of the .map file within to match (the pre_tag_map value) +- Also, in each .conf file + - change the port number (nfacctd_port or sfacctd_port) to be the port to which the sensor is sending + - change the rabbit host (amqp_host) from "rabbit" to "localhost" + - change the name of the output rabbit queue (amqp_routing_key) to something unique (eg, netsage_deidentifier_raw_sensorName) +- Finally, copy the files to /etc/pmacct/ +(You can have the script make some of these changes for you if you temporarily edit the conf-pmacct/*.ORIG files.) -The aggregation filter also has settings that may be changed as well - check the two timeouts and the aggregation maps path. - -When upgrading, these logstash configs will not be overwritten. Be sure any changes get copied into the production configs. - -FOR FLOW STITCHING/AGGREGATION - IMPORTANT! -Flow stitching (ie, aggregation) will NOT work properly with more than ONE logstash pipeline worker! -Be sure to set "pipeline.workers: 1" in /etc/logstash/logstash.yml and/or /etc/logstash/pipelines.yml. When running logstash on the command line, use "-w 1". +You will also need to create systemd unit files to start and stop each process. Use systemd/sfacctd.service and nfacctd.service as examples. Each should be given a name like nfacctd-sensorName.service. Within the files, edit the config filename in two places. + ## Start Logstash ```sh -[root@host ~]# /sbin/service logstash start - or # systemctl start logstash.service +# systemctl start logstash.service ``` -It will take couple minutes to start. Log files are normally /var/log/messages and /var/log/logstash/logstash-plain.log. - -When logstash is stopped, any flows currently "in the aggregator" will be written out to /tmp/logstash-aggregation-maps (or the path/file set in 40-aggregation.conf). These will be read in and deleted when logstash is started again. +It will take a minute or two to start. Log files are normally /var/log/messages and /var/log/logstash/logstash-plain.log. `sudo systemctl status logstash` is also handy. 
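+
+For example, to watch it come up (a minimal sketch -- the log path is the default one mentioned above and the unit name comes from the `systemctl start logstash.service` command; adjust both if your install differs):
+
+```sh
+# follow logstash's own log file while it starts
+sudo tail -f /var/log/logstash/logstash-plain.log
+
+# or follow the systemd journal for the logstash service
+sudo journalctl -u logstash -f
+```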
-## Start the Importer +Be sure to check to see if it starts ok. If not, look for an error message. If all is ok, the last couple lines should be how many pipelines are running and something about connecting to rabbit. -Typically, the daemons are started and stopped via init script (CentOS 6) or systemd (CentOS 7). They can also be run manually. The daemons all support these flags: +NOTE: When logstash is stopped, any flows currently "in the aggregator" will be written out to /tmp/logstash-aggregation-maps (or the path/file set in 40-aggregation.conf). These will be read in and deleted when logstash is started again. (In some situations, it is desirable to just delete those files before restarting.) -`--config [file]` - specify which config file to read - -`--sharedconfig [file]` - specify which shared config file to read - -`--logging [file]` - the logging config - -`--nofork` - run in foreground (do not daemonize) +## Start Pmacct Processes ```sh -[root@host ~]# /sbin/service netsage-netflow-importer start - or # systemctl start netsage-netflow-importer.service +# systemctl start nfacctd-sensor1 +# systemctl start sfacctd-sensor2 +etc. ``` -The Importer will create a deamon process and a worker process. When stopping the service, the worker process might take a few minutes to quit. If it does not quit, kill it by hand. +After starting these processes, it's good to check the rabbit UI to watch for incoming flow data. Netflow data usually comes in every minute, depending on router settings, and sflow data should come in every 5 minutes since we have set sfacctd to do some pre-aggregation and send results every 5 minutes. You should also see that the messages are consumed by logstash and there is no long-term accumulation of messages in the queue. + +We have noted that in some cases, pmacct is providing so many flows that logstash cannot keep up and the number of messages in the queue just keeps increaseing! This is an issue that has yet to be resolved. + +Flows should exit the pipeline (and appear in Elasticsearch) after about 15 minutes. The delay is due to aggregation. Long-lasting flows will take longer to exit. ## Cron jobs -Sample cron files are provided. Please review and uncomment their contents. These periodically download MaxMind, CAIDA, and Science Registry files, and also restart logstash. Logstash needs to be restarted in order for any updated files to be read in. +Inactive cron files are installed (and provided in the cron.d/ directory of the git checkout). Baremetal-netsage-downloads.cron and restart-logstash-service.cron should be in /etc/cron.d/. Please review and uncomment their contents. + +These periodically download MaxMind, CAIDA, and Science Registry files, and also restart logstash. Logstash needs to be restarted in order for any updated files to be used. diff --git a/website/docs/deploy/docker_install_advanced.md b/website/docs/deploy/docker_install_advanced.md index d15525b8..04123221 100644 --- a/website/docs/deploy/docker_install_advanced.md +++ b/website/docs/deploy/docker_install_advanced.md @@ -29,47 +29,22 @@ netflowSensorName_3=The 3rd Netflow Sensor Name netflowPort_3=9002 ``` -#### b. Edit docker-composeoverride_example.yml - -Add more nfacctd services to the **example** override file. When copying and pasting, replace _1 with _2 or _3 in three places! Your file should look look something like this (remember you'll need to do this again after an upgrade! We need to fix the script to do this automatically): +#### b. 
Rerun setup-pmacct-compose.sh

```
./setup-pmacct-compose.sh
```

-#### c. Rerun setup-pmacct.sh
-
-Delete (after backing up) docker-compose.override.yml so the pmacct setup script can recreate it along with creating additional nfacctd config files.
-
-```
-rm docker-compose.override.yml
-./pmacct-setup.sh
-```
-
-Check docker-compose.override.yml and files in conf-pmacct/ for consistency.
+Check the new docker-compose.yml and files in conf-pmacct/ for consistency.

#### d. Start new containers

-If you are simply adding new collectors nfacctd_2 and nfacctd_3, and there are no changes to nfacctd_1, you should be able to start up the additional containers with
+To be safe, bring everything down first, then back up.

-```sh
-docker-compose up -d
```
-
-Otherwise, or to be safe, bring everything down first, then back up.
+docker-compose down
+docker-compose up -d
+```

## To Filter Flows by Interface
If your sensors are exporting all flows, but only those using particular interfaces are relevant, use this option in the .env file. All incoming flows will be read in, but the logstash pipeline will drop those that do not have src_ifindex OR dst_ifindex equal to one of those listed. (Processing a large number of unnecessary flows may overwhelm logstash, so if at all possible, try to limit the flows at the router level or using iptables.)
@@ -112,14 +87,16 @@ In the .env file, uncomment the appropriate section and enter the information re

```sh
ifindex_sensor_rename_flag=True
ifindex_sensor_rename_ifindex=10032
-ifindex_sensor_rename_old_name=IU Sflow
-ifindex_sensor_rename_new_name=IU Bloomington Sflow
+ifindex_sensor_rename_old_name=MyNet Sflow
+ifindex_sensor_rename_new_name=MyNet Bloomington Sflow
```

-In this case, any flows from the "IU Sflow" sensor that use interface 10032 (src_ifindex = 10032 OR dst_ifindex = 10032) will have the sensor name changed from "IU Sflow" to "IU Bloomington Sflow". Currently, only one such rename can be configured in Docker and only 1 ifindex is allowed.
+In this case, any flows from the "MyNet Sflow" sensor that use interface 10032 (src_ifindex = 10032 OR dst_ifindex = 10032) will have the sensor name changed from "MyNet Sflow" to "MyNet Bloomington Sflow".
+
+Currently, only one such rename can be configured in Docker and only 1 ifindex is allowed.

:::note
-Please notify the devs at IU in advance, if you need to modify a sensor name, because the regexes used for determining sensor_group and sensor_type may have to be updated.
+Please notify the devs in advance, if you need to modify a sensor name, because the regexes used for determining sensor_group and sensor_type may have to be updated.
:::

## To Do Sampling Rate Corrections in Logstash
@@ -129,11 +106,13 @@ In the .env file, uncomment the appropriate section and enter the information re

```sh
sampling_correction_flag=True
-sampling_correction_sensors=IU Bloomington Sflow; IU Indy Sflow
+sampling_correction_sensors=MyNet Bloomington Sflow; MyNet Indy Sflow
sampling_correction_factor=512
```

-In this example, all flows from sensors "IU Bloomington Sflow" and "IU Indy Sflow" will have a correction factor of 512 applied by logstash. Any other sensors will not have a correction applied by logstash (presumably pmacct would apply the correction automatically).
+In this example, all flows from sensors "MyNet Bloomington Sflow" and "MyNet Indy Sflow" will have a correction factor of 512 applied by logstash. Any other sensors will not have a correction applied by logstash (presumably pmacct would apply the correction automatically).
+
+Only one correction factor is allowed, so you cannot, for example, correct Sensor A with a factor of 512 and also Sensor B with a factor of 100.

>Note that if pmacct has made a sampling correction already, no additional manual correction will be applied, even if these options are set,
>so this can be used *to be sure* a sampling correction is applied.
@@ -174,6 +153,13 @@ See **conf-logstash/support/networkA-members-list.rb.example** for an example.

At https://scienceregistry.netsage.global, you can see a hand-curated list of resources (IP blocks) which are linked to the organizations, sciences, and projects that use them. This information is used by the Netsage pipeline to tag science-related flows. If you would like to see your resources or projects included, please contact us to have them added to the Registry.

+## To Use IPtables to Block Some Incoming Traffic
+
+In certain situations, you may want to use a firewall to block some of the traffic coming to your pipeline host so that it does not enter the docker containers. For example, if multiple routers must send to the same port on the host, but you only want to process flows from one of them, you can use iptables to block traffic from those you don't want.
+
+With Docker, the INPUT chain in iptables is skipped for traffic going to containers and the FORWARD chain is used instead. The first rule of the FORWARD chain is a jump to the DOCKER-USER chain; this is where you can add your own rules, and Docker will not override them. Rules that Docker creates are added to the DOCKER chain; do not manipulate this chain manually.
+
+To allow only a specific IP or network to access the containers, insert a negated DROP rule at the top of the DOCKER-USER chain (or an ACCEPT rule for the allowed source followed by a DROP for everything else).

## To Bring up Kibana and Elasticsearch Containers

diff --git a/website/docs/deploy/docker_install_simple.md b/website/docs/deploy/docker_install_simple.md
index 30486161..9efc32f3 100644
--- a/website/docs/deploy/docker_install_simple.md
+++ b/website/docs/deploy/docker_install_simple.md
@@ -6,25 +6,27 @@ sidebar_label: Docker Installation

This deployment guide describes how to deploy a basic Netsage setup that includes one sflow and/or one netflow collector. If you have more than one collector of either type, or other special situations, see the Docker Advanced guide.
The Docker containers included in the installation are - - sfacctd_1 (sflow collector - receives sflow data and writes it to a rabbit queue) - - nfacctd_1 (netflow collector - receives netflow data and writes it to a rabbit queue) - - rabbit (the local RabbitMQ server) - - logstash (logstash pipeline that pulls from the rabbit queue, processes flows, and sends to the final destination) + - sfacctd_1 to _n - sflow collectors (one per sflow sensor) - each receives sflow data and writes it to a rabbit queue) + - nfacctd_1 to _n - netflow collector (one per netflow sensor) - each receives netflow data and writes it to a rabbit queue) + - rabbit - the local RabbitMQ server + - logstash - logstash pipeline that pulls from the rabbit queue, processes flows, and sends to the final destination -### 1. Set up a Pipeline Host -Decide where to run the Docker Pipeline and get it set up. The default java heap size for logstash is 4GB so have at least 8GB of memory. Little disk space should be needed. +### 1. Prepare a Pipeline Host +Decide where to run the Docker Pipeline, eg, create a VM. The default java heap size for logstash is 4GB so have at least 8GB of memory. Little disk space should be needed. Install Docker Engine (docker-ce, docker-ce-cli, containerd.io) - see instructions at [https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/). +This page has a good list of post-installation steps you may want or need to do: [https://docker-docs.netlify.app/install/linux/linux-postinstall/](https://docker-docs.netlify.app/install/linux/linux-postinstall/). + Start docker: ``` sudo systemctl docker start ``` -Install Docker Compose from Docker's GitHub repository - see [https://docs.docker.com/compose/install/](https://docs.docker.com/compose/install/). You need to **specify version 1.29.2** (or newer) in the curl command. +Docker Compose is not part of Docker Engine, so must be installed separately from Docker's GitHub repository - see [https://docs.docker.com/compose/install/](https://docs.docker.com/compose/install/). You need to **specify version 1.29.2** (or newer) in the curl command. -Check which file permissions new files are created with. If the *logstash* user is not able to access the logstash config files in the git checkout, you'll get an error from logstash saying there are no .conf files found even though they are there. Defaults of 775 (u=rwx, g=rwx, o=rx) should work. +Check which file permissions new files are created with on the host. If the *logstash* user is not able to access the logstash config files in the git checkout, you'll get an error from logstash saying there are no .conf files found even though they are there. Defaults of 775 (u=rwx, g=rwx, o=rx) should work. ### 2. Set up Data Sources The data processing pipeline needs data to ingest in order to do anything, of course. There are three types of data that can be consumed. @@ -34,7 +36,7 @@ The data processing pipeline needs data to ingest in order to do anything, of co - tstat At least one of these must be set up on a **sensor** (i.e., flow **exporter** / router), to provide the incoming flow data. -You can do this step later, but it will helpful to have it working first. Check it with tcpdump on the pipeline host. +You can do this step later, but it will helpful to have it working first. Configure sflow and netflow to send flow data to the pipeline host. Each sensor/router should send to a different port. You will list the port numbers in the .env file (see below). 
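+
+For example, a quick sanity check that flow packets are actually reaching the host (a sketch -- the interface name and port number are placeholders; use your host's interface and the port one of your sensors sends to):
+
+```sh
+# watch for incoming sflow/netflow UDP packets on one sensor's port
+sudo tcpdump -i eth0 -nn -c 20 udp port 9000
+```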
@@ -44,6 +46,7 @@ On the pipeline host, configure the firewall to allow incoming traffic from the Tstat data should be sent directly to the logstash input rabbit queue "netsage_deidentifier_raw" on the pipeline host. No collector is needed for tstat data. See the netsage-project/tstat-transport repo. (From there, logstash will grab the data and process it the same way as it processes sflow/netflow data. +Check to see if data is arriving with tcpdump. ### 3. Clone the Netsage Pipeline Project @@ -53,7 +56,7 @@ git clone https://github.com/netsage-project/netsage-pipeline.git ``` When the pipeline runs, it uses some of the files that are in the git checkout, so it is important to checkout the correct version. -Move into the netsage-pipeline/ directory (**all git, docker, and other commands below must be run from inside this directory!**), then checkout the most recent version of the pipeline (the most recent tag). It will say you are in 'detached HEAD' state. +Move into the netsage-pipeline/ directory (**all git, docker, and other commands below must be run from inside this directory!**), then checkout the most recent version of the pipeline (normally the most recent tag). It will say you are in 'detached HEAD' state. ```sh cd netsage-pipeline git checkout {tag} @@ -61,65 +64,81 @@ git checkout {tag} Replace "{tag}" with the release version you intend to use, e.g., "v2.0.0". ("Master" is the development version and is not intended for general use!) `git status` will confirm which branch you are on, e.g., master or v2.0.0. ->Files located in the git checkout that are used by the docker services and cron: ->- the .env file ->- docker-compose.yml and docker-compose.override.yml ->- files in conf-logstash/ ->- non-ORIG files in conf-pmacct/ ->- cron jobs use non-ORIG files in bin/ and cron.d/ and write to logstash-downloads/ ->- logstash may write to or read from logstash-temp/ -> On upgrade, docker-compose.yml, files in conf-logstash, ORIG and example files will be overwritten. +>Files located in the git checkout that are used by the docker containers and cron: +>- the .env file (created by setup script from example file) +>- docker-compose.yml (created by setup script from example file) and docker-compose.override.yml (optional) +>- logstash config files in conf-logstash/ +>- non-ORIG nfacctd and sfacctd config files in conf-pmacct/ (created by setup script) +>- cron jobs use non-ORIG files in bin/ (created by setup script) and save files to logstash-downloads/ +>- logstash may write to or read from logstash-temp/ when it stops or starts +>On upgrade, example and ORIG files and files in conf-logstash/ will be overwritten. ### 4. Create the Environment File -Next, copy `env.example` to `.env` then edit the .env file to set the sensor names, ports, and where to send processed flows. +Next, copy `env.example` to `.env` then edit the .env file to set the number of sensors of each type, the sensor names and ports, and where to send processed flows. ```sh cp env.example .env ``` -1. By default, the number of sflowSensors and netflowSensors is set to 1 at the top. If you know from the start that you will have only 1 sensor, set either sflowSensors or netflowSensors to 0 and comment out the sensor name and port below. +The .env file is used in multiple ways - by setup scripts as well as by docker-compose and hence logstash and rabbitmq. Everything you need to set is in this one location. - If you will have more than 1 of one type of sensor, see the Docker Advanced Options documentation. 
+By default, the number of sflowSensors and netflowSensors is set to 1 at the top. If you know from the start that you will have only 1 sensor, set either sflowSensors or netflowSensors to 0 and comment out the sensor name and port below. If you know that you will have more than 1 sensor of the same type, specify the number and add variables for the extra sensor names and ports. Note that the variable names need to have _1 replaced by _2, etc. For example, -2. In the next section of the .env file, declare the name of sflow sensor 1 and the port to which the exporter is sending the flows. Similarly for netflow sensor 1. +``` +sflowSensors=1 +netflowSensors=2 -3. You will also want to edit the **rabbit_output** variables. This section defines where the final data will land after going through the pipeline. By default, it will be written to a rabbitmq queue on `rabbit`, ie, the local rabbitMQ server running in the docker container, but there is nothing provided to do anything further with it. +# sflow sensors: +sflowSensorName_1=MyNetwork New York Sflow +sflowPort_1=8010 - To send processed flow data to Indiana University, you will need to obtain settings for this section from your contact. A new queue may need to be set up at IU, as well as allowing traffic from your pipeline host. (At IU, data from the this final rabbit queue will be moved into an Elasticsearch instance for storage and viewing in Netsage Portals.) +# netflow sensors: +netflowSensorName_1=MyNetwork LA Netflow +netflowPort_1=9000 +netflowSensorName_2=MyNetwork Seattle Netflow +netflowPort_2=9010 +``` :::note Sensor names uniquely identify the source of the data and will be shown in the Grafana dashboards so they should be understandable by a general audience. For example, your sensor names might be "MyNet New York Sflow" or "MyNet New York to London". (Running your proposed names by a Netsage admin would be helpful.) + +Also, pmacct does not properly handle sensor names containing commas! ::: -### 5. Run the pmacct setup script +You will also want to edit the **rabbit_output** variables. This section defines where the final data will land after going through the pipeline. By default, it will be written to a rabbitmq queue on `rabbit`, ie, the local rabbitMQ server running in the docker container, but there is nothing provided to do anything further with it. + +To send processed flow data to us, you will need to obtain settings for this section from your contact. A new queue may need to be set up on our end, as well as allowing traffic from your pipeline host. (On our end, data from the this final rabbit queue will be moved into an Elasticsearch instance for storage and viewing in Netsage Portals.) + + +### 5. Run the Pmacct/Compose Setup Script ```sh -./setup-pmacct.sh +./setup-pmacct-compose.sh ``` This script will use settings in the .env file to create pmacct (ie, nfacctd and sfacctd) config files in **conf-pmacct/** from the .ORIG files in the same directory. -It will also create **docker-compose.override.yml** from docker-compose.override_example.yml, or update it if it exists, filling in ${var} values from the .env file. (This is needed since pmacct can't use environment variables directly, like logstash can.) +It will also create **docker-compose.yml** from docker-compose.example.yml, filling in the correct number of nfacctd and sfacctd services and substituting ${var} values from the .env file. (This is needed since pmacct can't use environment variables directly, like logstash can.) 
-Information in the docker-compose.yml file tells docker which containers (processes) to run and sets various parameters for them. -Settings in the docker-compose.override.yml file will overrule and add to those. Note that docker-compose.yml should not be edited since upgrades will replace it. All customizations go in the override file, which will not be overwritten. +Information in the docker-compose.yml file tells docker which containers to run (or stop). +if needed, you can create a docker-compose.override.yml file; settings in this file will overrule and add to those in docker-compose.yml. All customizations should go in the override file, which will not be overwritten. -Check the override file to be sure it looks ok and is consistent with the new config files in conf-pmacct/. All environment variables (${x}) should be filled in. +Check the docker-compose file to be sure it looks ok and is consistent with the new config files in conf-pmacct/. All environment variables (${x}) should be filled in. Under ports, there should be two numbers separated by a colon, eg, "18001:8000/udp" -### 6. Run the cron setup script +### 6. Run the Cron Setup Script ```sh ./setup-cron.sh ``` -This script will create docker-netsage-downloads.cron and .sh and restart-logstash-container.cron and .sh files in **cron.d/** and **bin/** from .ORIG files in the same directories, filling in required information. +This script will create docker-netsage-downloads.cron and restart-logstash-container.cron in the checkout's **cron.d/** directory, along with matching .sh files in **bin/**. These are based on .ORIG files in the same directories but have required information filled in. -The downloads cron job runs the downloads shell script, which will get various files required by the pipeline from scienceregistry.grnoc.iu.edu on a weekly basis. +The docker-netsage-downloads cron job runs the downloads shell script, which will get various files required by the pipeline from scienceregistry.grnoc.iu.edu on a weekly basis. The restart cron job runs the restart shell script, which restarts the logstash container once a day. Logstash must be restarted to pick up any changes in the downloaded files. -Note that you need to manually check and then copy the .cron files to /etc/cron.d/. +**Note that you need to manually check and then copy the .cron files to /etc/cron.d/.** ```sh sudo cp cron.d/docker-netsage-downloads.cron /etc/cron.d/ @@ -132,7 +151,7 @@ Also, manually run the downloads script to immediately download the required ext bin/docker-netsage-downloads.sh ``` -Check to be sure files are in downloads/. +Check to be sure files are in logstash-downloads/. ### 8. Start up the Docker Containers @@ -158,9 +177,12 @@ docker-compose logs logstash docker-compose logs rabbit docker-compose logs sfacctd_1 docker-compose logs nfacctd_1 +etc. ``` -Add `-f`, e.g. `-f logstash` to see new log messages as they arrive. `--timestamps`, `--tail`, and `--since` are also useful -- look up details in Docker documentation. +`--timestamps`, `--tail`, and `--since` are also useful -- look up details in Docker documentation. + +When running properly, logstash logs should end with a line saying how many pipelines are running and another about connecting to rabbitmq. To shut down the pipeline (all containers) use @@ -178,13 +200,13 @@ To shut down the pipeline (all containers) use The rabbitMQ user interface can be used to see if there are incoming flows from pmacct processes and if those flows are being comsumed by logstash. 
-In your browser, go to ``` https://<pipeline host>/rabbit ``` Login with username *guest*, password *guest*. Look at the small graph showing rates for incoming messages, acks, etc. You should see bursts of incoming messages and no longterm buildup of messages in the other graph.
+In your browser, go to ``` https://<pipeline host>/rabbit ``` Login with username *guest*, password *guest*. Look at the small graph showing rates for incoming messages, acks, etc. You should see bursts of incoming messages (usually once a minute for netflow and once every 5 min for sflow) and no long-term buildup of messages in the other graph.

### 10. Check for processed flows

-- Ask your contact at IU to check for flows and/or look at dashboards in your grafana portal. Flows should appear after 10-15 minutes.
+- Ask your contact to check for flows and/or look at dashboards in your grafana portal if it's already been set up. Flows should appear after 10-15 minutes.
- Check to be sure the sensor name(s) are correct in the portal.
-- Check flow sizes and rates to be sure they are reasonable. (If sampling rate corrections are not being done properly, you may have too few flows and flows which are too small.) You IU contact can check to see whether flows have @sampling_corrected=yes (a handful from the startup of netflow collection may not) and to check for unusal tags on the flows.
+- Check flow sizes and rates to be sure they are reasonable. (If sampling rate corrections are not being done properly, you may have too few flows and flows which are too small.) Your contact can check to see whether flows have @sampling_corrected=yes (a handful from the startup of netflow collection may not) and to check for unusual tags on the flows.

If you are not seeing flows, see the Troubleshooting section of the documentation.

diff --git a/website/docs/deploy/docker_troubleshooting.md b/website/docs/deploy/docker_troubleshooting.md
index 7afe1dd9..babed481 100644
--- a/website/docs/deploy/docker_troubleshooting.md
+++ b/website/docs/deploy/docker_troubleshooting.md
@@ -37,7 +37,7 @@ sidebar_label: Troubleshooting
- Make sure you don't have sflows going to a nfacctd process or vice versa.
- Are there names and port numbers for each sensor?
- Are the environment variable names for sensors like *_1, *_2, *_3, etc. with one sequence for sflow and one for netflow?
-- Did you run setup-pmacct.sh?
+- Did you run setup-pmacct-compose.sh?
- In docker-compose.override.yml, make sure the ports are set correctly. You will see *port on host : port in container*. (Docker uses its own port numbers internally.) *Port on host* should match what is in .env (the port the router is sending to on the pipeline host). *Port in container* should match what is in the corresponding pmacct config.
- In pmacct config files, make sure amqp_host is set to rabbit (for docker installs) or localhost (for bare metal)
- In 'docker-compose ps' output, be sure the command for the sfacctd_1 container is /usr/local/sbin/sfacctd, similarly for nfacctd.
diff --git a/website/docs/deploy/docker_upgrade.md b/website/docs/deploy/docker_upgrade.md
index 28084035..b5456759 100644
--- a/website/docs/deploy/docker_upgrade.md
+++ b/website/docs/deploy/docker_upgrade.md
@@ -35,16 +35,16 @@ git pull

### 3. Recreate and check custom files

-- Compare the .env to env.example to see if any changes have been made.
+- Compare your .env to env.example to see if any changes have been made. Copy in any updates, particularly any relevant ones, or just recreate the .env file as you did during installation.
-- Rerun the pmacct setup script to recreate the pmacct config files, in case there have been any changes. This might also update the override file. +- Run the pmacct/compose setup script to recreate the pmacct config files and the docker-compose.yml file, in case there have been any changes. ```sh - ./setup-pmacct.sh + ./setup-pmacct-compose.sh ``` -- Compare the docker-compose.override.yml file to the example. (Expect the example file to have environment variables that have gotten filled in in the non-example file.) If there are new lines or sections that are missing, copy them in. The setup script is not able to handle much in the way of changes. +- If there is a docker-compose.override.yml file, check to see if it's still needed. (For the upgrade to v2.0, you will want to get rid of the override file since we are doing everything directly in docker-compose.yml now. - Rerun the cron setup script to recreate the non-ORIG files in bin/ and cron.d/: @@ -52,7 +52,7 @@ git pull ./setup-cron.sh ``` -- Compare the resulting .cron files in the cron.d/ directory to those in /etc/cron.d/. If any have changed, copy them to /etc/cron.d/. +- Compare the resulting files in the cron.d/ directory to those in /etc/cron.d/. If any have changed, copy them to /etc/cron.d/. ### 4. Restart all the Docker Containers diff --git a/website/docs/devel/docker.md b/website/docs/devel/docker.md index e8764038..b124d463 100644 --- a/website/docs/devel/docker.md +++ b/website/docs/devel/docker.md @@ -31,7 +31,7 @@ docker-compose logs -f # view logs for all containers docker-compose logs -f # view logs for container, eg logstash ``` -## To Build Docker Images +## To Build Pmacct Docker Images We will normally use official images for rabbitMQ, logstash, nfacctd, and sfacctd, so no building of images is required. diff --git a/website/docs/devel/pipeline_dataset.md b/website/docs/devel/pipeline_dataset.md index 0319cc27..ce98a436 100644 --- a/website/docs/devel/pipeline_dataset.md +++ b/website/docs/devel/pipeline_dataset.md @@ -4,6 +4,8 @@ title: Pipeline Replay Dataset sidebar_label: Replay Dataset --- +(We haven't been using this for a long time, so it may be out of date.) + The Netsage Pipeline processes network data. Though there are some components and patterns we can use to test the behavior using things like the Ruby unit [tests](https://github.com/netsage-project/netsage-pipeline/tree/master/conf-logstash/ruby/spec) in logstash, and the [generator](https://www.elastic.co/guide/en/logstash/current/plugins-inputs-generator.html) pligin, but the best test is to replay network data and inspect the output in the grafana dashboard. diff --git a/website/docs/devel/tag.md b/website/docs/devel/tag.md deleted file mode 100644 index 18819a89..00000000 --- a/website/docs/devel/tag.md +++ /dev/null @@ -1,113 +0,0 @@ ---- -id: docker_dev_tag -title: How to Release a New Version of the Pipeline -sidebar_label: Making Releases ---- - -If a new version of nfdump needs to be used, make the new nfdump-collector image(s) first (see below) and update the docker-compose files with the new version number, then make new pipeline_importer and pipeline_logstash images.. - -## Make an RPM Release - -Use standard procedures to create an rpm of the new version of the pipeline. Update the version number and the CHANGES file, build the rpm, repoify, etc., then upgrade grnoc-netsage-deidentifier on bare-metal hosts using yum. If all works well, do the following steps to create new Docker images with which to upgrade Docker deployments. 
- -## In Github, Create a Release Tag - -Create a new Tag or Release in Github, eg, v1.2.11. -Be sure to copy info from the CHANGES file into the Release description. - -## To Build and Push Images Manually - -Below is the procedure to build pipeline_importer and pipeline_logstash images manually. - -Install docker-compose if not done already. See the Docker Installation instructions. - -Git clone (or git pull) the pipeline project and check out the tag you want to build, then set the version number in docker-compose.build.yml using the script. Eg, for v1.2.11, -``` -git clone https://github.com/netsage-project/netsage-pipeline.git -cd netsage-pipeline -git checkout -b v1.2.11 -./scripts/docker_select_version.sh 1.2.11 -``` - -Then build the pipeline_importer and pipeline_logstash images and push them to Docker Hub: -``` -$ sudo systemctl start docker -$ sudo docker-compose -f docker-compose.build.yml build -$ sudo docker login - provide your DockerHub login credentials -$ sudo docker-compose -f docker-compose.build.yml push (will push images mentioned in docker-compose.yml ??) - or $ docker push $image:$tag (will push a specific image version) -$ sudo systemctl stop docker -``` -If you run into an error about retrieving a mirrorlist and could not find a valid baseurl for repo, restart docker and try again. -If that doesn't work, try adding this to /etc/hosts: `67.219.148.138 mirrorlist.centos.org`, and/or try `yum install net-tools bridge-utils`, and/or restart network.service then docker. - -The person pushing to Docker Hub must have a Docker Hub account and belong to the Netsage team (3 users are allowed, for the free level). - -It might be a good idea to test the images before pushing them. See "Test Docker Images" below. - - -## Building With Automation - -??? - -## Test Docker Images - -See the Docker installation instructions for details... - -In the git checkout of the correct version, make an .env file and a docker-compose.override.yml file. You probably want to send the processed data to a dev Elasticsearch instance. Use samplicate or some other method to have data sent to the dev host. - -Run docker_select_version.sh if you haven't already, then start it up `$ sudo docker-compose up -d`. If there are local images, they'll be used, otherwise they'll be pulled from Docker Hub. - -After about 30 minutes, you should see flows in elasticsearch. - -## Make Versioned Docs - -A new set of versioned docs also has to be tagged once you are done making changes for the latest pipeline version. See the **Docusaurus guide**. - -## To Make New Nfdump-Collector Images - -If a new version of nfdump has been released that we need, new nfdump-collector images need to be made. - -``` -$ git clone https://github.com/netsage-project/docker-nfdump-collector.git -$ cd docker-nfdump-collector -$ sudo systemctl start docker -``` - -To use squash: create a file at /etc/docker/daemon.json and put into it -``` - "experimental": true - "debug: false" -``` - -To build version $VER, eg, 1.6.23 (both regular and alpine linux versions ?): -``` -$ sudo docker build --build-arg NFDUMP_VERSION=$VER --tag netsage/nfdump-collector:$VER --squash collector -$ sudo docker build --build-arg NFDUMP_VERSION=$VER --tag netsage/nfdump-collector:alpine-$VER -f collector/Dockerfile-alpine --squash . 
-``` - -To push to Docker Hub and quit docker -``` -$ sudo docker login - provide your DockerHub login credentials -$ sudo docker push netsage/nfdump-collector:$VER -$ sudo systemctl stop docker -``` - -To use the new collector image in the pipeline, change the version number in docker-compose.override_example.yml. For example, to use the alpine-1.6.23 image: -``` -sflow-collector: - image: netsage/nfdump-collector:alpine-1.6.23 -... -netflow-collector: - image: netsage/nfdump-collector:alpine-1.6.23 -``` - -Remind users to make the same change in their docker-compose.override.yml file when they do the next pipeline upgrade. - - -### New Version of Logstash - -If a new version of logstash has been released that we want everyone to use, -??? diff --git a/website/sidebars.js b/website/sidebars.js index 12c9349d..539ab969 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -20,7 +20,6 @@ module.exports = { "devel/dev_dataset", "devel/docker_dev_guide", "devel/docusaurus", - "devel/docker_dev_tag", ] }, }; From 0893cb91f1d7a0dee021bd45bbf8a6d8777c89e1 Mon Sep 17 00:00:00 2001 From: Lisa Ensman Date: Fri, 30 Sep 2022 16:52:12 +0000 Subject: [PATCH 125/126] Tweaks to bare-metal install instructions --- website/docs/deploy/bare_metal_install.md | 105 +++++++++++----------- 1 file changed, 52 insertions(+), 53 deletions(-) diff --git a/website/docs/deploy/bare_metal_install.md b/website/docs/deploy/bare_metal_install.md index 202c7ce2..25b44ab4 100644 --- a/website/docs/deploy/bare_metal_install.md +++ b/website/docs/deploy/bare_metal_install.md @@ -18,71 +18,70 @@ At least one of these must be set up on a sensor to provide the incoming flow da See the Docker Installation instuctions for more info. -## Installing the Prerequisites - - -### Installing Pmacct +## Install Pmacct The pmacct package provides nfacctd and sfacctd processes which receive flow data and write it to a rabbitmq queue. Since the pmacct devs have not released a tagged version (or docker containers) since 1.7.7, and we require some commits that fixed an issue for us on Oct 11, 2021, we need to build pmacct from master (or master from some time after Oct 11, 2021). ``` - 1. Go to the host where you want to install or upgrade nfacctd and sfacctd - 2. Get dependencies if they were not previously installed on the host (Netsage needs librabbitmq-devel and jansson-devel) - $ sudo yum install libpcap-devel pkgconfig libtool autoconf automake make - $ sudo yum install libstdc++-devel gcc-c++ librabbitmq-devel jansson-devel.x86_64 - 3. Clone the repo - $ git clone https://github.com/pmacct/pmacct.git - 4. Rename the dir to, eg, pmacct-02Jun2022/, using today's date or the date of the code you are going to check out. eg. - $ cd pmacct-02June2022 - 5. You should be in master at this point. - To build and install a specific release/tag/branch, just check out that tag/branch and proceed. - We have done testing (and made docker images) with this version: - $ git checkout 865a81e1f6c444aab32110a87d72005145fd6f74 - 6. Get ready to build sfacctd and nfacctd (the following options are needed for Netsage) - $ ./autogen.sh - $ ./configure --enable-rabbitmq --enable-jansson - 7. Build and install - $ make - $ sudo make install - $ make clean - $ make distclean - 8. Check the versions - $ sfacctd -V - $ nfacctd -V - These should give something like this where 20220602 is the date: - nfacctd 1.7.8-git [20220602-0 (5e4b0612)] + 1. Go to the host where you want to install or upgrade nfacctd and sfacctd + 2. 
Get dependencies if they were not previously installed on the host + $ sudo yum install libpcap-devel pkgconfig libtool autoconf automake make + $ sudo yum install libstdc++-devel gcc-c++ librabbitmq-devel jansson-devel.x86_64 + 3. Clone the repo + $ git clone https://github.com/pmacct/pmacct.git + 4. Rename the dir to, eg, pmacct-02Jun2022/, using today's date or the date + of the code you are going to check out. eg. + $ cd pmacct-02June2022 + 5. You should be in master at this point. + To build and install a specific release/tag/branch, just check out that + tag/branch and proceed. + We have done testing (and made docker images) with this version: + $ git checkout 865a81e1f6c444aab32110a87d72005145fd6f74 + 6. Get ready to build sfacctd and nfacctd + (the following options are needed for Netsage) + $ ./autogen.sh + $ ./configure --enable-rabbitmq --enable-jansson + 7. Build and install + $ make + $ sudo make install + $ make clean + $ make distclean + 8. Check the versions + $ sfacctd -V + $ nfacctd -V + These should give something like this where 20220602 is the date: + nfacctd 1.7.8-git [20220602-0 (5e4b0612)] ``` -### Installing RabbitMQ +## Install RabbitMQ A local rabbitmq instance is used to hold flow data until logstash can retreive and process it. Typically, the rabbitmq server runs on the same server as the pipeline itself, but if need be, you can separate them (for this reason, the Rabbit server is not automatically installed with the pipeline package). ```sh -[root@host ~]# yum install rabbitmq-server +$ sudo yum install rabbitmq-server ``` Typically, the default configuration will work. Perform any desired Rabbit configuration, then, start RabbitMQ: ```sh -[root@host ~]# /sbin/service rabbitmq-server start - or # systemctl start rabbitmq-server.service +$ sudo systemctl start rabbitmq-server.service ``` Being able to view the user interface in a browser window is very useful. Look up how to enable it. -### Installing Logstash +## Install Logstash See the logstash documentation. We are currently using Version 7.16.2. ``` Download and install the public signing key - sudo rpm --import https://artifacts.elastic.co/GPG-KEY-elasticsearch + $ sudo rpm --import https://artifacts.elastic.co/GPG-KEY-elasticsearch Create or edit /etc/yum.repos.d/ELK.repo [logstash-7.x] @@ -95,13 +94,13 @@ Create or edit /etc/yum.repos.d/ELK.repo type=rpm-md Install - sudo yum install logstash + $ sudo yum install logstash ``` -### Installing the Pipeline +## Install the Netsage Pipeline -Installing the Pipeline just copies config, cron, and systemd files to the correct locations. There are no longer any perl scripts to install. +Installing the Pipeline just means copying config, cron, and systemd files to the correct locations. There are no longer any perl scripts to install. The last Pipeline package released by GlobalNOC (**a non-pmacct version**) is in the GlobalNOC Open Source Repo. You can use that, if the version you want is there, or you can just build the rpm from scratch, or manually copy files to the correct locations (the .spec file indicates where). @@ -129,12 +128,12 @@ Install the package using yum: b. 
To build the rpm from a git checkout, ``` -git clone https://github.com/netsage-project/netsage-pipeline.git -git checkout master (or a branch) -cd netsage-pipeline -perl Makefile.PL -make rpm -sudo yum install //rpmbuild/RPMS/noarch/grnoc-netsage-pipeline-2.0.0-1.el7.noarch.rpm +$ git clone https://github.com/netsage-project/netsage-pipeline.git +$ git checkout master (or a branch) +$ cd netsage-pipeline +$ perl Makefile.PL +$ make rpm +$ sudo yum install //rpmbuild/RPMS/noarch/grnoc-netsage-pipeline-2.0.0-1.el7.noarch.rpm (use "reinstall" if the version number has not changed) ``` @@ -165,9 +164,8 @@ The Logstash config files containing the "filters" that comprise the Pipeline ar Check the 15-sensor-specific-changes.conf file. When running without Docker, especially with multiple customers, it's much easier to replace the contents of that file, which reference environment file values, with hard-coded "if" stagements and clauses that do just what you need. -ENV FILE: Our standard processing for Netsage uses the default values for environment variables. These are set directly in the logstash configs. If any of these need to be changed, you can use an environment file: `/etc/logstash/logstash-env-vars`. The systemd unit file for logstash is set to read this file if it exists. You could copy into any or all the logstash-related settings from the env.example file. +ENV FILE: Our standard processing for Netsage uses the default values for environment variables. These are set directly in the logstash configs. If any of these need to be changed, you can use an environment file: `/etc/logstash/logstash-env-vars`. The systemd unit file for logstash is set to read this file if it exists. You could copy in any or all the logstash-related settings from the env.example file. Note that the values will apply in all logstash-pipelines. -Note that this file will be read and used by all logstash-pipelines. ## Pmacct Configuration and Unit Files @@ -178,13 +176,14 @@ The easiest way to make the config files is to use the .env file and the setup-p Doing just a few sensors at a time, edit the .env file and run the script. After running the script, you will find files like nfacctd_1.conf and nfacctd_1-pretag.map in conf-pmacct/ (in the git checkout). You will have to then make the following changes: -- Rename the newly created .conf and .map files, replacing _1 with _sensorName (some string that makes sense to humans). Similarly of _2, etc. +- Rename the newly created .conf and .map files, replacing _1 with _sensorName (some string that makes sense to humans). Similarly for _2, etc. - Edit each .conf file and change the name of the .map file within to match (the pre_tag_map value) - Also, in each .conf file - change the port number (nfacctd_port or sfacctd_port) to be the port to which the sensor is sending - change the rabbit host (amqp_host) from "rabbit" to "localhost" - - change the name of the output rabbit queue (amqp_routing_key) to something unique (eg, netsage_deidentifier_raw_sensorName) + - change the name of the output rabbit queue (amqp_routing_key) to something unique (eg, netsage_deidentifier_raw_sensorName). This must match *queue* and *key* in the 01-input-rabbit.conf file of the logstash-pipeline that is handling the sensor! - Finally, copy the files to /etc/pmacct/ + (You can have the script make some of these changes for you if you temporarily edit the conf-pmacct/*.ORIG files.) You will also need to create systemd unit files to start and stop each process. 
Use systemd/sfacctd.service and nfacctd.service as examples. Each should be given a name like nfacctd-sensorName.service. Within the files, edit the config filename in two places. @@ -195,17 +194,17 @@ You will also need to create systemd unit files to start and stop each process. ```sh # systemctl start logstash.service ``` -It will take a minute or two to start. Log files are normally /var/log/messages and /var/log/logstash/logstash-plain.log. `sudo systemctl status logstash` is also handy. +It will take a minute or two to start. Log files are normally written to /var/log/messages and/or /var/log/logstash/logstash-plain.log. `sudo systemctl status logstash` is also handy. -Be sure to check to see if it starts ok. If not, look for an error message. If all is ok, the last couple lines should be how many pipelines are running and something about connecting to rabbit. +Be sure to check to see if it starts ok. If it does not, look for an error message. If all is ok, the last couple lines should be something about connecting to rabbit and how many pipelines are running. -NOTE: When logstash is stopped, any flows currently "in the aggregator" will be written out to /tmp/logstash-aggregation-maps (or the path/file set in 40-aggregation.conf). These will be read in and deleted when logstash is started again. (In some situations, it is desirable to just delete those files before restarting.) +NOTE: When logstash is stopped, any flows currently "in the aggregator" will be written out to /tmp/logstash-aggregation-maps (or the path/filename set in 40-aggregation.conf). This file will be read in and deleted when logstash is started again. (In some situations, it is desirable to just delete those files by hand before restarting.) ## Start Pmacct Processes ```sh # systemctl start nfacctd-sensor1 -# systemctl start sfacctd-sensor2 +# systemctl start nfacctd-sensor2 etc. ``` @@ -219,7 +218,7 @@ Flows should exit the pipeline (and appear in Elasticsearch) after about 15 minu Inactive cron files are installed (and provided in the cron.d/ directory of the git checkout). Baremetal-netsage-downloads.cron and restart-logstash-service.cron should be in /etc/cron.d/. Please review and uncomment their contents. -These periodically download MaxMind, CAIDA, and Science Registry files, and also restart logstash. Logstash needs to be restarted in order for any updated files to be used. +These periodically download MaxMind, CAIDA, Science Registry, and member-list files, and also restart logstash. Logstash needs to be restarted in order for any updated files to be used. MaxMind GeoIP files change weekly. CAIDA updates their ASN-Organization mappings every quarter (we need to manually process those to create the file Netsage uses). Science Registry records can change at random times. From bbb8ee62936c16bece297a74311ae19be67b5f3a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 7 Jan 2023 05:01:11 +0000 Subject: [PATCH 126/126] Bump json5 from 1.0.1 to 1.0.2 in /website Bumps [json5](https://github.com/json5/json5) from 1.0.1 to 1.0.2. - [Release notes](https://github.com/json5/json5/releases) - [Changelog](https://github.com/json5/json5/blob/main/CHANGELOG.md) - [Commits](https://github.com/json5/json5/compare/v1.0.1...v1.0.2) --- updated-dependencies: - dependency-name: json5 dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- website/yarn.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index f0012637..56b03c86 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -5754,9 +5754,9 @@ json3@^3.3.3: integrity sha512-c7/8mbUsKigAbLkD5B010BK4D9LZm7A1pNItkEwiUZRpIN66exu/e7YQWysGun+TRKaJp8MhemM+VkfWv42aCA== json5@^1.0.1: - version "1.0.1" - resolved "https://registry.yarnpkg.com/json5/-/json5-1.0.1.tgz#779fb0018604fa854eacbf6252180d83543e3dbe" - integrity sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow== + version "1.0.2" + resolved "https://registry.yarnpkg.com/json5/-/json5-1.0.2.tgz#63d98d60f21b313b77c4d6da18bfa69d80e1d593" + integrity sha512-g1MWMLBiz8FKi1e4w0UyVL3w+iJceWAFBAaBnnGKOpNa5f8TLktkbre1+s6oICydWAm+HRUGTmI+//xv2hvXYA== dependencies: minimist "^1.2.0" @@ -6293,9 +6293,9 @@ minimatch@3.0.4, minimatch@^3.0.4: brace-expansion "^1.1.7" minimist@^1.2.0, minimist@^1.2.5: - version "1.2.6" - resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.6.tgz#8637a5b759ea0d6e98702cfb3a9283323c93af44" - integrity sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q== + version "1.2.7" + resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.7.tgz#daa1c4d91f507390437c6a8bc01078e7000c4d18" + integrity sha512-bzfL1YUZsP41gmu/qjrEk0Q6i2ix/cVeAhbCbqH9u3zYutS1cLg00qhrD0M2MVdCcx4Sc0UpP2eBWo9rotpq6g== minipass-collect@^1.0.2: version "1.0.2"