Skip to content

Commit

Permalink
Rewrite
Browse files Browse the repository at this point in the history
  • Loading branch information
kiblik committed Jul 5, 2024
1 parent 424a106 commit 3675717
Show file tree
Hide file tree
Showing 8 changed files with 196 additions and 75 deletions.
46 changes: 29 additions & 17 deletions docs/content/en/integrations/notification_webhooks/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,41 @@ weight: 7
chapter: true
---


## Transition graph:

```mermaid
flowchart TD
START{START}
START{{Endpoint created}}
ALL{All states}
STATUS_ACTIVE([STATUS_ACTIVE])
STATUS_INACTIVE_TMP
STATUS_INACTIVE_400
STATUS_ACTIVE_500([STATUS_ACTIVE_500])
STATUS_INACTIVE_500
STATUS_INACTIVE_OTHERS
STATUS_INACTIVE_MANUAL
STATUS_INACTIVE_PERMANENT
STATUS_ACTIVE_TMP([STATUS_ACTIVE_TMP])
END{{Endpoint removed}}
START --> STATUS_ACTIVE
START ==> STATUS_ACTIVE
STATUS_ACTIVE --HTTP 200 or 201 --> STATUS_ACTIVE
STATUS_ACTIVE --HTTP 5xx, 429, timeout or other non-HTTP error<br> within one day from the first error--> STATUS_INACTIVE_TMP
STATUS_INACTIVE_TMP --After 60s--> STATUS_ACTIVE_500
STATUS_ACTIVE_500 --HTTP 5xx, 429, timeout or other non-HTTP error<br> within one day from the first error-->STATUS_INACTIVE_TMP
STATUS_ACTIVE_500 --HTTP 5xx, 429, timeout or other non-HTTP error<br> within one day from the first error-->STATUS_INACTIVE_500
STATUS_ACTIVE_500 --After 24h--> STATUS_ACTIVE
STATUS_ACTIVE --Any HTTP 4xx response--> STATUS_INACTIVE_400
STATUS_ACTIVE --Any other HTTP response--> STATUS_INACTIVE_OTHERS
STATUS_ACTIVE --Manual deactivation by user--> STATUS_INACTIVE_MANUAL
```
STATUS_ACTIVE --HTTP 5xx <br>or HTTP 429 <br>or Timeout--> STATUS_INACTIVE_TMP
STATUS_ACTIVE --Any HTTP 4xx response<br>or any other HTTP responsee<br>or non-HTTP error--> STATUS_INACTIVE_PERMANENT
STATUS_INACTIVE_TMP -.After 60s.-> STATUS_ACTIVE_TMP
STATUS_ACTIVE_TMP --HTTP 5xx <br>or HTTP 429 <br>or Timeout <br>within 24h<br>from the first error-->STATUS_INACTIVE_TMP
STATUS_ACTIVE_TMP -.After 24h.-> STATUS_ACTIVE
STATUS_ACTIVE_TMP --HTTP 200 or 201 --> STATUS_ACTIVE_TMP
STATUS_ACTIVE_TMP --HTTP 5xx <br>or HTTP 429 <br>or Timeout <br>within 24h from the first error<br>or any other HTTP respons or error--> STATUS_INACTIVE_PERMANENT
ALL ==Activation by user==> STATUS_ACTIVE
ALL ==Deactivation by user==> STATUS_INACTIVE_PERMANENT
ALL ==Removal of endpoint by user==> END
```

Notes:

1. Transitions:
- bold: manual changes by user
- dotted: automated by celery
- others: based on responses on webhooks
1. Nodes:
- Stadium-shaped: Active - following webhook can be send
- Rectangles: Inactive - performing of webhook will fail (and not retried)
- Hexagonal: Initial and final states
- Rhombus: All states (meta node to make graph more readable)
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by Django 4.1.13 on 2024-05-27 16:23
# Generated by Django 4.2.13 on 2024-07-05 18:54

from django.db import migrations, models
import django.db.models.deletion
Expand All @@ -17,6 +17,11 @@ class Migration(migrations.Migration):
name='enable_webhooks_notifications',
field=models.BooleanField(default=False, verbose_name='Enable Webhook notifications'),
),
migrations.AddField(
model_name='system_settings',
name='webhooks_notifications_timeout',
field=models.IntegerField(default=60, help_text='How many seconds will DefectDojo waits for response from webhook endpoint'),
),
migrations.AlterField(
model_name='notifications',
name='auto_close_engagement',
Expand Down Expand Up @@ -115,9 +120,10 @@ class Migration(migrations.Migration):
('url', models.URLField(default='', help_text='The full URL of the incoming webhook')),
('header_name', models.CharField(blank=True, default='', help_text='Name of the header required for interacting with Webhook endpoint', max_length=100, null=True)),
('header_value', models.CharField(blank=True, default='', help_text='Content of the header required for interacting with Webhook endpoint', max_length=100, null=True)),
('status', models.CharField(choices=[('active', 'Active'), ('inactive_400', 'Inactive because of 4xx error'), ('active_500', 'Active but 5xx error detected'), ('inactive_500', 'Inactive because of 5xx error'), ('inactive_others', 'Inactive because of status code unsupported'), ('inactive_manual', 'Inactive because of manual deactivation')], default='active', help_text='Status of the incoming webhook', max_length=20)),
('status', models.CharField(choices=[('active', 'Active'), ('active_tmp', 'Active but 5xx (or similar) error detected'), ('inactive_tmp', 'Temporary inactive because of 5xx (or similar) error'), ('inactive_permanent', 'Permanently inactive')], default='active', help_text='Status of the incoming webhook', max_length=20)),
('first_error', models.DateTimeField(blank=True, help_text='If endpoint is active, when error happened first time', null=True)),
('last_error', models.DateTimeField(blank=True, help_text='If endpoint is active, when error happened last time', null=True)),
('note', models.CharField(blank=True, default='', help_text='Description of the latest error', null=True)),
('owner', models.ForeignKey(blank=True, help_text='Owner/receiver of notification, if empty processed as system notification', null=True, on_delete=django.db.models.deletion.CASCADE, to='dojo.dojo_user')),
],
),
Expand Down
2 changes: 2 additions & 0 deletions dojo/fixtures/dojo_testdata.json
Original file line number Diff line number Diff line change
Expand Up @@ -3074,6 +3074,7 @@
"status": "active",
"first_error": null,
"last_error": null,
"note": null,
"owner": null
}
},
Expand All @@ -3088,6 +3089,7 @@
"status": "active",
"first_error": null,
"last_error": null,
"note": null,
"owner": 2
}
}
Expand Down
20 changes: 10 additions & 10 deletions dojo/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,8 @@ class System_Settings(models.Model):
models.BooleanField(default=False,
verbose_name=_('Enable Webhook notifications'),
blank=False)
webhooks_notifications_timeout = models.IntegerField(default=60,
help_text=_("How many seconds will DefectDojo waits for response from webhook endpoint"))

false_positive_history = models.BooleanField(
default=False, help_text=_(
Expand Down Expand Up @@ -4105,18 +4107,14 @@ class Notification_Webhooks(models.Model):
_STATUS_ACTIVE = "active"
_STATUS_INACTIVE = "inactive"
STATUS_ACTIVE = f"{_STATUS_ACTIVE}"
STATUS_INACTIVE_400 = f"{_STATUS_INACTIVE}_400"
STATUS_ACTIVE_500 = f"{_STATUS_ACTIVE}_500"
STATUS_INACTIVE_500 = f"{_STATUS_INACTIVE}_500"
STATUS_INACTIVE_OTHERS = f"{_STATUS_INACTIVE}_others"
STATUS_INACTIVE_MANUAL = f"{_STATUS_INACTIVE}_manual"
STATUS_ACTIVE_TMP = f"{_STATUS_ACTIVE}_tmp"
STATUS_INACTIVE_TMP = f"{_STATUS_INACTIVE}_tmp"
STATUS_INACTIVE_PERMANENT = f"{_STATUS_INACTIVE}_permanent"
STATUS_CHOICES = (
(STATUS_ACTIVE, "Active"),
(STATUS_INACTIVE_400, "Inactive because of 4xx error"),
(STATUS_ACTIVE_500, "Active but 5xx error detected"),
(STATUS_INACTIVE_500, "Inactive because of 5xx error"),
(STATUS_INACTIVE_OTHERS, "Inactive because of status code unsupported"),
(STATUS_INACTIVE_MANUAL, "Inactive because of manual deactivation"),
(STATUS_ACTIVE_TMP, "Active but 5xx (or similar) error detected"),
(STATUS_INACTIVE_TMP, "Temporary inactive because of 5xx (or similar) error"),
(STATUS_INACTIVE_PERMANENT, "Permanently inactive"),
)
name = models.CharField(max_length=100, default='', blank=False, unique=True,
help_text=_('Name of the incoming webhook'))
Expand All @@ -4130,6 +4128,7 @@ class Notification_Webhooks(models.Model):
help_text=_('Status of the incoming webhook'))
first_error = models.DateTimeField(help_text=_('If endpoint is active, when error happened first time'), blank=True, null=True)
last_error = models.DateTimeField(help_text=_('If endpoint is active, when error happened last time'), blank=True, null=True)
note = models.CharField(default='', blank=True, null=True, help_text=_('Description of the latest error'))
owner = models.ForeignKey(Dojo_User, editable=True, null=True, blank=True, on_delete=models.CASCADE,
help_text=_('Owner/receiver of notification, if empty processed as system notification'))

Expand Down Expand Up @@ -4618,6 +4617,7 @@ def __str__(self):
auditlog.register(Risk_Acceptance)
auditlog.register(Finding_Template)
auditlog.register(Cred_User, exclude_fields=['password'])
auditlog.register(Notification_Webhooks, exclude_fields=['header_name', 'header_value'])

from dojo.utils import calculate_grade, to_str_typed # noqa: E402 # there is issue due to a circular import

Expand Down
108 changes: 74 additions & 34 deletions dojo/notifications/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,8 @@ def send_mail_notification(event, user=None, *args, **kwargs):


def webhooks_notification_request(endpoint, event, *args, **kwargs):
from dojo.utils import get_system_setting

headers = {
"User-Agent": f"DefectDojo-{dd_version}",
"X-DefectDojo-Event": event,
Expand All @@ -336,12 +338,15 @@ def webhooks_notification_request(endpoint, event, *args, **kwargs):
headers[endpoint.header_name] = endpoint.header_value
yaml_data = create_notification_message(event, endpoint.owner, 'webhooks', *args, **kwargs)
data = yaml.safe_load(yaml_data)

timeout = get_system_setting('webhooks_notifications_timeout')

res = requests.request(
method='POST',
url=endpoint.url,
headers=headers,
json=data,
timeout=60,
timeout=timeout,
)
return res

Expand All @@ -353,9 +358,39 @@ def test_webhooks_notification(endpoint):
# for now, "raise_for_status" should be enough


@app.task(ignore_result=True)
def webhook_reactivation(*args, **kwargs):
endpoint = Notification_Webhooks.objects.get(pk=kwargs.get('endpoint_id'))

# User already changed status of endpoint
if endpoint.status != Notification_Webhooks.STATUS_INACTIVE_TMP:
return

endpoint.status = Notification_Webhooks.STATUS_ACTIVE_TMP
endpoint.save()


@app.task(ignore_result=True)
def webhook_status_cleanup(*args, **kwargs):

Notification_Webhooks.objects.filter(
status=Notification_Webhooks.STATUS_ACTIVE_TMP,
last_error__le=get_current_datetime() - timedelta(hours=24),
).update(
status=Notification_Webhooks.STATUS_ACTIVE,
first_error=None,
last_error=None,
note=f'Reactivation from {Notification_Webhooks.STATUS_ACTIVE_TMP}',
)


@dojo_async_task
@app.task
def send_webhooks_notification(event, user=None, *args, **kwargs):

ERROR_PERMANENT = 'permanent'
ERROR_TEMPORARY = 'temporary'

endpoints = Notification_Webhooks.objects.filter(owner=user)

if not endpoints:
Expand All @@ -366,7 +401,9 @@ def send_webhooks_notification(event, user=None, *args, **kwargs):
return

for endpoint in endpoints:
if not endpoint.status.startswith(Notification_Webhooks._STATUS_ACTIVE):

error = None
if endpoint.status not in [Notification_Webhooks.STATUS_ACTIVE, Notification_Webhooks.STATUS_ACTIVE_TMP]:
logger.info(f"URL for Webhook '{endpoint.name}' is not active: {endpoint.get_status_display()} ({endpoint.status})")
continue

Expand All @@ -378,48 +415,51 @@ def send_webhooks_notification(event, user=None, *args, **kwargs):
logger.debug(f"Message sent to endpoint '{endpoint.name}' sucessfully.")
continue

now = get_current_datetime()
# HTTP request passed successfully but we still need to check status code
if 500 <= res.status_code < 600 or res.status_code == 429:
error = ERROR_TEMPORARY
else:
error = ERROR_PERMANENT

# There is no reason to keep endpoint active if it is returning 4xx errors
if 400 <= res.status_code < 500:
endpoint.status = Notification_Webhooks.STATUS_INACTIVE_400
endpoint.first_error = now
endpoint.note = f"Response status code: {res.status_code}"
logger.error(f"Error when sending message to Webhooks '{endpoint.name}' (status: {res.status_code}): {res.text}")

# 5xx is also not OK
elif 500 <= res.status_code < 600:
# If there is only temporary outage (we detected 5xx first time or it was before more then one hour), we can keep endpoint active (but marked)
except requests.exceptions.Timeout as e:
error = ERROR_TEMPORARY
endpoint.note = f"Requests exception: {e}"
logger.error(f"Timeout when sending message to Webhook '{endpoint.name}'")

# First detected
# or first detected more then one hour ago
if endpoint.last_error is None or (now - endpoint.last_error).seconds > 60 * 60:
endpoint.status = Notification_Webhooks.STATUS_ACTIVE_500
endpoint.first_error = now # Yes, if last fail happen before more then hour, we are considering it as a new error
except Exception as e:
error = ERROR_PERMANENT
endpoint.note = f"Exception: {e}"
logger.exception(e)
log_alert(e, "Webhooks Notification")

# Repeated detection
else:
# Error is repeating over more then hour
if (now - endpoint.first_error).seconds > 60 * 60:
endpoint.status = Notification_Webhooks.STATUS_INACTIVE_500
now = get_current_datetime()

# This situation shouldn't happen - only if somebody was cleaning status and didn't clean first/last_error
# But we should handle it
else:
endpoint.status = Notification_Webhooks.STATUS_ACTIVE_500
endpoint.first_error = now
if error == ERROR_TEMPORARY:

# If endpoint is unstable for more then one day, it needs to be deactivated
if endpoint.first_error is not None and (now - endpoint.first_error).total_seconds() > 60 * 60 * 24:
endpoint.status = Notification_Webhooks.STATUS_INACTIVE_PERMANENT

# Well, we really accepts only 200 and 201
else:
endpoint.status = Notification_Webhooks.STATUS_INACTIVE_OTHERS
endpoint.first_error = now
# We need to monitor when outage started
if endpoint.status == Notification_Webhooks.STATUS_ACTIVE:
endpoint.first_error = now

endpoint.last_error = now
endpoint.save()
endpoint.status = Notification_Webhooks.STATUS_INACTIVE_TMP

logger.error(f"Error when sending message to Webhooks (status: {res.status_code}): {res.text}")
# In case of failure within one day, endpoint can be deactivated temporally only for one minute
# webhook_reactivation.apply_async(kwargs={'endpoint_id': endpoint.pk}, countdown=60)

except Exception as e:
logger.exception(e)
log_alert(e, "Webhooks Notification")
# There is no reason to keep endpoint active if it is returning 4xx errors
else:
endpoint.status = Notification_Webhooks.STATUS_INACTIVE_PERMANENT
endpoint.first_error = now

endpoint.last_error = now
endpoint.save()


def send_alert_notification(event, user=None, *args, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion dojo/settings/.settings.dist.py.sha256sum
Original file line number Diff line number Diff line change
@@ -1 +1 @@
cce215fa477d611d45cae69a29185e943eb209526fec2b38659666e5e9513fe3
1d797b12b15e9130fbe6de3555b9de59ea3937080908059dd77c1116a775b886
5 changes: 4 additions & 1 deletion dojo/settings/settings.dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,10 @@ def saml2_attrib_map_format(dict):
'task': 'dojo.risk_acceptance.helper.expiration_handler',
'schedule': crontab(minute=0, hour='*/3'), # every 3 hours
},
'notification_webhook_status_cleanup': {
'task': 'dojo.notifications.helper.webhook_status_cleanup',
'schedule': timedelta(minutes=1),
},
# 'jira_status_reconciliation': {
# 'task': 'dojo.tasks.jira_status_reconciliation_task',
# 'schedule': timedelta(hours=12),
Expand All @@ -1150,7 +1154,6 @@ def saml2_attrib_map_format(dict):
# 'schedule': timedelta(hours=12)
# },


}

# ------------------------------------
Expand Down
Loading

0 comments on commit 3675717

Please sign in to comment.