Skip to content

Commit

Permalink
Enhance AWS client with a resource cache (#420)
Browse files Browse the repository at this point in the history
The AWS client used throughout the provisioner now contains a
resource cache to enhance performance and reduce the number of API
calls needed for many tasks. Three AWS resource values are now
stored: environment name, private hosted zone ID, and public hosted
zone ID.

These three values are some of the most commonly used by the AWS
client so caching them does the following:
 - Reduces the number of API calls made when creating or updating
   CNAME records by roughly 80 percent.
 - Eliminates an all API calls made when sending out installation
   and cluster installation webhooks.
 - Greatly reduces the number of API calls made during cluster
   provisioning.

These three values should always remain constant in a given AWS
account so they are safe to cache for the lifetime of the client.
This also prepares the provisioner to be able to better support
managing multiple AWS accounts in a single server.
  • Loading branch information
gabrieljackson authored Feb 22, 2021
1 parent ac00069 commit 3559bc9
Show file tree
Hide file tree
Showing 17 changed files with 232 additions and 470 deletions.
12 changes: 5 additions & 7 deletions cmd/cloud/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,14 +208,12 @@ var serverCmd = &cobra.Command{
// https://github.com/aws/aws-sdk-go/blob/99cd35c8c7d369ba8c32c46ed306f6c88d24cfd7/aws/request/retryer.go#L20
MaxRetries: sdkAWS.Int(toolsAWS.DefaultAWSClientRetries),
}
awsClient := toolsAWS.NewAWSClientWithConfig(awsConfig, logger)

environment, err := awsClient.GetCloudEnvironmentName()
awsClient, err := toolsAWS.NewAWSClientWithConfig(awsConfig, logger)
if err != nil {
return errors.Wrap(err, "failed to get the AWS Cloud environment name")
return errors.Wrap(err, "failed to build AWS client")
}

err = checkRequirements(awsConfig, logger)
err = checkRequirements(logger)
if err != nil {
return errors.Wrap(err, "failed health check")
}
Expand Down Expand Up @@ -269,7 +267,7 @@ var serverCmd = &cobra.Command{
Store: sqlStore,
Supervisor: supervisor,
Provisioner: kopsProvisioner,
Environment: environment,
Environment: awsClient.GetCloudEnvironmentName(),
Logger: logger,
})

Expand Down Expand Up @@ -316,7 +314,7 @@ var serverCmd = &cobra.Command{
},
}

func checkRequirements(awsConfig *sdkAWS.Config, logger logrus.FieldLogger) error {
func checkRequirements(logger logrus.FieldLogger) error {
// Check for required tool binaries.
silentLogger := logrus.New()
silentLogger.Out = ioutil.Discard
Expand Down
36 changes: 9 additions & 27 deletions internal/mocks/aws-tools/client.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 15 additions & 19 deletions internal/provisioner/fluentbit.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,32 +96,28 @@ func (f *fluentbit) NewHelmDeployment(logger log.FieldLogger) *helmDeployment {
}

var auditLogsConf string
zoneID, err := f.awsClient.GetPrivateZoneIDForDefaultTag(logger)

tag, err := f.awsClient.GetTagByKeyAndZoneID(aws.DefaultAuditLogsCoreSecurityTagKey, f.awsClient.GetPrivateHostedZoneID(), logger)
if err != nil {
logger.WithError(err).Error("unable to get Private Zone ID with the default tag, skipping setup...")
} else {
tag, err := f.awsClient.GetTagByKeyAndZoneID(aws.DefaultAuditLogsCoreSecurityTagKey, zoneID, logger)
if err != nil {
logger.WithError(err).Errorf("unable to find %s", aws.DefaultAuditLogsCoreSecurityTagKey)
}
if tag == nil {
logger.Infof("%s is missing, skipping setup...", aws.DefaultAuditLogsCoreSecurityTagKey)
tag = &aws.Tag{}
}

hostPort := strings.Split(tag.Value, ":")
if len(hostPort) == 2 {
auditLogsConf = fmt.Sprintf(`[OUTPUT]
logger.WithError(err).Errorf("unable to find %s", aws.DefaultAuditLogsCoreSecurityTagKey)
}
if tag == nil {
logger.Infof("%s is missing, skipping setup...", aws.DefaultAuditLogsCoreSecurityTagKey)
tag = &aws.Tag{}
}

hostPort := strings.Split(tag.Value, ":")
if len(hostPort) == 2 {
auditLogsConf = fmt.Sprintf(`[OUTPUT]
Name forward
Match *
Host %s
Port %s
tls On
tls.verify Off`, hostPort[0], hostPort[1])
} else {
logger.Info("AuditLogsCoreSecurity tag is missing from R53 hosted zone, " +
"fluent-bit will be configured without forwarding to audit logs to Security")
}
} else {
logger.Info("AuditLogsCoreSecurity tag is missing from R53 hosted zone, " +
"fluent-bit will be configured without forwarding to audit logs to Security")
}

elasticSearchDNS := fmt.Sprintf("elasticsearch.%s", privateDomainName)
Expand Down
44 changes: 8 additions & 36 deletions internal/provisioner/fluentbit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ func TestNewHelmDeploymentWithAuditLogsConfiguration(t *testing.T) {
Return("mockDns", nil).
AnyTimes()
awsClient.EXPECT().
GetPrivateZoneIDForDefaultTag(gomock.Eq(logger)).
Return("mockZone", nil).
GetPrivateHostedZoneID().
Return("mockZone").
AnyTimes()
expectedTag := &aws.Tag{Key: "AuditLogsCoreSecurity", Value: "expectedURL:12345"}
awsClient.EXPECT().
Expand Down Expand Up @@ -62,8 +62,8 @@ func TestNewHelmDeploymentWithDefaultConfiguration(t *testing.T) {
Return("mockDns", nil).
AnyTimes()
awsClient.EXPECT().
GetPrivateZoneIDForDefaultTag(gomock.Eq(logger)).
Return("mockZone", nil).
GetPrivateHostedZoneID().
Return("mockZone").
AnyTimes()
expectedTag := &aws.Tag{Key: "MattermostCloudDNS", Value: "private"}
awsClient.EXPECT().
Expand All @@ -81,34 +81,6 @@ func TestNewHelmDeploymentWithDefaultConfiguration(t *testing.T) {
assert.Equal(t, "backend.es.host=elasticsearch.mockDns,rawConfig=\n@INCLUDE fluent-bit-service.conf\n@INCLUDE fluent-bit-input.conf\n@INCLUDE fluent-bit-filter.conf\n@INCLUDE fluent-bit-output.conf\n\n", helmDeployment.setArgument)
}

func TestNewHelmDeploymentWithZoneIDError(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

provisioner := &KopsProvisioner{}
logger := log.New()
awsClient := mocks.NewMockAWS(ctrl)

awsClient.EXPECT().
GetPrivateZoneDomainName(gomock.Eq(logger)).
Return("mockDns", nil).
AnyTimes()
err1 := errors.New("Mock error expected from func GetPrivateZoneIDForDefaultTag")
awsClient.EXPECT().
GetPrivateZoneIDForDefaultTag(gomock.Eq(logger)).
Return("", err1).
AnyTimes()

kops := &kops.Cmd{}
fluentbit, err := newFluentbitHandle(&model.HelmUtilityVersion{Chart: "1.2.3"}, provisioner, awsClient, kops, logger)
require.NoError(t, err, "should not error when creating new fluentbit handler")
require.NotNil(t, fluentbit, "fluentbit should not be nil")

helmDeployment := fluentbit.NewHelmDeployment(logger)
require.NotNil(t, helmDeployment, "helmDeployment should not be nil")
assert.Equal(t, "backend.es.host=elasticsearch.mockDns,rawConfig=\n@INCLUDE fluent-bit-service.conf\n@INCLUDE fluent-bit-input.conf\n@INCLUDE fluent-bit-filter.conf\n@INCLUDE fluent-bit-output.conf\n\n", helmDeployment.setArgument)
}

func TestNewHelmDeploymentWithoutFindingAuditTag(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
Expand All @@ -121,8 +93,8 @@ func TestNewHelmDeploymentWithoutFindingAuditTag(t *testing.T) {
Return("mockDns", nil).
AnyTimes()
awsClient.EXPECT().
GetPrivateZoneIDForDefaultTag(gomock.Eq(logger)).
Return("mockZone", nil).
GetPrivateHostedZoneID().
Return("mockZone").
AnyTimes()
expectedTag := &aws.Tag{}
err1 := errors.New("Mock error expected from func GetTagByKeyAndZoneID")
Expand Down Expand Up @@ -153,8 +125,8 @@ func TestNewHelmDeploymentWithNillTag(t *testing.T) {
Return("mockDns", nil).
AnyTimes()
awsClient.EXPECT().
GetPrivateZoneIDForDefaultTag(gomock.Eq(logger)).
Return("mockZone", nil).
GetPrivateHostedZoneID().
Return("mockZone").
AnyTimes()

awsClient.EXPECT().
Expand Down
7 changes: 1 addition & 6 deletions internal/provisioner/kops_provisioner_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,7 @@ func (provisioner *KopsProvisioner) CreateCluster(cluster *model.Cluster, awsCli
}
}

environment, err := awsClient.GetCloudEnvironmentName()
if err != nil {
return errors.Wrap(err, "getting the AWS Cloud environment")
}

cncVPCName := fmt.Sprintf("mattermost-cloud-%s-command-control", environment)
cncVPCName := fmt.Sprintf("mattermost-cloud-%s-command-control", awsClient.GetCloudEnvironmentName())
cncVPCCIDR, err := awsClient.GetCIDRByVPCTag(cncVPCName, logger)
if err != nil {
return errors.Wrapf(err, "failed to get the CIDR for the VPC Name %s", cncVPCName)
Expand Down
11 changes: 1 addition & 10 deletions internal/provisioner/prometheus_operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,6 @@ func newPrometheusOperatorHandle(cluster *model.Cluster, provisioner *KopsProvis
func (p *prometheusOperator) CreateOrUpgrade() error {
logger := p.logger.WithField("prometheus-action", "create")

environment, err := p.awsClient.GetCloudEnvironmentName()
if err != nil {
return errors.Wrap(err, "failed to get environment name for thanos objstore secret")
}

if environment == "" {
return errors.New("cannot create a thanos objstore secret if environment is empty")
}

awsRegion := os.Getenv("AWS_REGION")
if awsRegion == "" {
awsRegion = aws.DefaultAWSRegion
Expand All @@ -85,7 +76,7 @@ func (p *prometheusOperator) CreateOrUpgrade() error {
secretData := map[string]interface{}{
"type": "s3",
"config": map[string]string{
"bucket": fmt.Sprintf("cloud-%s-prometheus-metrics", environment),
"bucket": fmt.Sprintf("cloud-%s-prometheus-metrics", p.awsClient.GetCloudEnvironmentName()),
"endpoint": fmt.Sprintf("s3.%s.amazonaws.com", awsRegion),
},
}
Expand Down
11 changes: 1 addition & 10 deletions internal/provisioner/teleport.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,9 @@ func newTeleportHandle(cluster *model.Cluster, desiredVersion *model.HelmUtility
return nil, errors.New("cannot create a connection to Teleport if the Kops command provided is nil")
}

environment, err := awsClient.GetCloudEnvironmentName()
if err != nil {
return nil, err
}

if environment == "" {
return nil, errors.New("cannot create a connection to Teleport if the environment is empty")
}

return &teleport{
awsClient: awsClient,
environment: environment,
environment: awsClient.GetCloudEnvironmentName(),
provisioner: provisioner,
kops: kops,
cluster: cluster,
Expand Down
9 changes: 0 additions & 9 deletions internal/provisioner/thanos.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,6 @@ func (t *thanos) ValuesPath() string {
func (t *thanos) CreateOrUpgrade() error {
logger := t.logger.WithField("thanos-action", "create")

environment, err := t.awsClient.GetCloudEnvironmentName()
if err != nil {
return errors.Wrap(err, "failed to get environment name for thanos objstore secret")
}

if environment == "" {
return errors.New("cannot create a thanos objstore secret if environment is empty")
}

awsRegion := os.Getenv("AWS_REGION")
if awsRegion == "" {
awsRegion = aws.DefaultAWSRegion
Expand Down
8 changes: 1 addition & 7 deletions internal/supervisor/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,19 +129,13 @@ func (s *ClusterSupervisor) Supervise(cluster *model.Cluster) {
return
}

environment, err := s.aws.GetCloudEnvironmentName()
if err != nil {
logger.WithError(err).Error("getting the AWS Cloud environment")
return
}

webhookPayload := &model.WebhookPayload{
Type: model.TypeCluster,
ID: cluster.ID,
NewState: newState,
OldState: oldState,
Timestamp: time.Now().UnixNano(),
ExtraData: map[string]string{"Environment": environment},
ExtraData: map[string]string{"Environment": s.aws.GetCloudEnvironmentName()},
}
err = webhook.SendToAllWebhooks(s.store, webhookPayload, logger.WithField("webhookEvent", webhookPayload.NewState))
if err != nil {
Expand Down
8 changes: 1 addition & 7 deletions internal/supervisor/cluster_installation.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,19 +129,13 @@ func (s *ClusterInstallationSupervisor) Supervise(clusterInstallation *model.Clu
return
}

environment, err := s.aws.GetCloudEnvironmentName()
if err != nil {
logger.WithError(err).Error("getting the AWS Cloud environment")
return
}

webhookPayload := &model.WebhookPayload{
Type: model.TypeClusterInstallation,
ID: clusterInstallation.ID,
NewState: newState,
OldState: oldState,
Timestamp: time.Now().UnixNano(),
ExtraData: map[string]string{"ClusterID": clusterInstallation.ClusterID, "Environment": environment},
ExtraData: map[string]string{"ClusterID": clusterInstallation.ClusterID, "Environment": s.aws.GetCloudEnvironmentName()},
}
err = webhook.SendToAllWebhooks(s.store, webhookPayload, logger.WithField("webhookEvent", webhookPayload.NewState))
if err != nil {
Expand Down
Loading

0 comments on commit 3559bc9

Please sign in to comment.