Skip to content

Commit

Permalink
[SGTM User Table] Deprecate SGTM's sync_users lambda, its code, and s…
Browse files Browse the repository at this point in the history
…upporting resources (#166)

### Summary

Rely solely on the S3 mapping at the
`github_usernames_to_asana_gids_s3_path` variable. This simplifies
SGTM's implementation significantly since we no longer have to pull user
information from an Asana project.

This PR should only be merged once #165 has been merged in for a few
business days, and we've confirmed that no issues are arising and no
warnings about missing usernames are in the logs.

Asana tasks:
https://app.asana.com/0/1149418478823393/1204738744867777
https://app.asana.com/0/1200487107890647/1207040604662790/f


Relevant deployment: 

CC: @suzyng83209 @prebeta @vn6 @mattbryant-asana @skeggse
@Asana/continuous-integration

### Test Plan



### Risks



Pull Request: #166



Pull Request synchronized with [Asana
task](https://app.asana.com/0/0/1207061604941776)

---------

Co-authored-by: Harshita Gupta <[email protected]>
  • Loading branch information
harshita-gupta and harshita-gupta authored Apr 16, 2024
1 parent e29a569 commit 2498b0e
Show file tree
Hide file tree
Showing 21 changed files with 143 additions and 701 deletions.
5 changes: 0 additions & 5 deletions docs/dynamodb.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,3 @@ For example:
* Github Comment / Review Comment -> Asana comment

All Asana objects created or used by SGTM should be tracked in this table. When handling incoming webhooks, SGTM will fetch relevant objects to update if they exist, otherwise create a new object and add it to the table.

#### sgtm-users
A mapping of GitHub users (github handle string) to Asana users (asana user id string).

This table is read by SGTM to convert GitHub users to corresponding Asana users. The mapping is currently kept updated via [a lambda](/src/sync_users/handler.py). Webhook events handled by SGTM should never write to this table.
6 changes: 2 additions & 4 deletions src/asana/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def _build_status_from_pull_request(pull_request: PullRequest) -> Optional[str]:

def _author_asana_user_id_from_pull_request(pull_request: PullRequest) -> Optional[str]:
return dynamodb_client.get_asana_domain_user_id_from_github_handle(
github_handle=pull_request.author_handle()
pull_request.author_handle()
)


Expand Down Expand Up @@ -172,9 +172,7 @@ def _task_assignee_from_pull_request(pull_request: PullRequest) -> Optional[str]
if asana_logic.should_set_task_owner_to_pr_author(pull_request):
return _author_asana_user_id_from_pull_request(pull_request)
assignee = pull_request.assignee()
return dynamodb_client.get_asana_domain_user_id_from_github_handle(
github_handle=assignee.login
)
return dynamodb_client.get_asana_domain_user_id_from_github_handle(assignee.login)


def _asana_display_name_for_github_user(github_user: User) -> str:
Expand Down
4 changes: 1 addition & 3 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,8 @@
ENV = os.getenv("ENV", "dev")
LOCK_TABLE = os.getenv("LOCK_TABLE", "sgtm-lock")
OBJECTS_TABLE = os.getenv("OBJECTS_TABLE", "sgtm-objects")
USERS_TABLE = os.getenv("USERS_TABLE", "sgtm-users")
ASANA_USERS_PROJECT_ID = os.getenv("ASANA_USERS_PROJECT_ID", "")
GITHUB_USERNAMES_TO_ASANA_GIDS_S3_PATH = os.getenv(
"GITHUB_USERNAMES_TO_ASANA_GIDS_S3_PATH", ""
"GITHUB_USERNAMES_TO_ASANA_GIDS_S3_PATH",
)

# Feature flags
Expand Down
128 changes: 13 additions & 115 deletions src/dynamodb/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

from src.config import (
OBJECTS_TABLE,
USERS_TABLE,
GITHUB_USERNAMES_TO_ASANA_GIDS_S3_PATH,
)
from src.logger import logger
Expand Down Expand Up @@ -120,64 +119,6 @@ def bulk_insert_github_node_to_asana_id_mapping(
]
return self.bulk_insert_items_in_batches(OBJECTS_TABLE, items)

# USERS TABLE

def DEPRECATED_bulk_insert_github_handle_to_asana_user_id_mapping(
self, gh_and_asana_ids: List[Tuple[str, str]]
):
"""Insert multiple mappings from github handle to Asana user ids.
This is marked as deprecated since we'd like to read SGTM user info from S3 instead of
DynamoDB.
"""
items = [
{
self.GITHUB_HANDLE_KEY: {"S": gh_handle},
self.USER_ID_KEY: {"S": asana_user_id},
}
for gh_handle, asana_user_id in gh_and_asana_ids
]
return self.bulk_insert_items_in_batches(USERS_TABLE, items)

@memoize
def DEPRECATED_get_asana_domain_user_id_from_github_handle(
self, github_handle: str
) -> Optional[str]:
"""
Retrieves the Asana domain user-id associated with a specific GitHub user login, or None,
if no such association exists. User-id associations are created manually via an external process.
TODO: document this process, and create scripts to encapsulate it
This is marked as deprecated since we'd like to read SGTM user info from S3 instead of
DynamoDB.
"""
response = self.client.get_item(
TableName=USERS_TABLE,
Key={self.GITHUB_HANDLE_KEY: {"S": github_handle.lower()}},
)
if "Item" in response:
return response["Item"][self.USER_ID_KEY]["S"]
else:
return None

def DEPRECATED_get_all_user_items(self) -> Iterator[DynamoDbUserItem]:
"""
Get all DynamoDb items from the USERS_TABLE
This is marked as deprecated since we'd like to read SGTM user info from S3 instead of
DynamoDB.
"""
response = self.client.scan(TableName=USERS_TABLE)
yield from response["Items"]

# May need to paginate, if the first page of data is > 1MB
# (https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Scan.html#Scan.Pagination)
while response.get("LastEvaluatedKey"):
response = self.client.scan(
TableName=USERS_TABLE, ExclusiveStartKey=response["LastEvaluatedKey"]
)
yield from response["Items"]

@staticmethod
def _create_client():
# Encapsulates creating a boto3 client connection for DynamoDb with a more user-friendly error case
Expand Down Expand Up @@ -213,8 +154,9 @@ def __init__(self):
self.github_user_mapping_key_name,
) = GITHUB_USERNAMES_TO_ASANA_GIDS_S3_PATH.split("/", 1)
else:
self.github_user_mapping_bucket_name = None
self.github_user_mapping_key_name = None
raise ConfigurationError(
"Configuration error: GITHUB_USERNAMES_TO_ASANA_GIDS_S3_PATH is not set to a valid S3 path"
)

# getter for the singleton
@classmethod
Expand Down Expand Up @@ -295,51 +237,6 @@ def insert_github_node_to_asana_id_mapping(gh_node_id: str, asana_id: str):
)


def get_asana_domain_user_id_from_github_handle(github_handle: str) -> Optional[str]:
"""
Using the singleton instance of S3Client, creating it if necessary:
Retrieves the Asana domain user-id associated with a specific GitHub user login, or None,
if no such association exists. User-id associations are created manually via an external
process.
If the S3 pathway fails, we fall back to the DynamoDb pathway.
"""

def fallback_to_dynamo_pathway() -> Optional[str]:
dynamodb_returns = DynamoDbClient.singleton().DEPRECATED_get_asana_domain_user_id_from_github_handle(
github_handle
)
if dynamodb_returns is not None:
# we only want to emit a warning about the S3 pathway if the dynamoDB lookup returns
# something that's not null
logger.warning(
"S3 lookup failed to find the Asana domain user id for github handle %s. The DynamoDB table returned %s. Error: %s",
github_handle,
dynamodb_returns,
traceback.format_exc(),
)
return dynamodb_returns

try:
if user_id := S3Client.singleton().get_asana_domain_user_id_from_github_username(
github_handle
):
return user_id
else:
return fallback_to_dynamo_pathway()
except Exception:
return fallback_to_dynamo_pathway()


def DEPRECATED_get_all_user_items() -> List[dict]:
"""
This function is marked as deprecated since it's only used by the `sync_users` Lambda function,
which we are planning to deprecate in favor of reading SGTM user info from S3 instead of DynamoDB.
"""
return DynamoDbClient.singleton().DEPRECATED_get_all_user_items()


def bulk_insert_github_node_to_asana_id_mapping(
gh_and_asana_ids: List[Tuple[str, str]]
):
Expand All @@ -353,16 +250,17 @@ def bulk_insert_github_node_to_asana_id_mapping(
)


def DEPRECATED_bulk_insert_github_handle_to_asana_user_id_mapping(
gh_and_asana_ids: List[Tuple[str, str]]
):
def get_asana_domain_user_id_from_github_handle(github_handle: str) -> Optional[str]:
"""
This function is marked as deprecated since it's only used by the `sync_users` Lambda function,
which we are planning to deprecate in favor of reading SGTM user info from S3 instead of DynamoDB.
Using the singleton instance of S3Client, creating it if necessary:
Insert multiple mappings from github handle to Asana user ids.
Retrieves the Asana domain user-id associated with a specific GitHub user login, or None,
if no such association exists. User-id associations are created manually via an external
process.
If the S3 pathway fails, we fall back to the DynamoDb pathway.
"""
# todo why/where lol
DynamoDbClient.singleton().DEPRECATED_bulk_insert_github_handle_to_asana_user_id_mapping(
gh_and_asana_ids

return S3Client.singleton().get_asana_domain_user_id_from_github_username(
github_handle
)
1 change: 0 additions & 1 deletion src/sync_users/README.md

This file was deleted.

Empty file removed src/sync_users/__init__.py
Empty file.
52 changes: 0 additions & 52 deletions src/sync_users/handler.py

This file was deleted.

74 changes: 0 additions & 74 deletions src/sync_users/sgtm_user.py

This file was deleted.

43 changes: 1 addition & 42 deletions terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,7 @@ resource "aws_iam_policy" "lambda-function-cloudwatch-policy" {
"logs:PutLogEvents"
],
"Resource": [
"arn:aws:logs:${var.aws_region}:*:log-group:/aws/lambda/${aws_lambda_function.sgtm.function_name}:*",
"arn:aws:logs:${var.aws_region}:*:log-group:/aws/lambda/${aws_lambda_function.sgtm_sync_users.function_name}:*"
"arn:aws:logs:${var.aws_region}:*:log-group:/aws/lambda/${aws_lambda_function.sgtm.function_name}:*"
],
"Effect": "Allow"
}
Expand Down Expand Up @@ -219,46 +218,6 @@ resource "aws_lambda_function" "sgtm" {
}
}

resource "aws_lambda_function" "sgtm_sync_users" {
# TODO delete this once we have fully tested the new pathway
s3_bucket = aws_s3_bucket.lambda_code_s3_bucket.bucket
s3_key = aws_s3_bucket_object.lambda_code_bundle.key
function_name = "sgtm_sync_users"
role = aws_iam_role.iam_for_lambda_function.arn
handler = "src.sync_users.handler.handler"
source_code_hash = data.archive_file.create_dist_pkg.output_base64sha256

runtime = var.lambda_runtime

timeout = 900
environment {
variables = {
API_KEYS_S3_BUCKET = var.api_key_s3_bucket_name,
API_KEYS_S3_KEY = var.api_key_s3_object
ASANA_USERS_PROJECT_ID = var.asana_users_project_id
}
}
}

resource "aws_cloudwatch_event_rule" "execute_sgtm_sync_users_event_rule" {
name = "execute_sgtm_sync_users"
description = "Execute Lambda function sgtm_sync_users on a cron-style schedule"
schedule_expression = "rate(1 hour)"
}

resource "aws_lambda_permission" "lambda_permission_for_sgtm_sync_users_schedule_event" {
statement_id = "AllowSGTMSyncUsersInvoke"
action = "lambda:InvokeFunction"
function_name = aws_lambda_function.sgtm_sync_users.function_name
principal = "events.amazonaws.com"
source_arn = aws_cloudwatch_event_rule.execute_sgtm_sync_users_event_rule.arn
}

resource "aws_cloudwatch_event_target" "execute_sgtm_sync_users_event_target" {
target_id = "execute_sgtm_sync_users_event_target"
rule = aws_cloudwatch_event_rule.execute_sgtm_sync_users_event_rule.name
arn = aws_lambda_function.sgtm_sync_users.arn
}

### API

Expand Down
Loading

0 comments on commit 2498b0e

Please sign in to comment.