Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ python-dynamodb-lock = "==0.9.1"
requests = "==2.25.1"
sgqlc = "==8.1"
typing-extensions = "~=4.6"
beautifulsoup4 = "==4.13.4"

[dev-packages]
black = "==22.3.0"
Expand Down
1,027 changes: 562 additions & 465 deletions Pipfile.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/asana/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def upsert_github_comment_to_task(comment: Comment, task_id: str):
if asana_comment_id is None:
logger.info(f"Adding comment {github_comment_id} to task {task_id}")

asana_helpers.create_attachments(comment.body(), task_id)
asana_helpers.create_attachments(comment.body_html(), task_id)

asana_comment_id = asana_client.add_comment(
task_id, asana_helpers.asana_comment_from_github_comment(comment)
Expand Down
82 changes: 62 additions & 20 deletions src/asana/helpers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import re
import collections
import urllib.request
from bs4 import BeautifulSoup, Tag
from datetime import datetime, timedelta
from html import escape
from typing import Callable, Match, Optional, List, Dict, Set
from typing import Callable, Match, Optional, List, Dict, Set, Union, cast

import src.asana.client as asana_client
import src.asana.logic as asana_logic
Expand All @@ -24,7 +25,7 @@
from src.markdown_parser import convert_github_markdown_to_asana_xml

AttachmentData = collections.namedtuple(
"AttachmentData", "file_name file_url image_type"
"AttachmentData", "file_name file_url file_type"
)

StatusReason = collections.namedtuple("StatusReason", "is_complete reason")
Expand Down Expand Up @@ -124,7 +125,7 @@ def _author_asana_user_id_from_pull_request(pull_request: PullRequest) -> Option
def _custom_fields_from_pull_request(pull_request: PullRequest) -> Dict:
"""
We currently expect the project to have three custom fields with its corresponding enum options:
 PR Status: "Open", "Draft", "Closed", "Queued", "Merged"
PR Status: "Open", "Draft", "Closed", "Queued", "Merged"
• Build: "Success", "Failure"
• Review Status: "Needs Review", "Changes Requested", "Approved", "Not Ready"
"""
Expand Down Expand Up @@ -250,36 +251,77 @@ def asana_comment_from_github_comment(comment: Comment) -> str:
)


_image_extension_to_type = {
_file_extension_to_type = {
".png": "image/png",
".jpg": "image/jpg",
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
".gif": "image/gif",
".webp": "image/webp",
".mp4": "video/mp4",
".mov": "video/mov",
}

_supported_file_extensions = set(_file_extension_to_type.keys())

def _extract_attachments(body_text: str) -> List[AttachmentData]:

def _get_file_name_from_signed_url(url: str) -> str:
return url.split("?")[0].split("/")[-1]


def _get_file_extension_from_url(url: str) -> str:
return "." + url.split("?")[0].split(".")[-1]


def _extract_attachments(body_html: str) -> List[AttachmentData]:
"""
Finds, but does not replace, all the image attachment URLS (those that end in png, gif,
jpg, or jpeg) in the body_text.
Finds, but does not replace, all the image/video attachment URLs in the body_html. Handles:
1. Regular image URLs (ending in png, gif, jpg, or jpeg)
2. Double-extension URLs (like /img.jpg/img-small.jpg)
3. GitHub asset URLs (https://github.com/user-attachments/assets/...)
"""
attachments = []
matches = re.findall(github_logic.GITHUB_ATTACHMENT_REGEX, body_text)
for img_name, img_url, img_ext in matches:
image_type = _image_extension_to_type.get(img_ext)

# Asana API accepts multiple attachments with same name, so defaulting to "github_attachment" is valid
full_name = img_name if img_name else "github_attachment"
if image_type and img_ext not in full_name:
full_name += img_ext

# Parse the body_html for img and video tags
matches = BeautifulSoup(body_html, "html.parser").find_all(["img", "video"])

for match in matches:
# Since we're searching for specific tag names, we can assert these are Tag objects
matched_tag = cast(Tag, match)

# data-canonical-src is set for assets hosted outside of github
data_canonical_src = matched_tag.get("data-canonical-src")
file_url = data_canonical_src or matched_tag.get("src")

if not file_url:
continue

file_url_str = cast(str, file_url)

file_ext = _get_file_extension_from_url(file_url_str)
if file_ext not in _supported_file_extensions:
logger.warning(f"Unsupported file extension: {file_ext}")
continue

file_type = _file_extension_to_type[file_ext]

file_title = cast(Optional[str], matched_tag.get("alt"))

if not file_title or not file_title.strip():
file_name = _get_file_name_from_signed_url(file_url_str)
else:
file_name = file_title + file_ext

attachments.append(
AttachmentData(file_name=full_name, file_url=img_url, image_type=image_type)
AttachmentData(
file_name=file_name, file_url=file_url_str, file_type=file_type
)
)

return attachments


def create_attachments(body_text: str, task_id: str) -> None:
attachments = _extract_attachments(body_text)
def create_attachments(body_html: str, task_id: str) -> None:
attachments = _extract_attachments(body_html)
for attachment in attachments:
try:
with urllib.request.urlopen(attachment.file_url) as f:
Expand All @@ -288,7 +330,7 @@ def create_attachments(body_text: str, task_id: str) -> None:
task_id,
attachment_contents,
attachment.file_name,
attachment.image_type,
attachment.file_type,
)
except Exception:
logger.warning("Attachment creation failed. Creating task comment anyway.")
Expand Down
2 changes: 1 addition & 1 deletion src/aws/dynamodb_client.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import boto3 # type: ignore
from typing import TypedDict, List, Optional, Tuple

from src.config import AWS_REGION, OBJECTS_TABLE
from src.config import OBJECTS_TABLE, AWS_REGION
from src.logger import logger


Expand Down
44 changes: 34 additions & 10 deletions src/aws/lock.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,40 @@
import boto3 # type: ignore
from datetime import timedelta
from src.config import AWS_REGION, LOCK_TABLE
from src.config import LOCK_TABLE, AWS_REGION
from python_dynamodb_lock.python_dynamodb_lock import DynamoDBLockClient # type: ignore

# Lazy initialization - these will be created when first accessed
_dynamodb_resource = None
_dynamodb_lock_client = None

dynamodb_resource = boto3.resource("dynamodb", region_name=AWS_REGION)

dynamodb_lock_client = DynamoDBLockClient(
dynamodb_resource,
table_name=LOCK_TABLE,
lease_duration=timedelta(seconds=20),
expiry_period=timedelta(
minutes=2
), # The Lambda function has a 120 second timeout by default
)
def get_dynamodb_resource():
"""Get the DynamoDB resource, creating it lazily on first access."""
global _dynamodb_resource
if _dynamodb_resource is None:
_dynamodb_resource = boto3.resource("dynamodb", region_name=AWS_REGION)
return _dynamodb_resource


def get_dynamodb_lock_client():
"""Get the DynamoDB lock client, creating it lazily on first access."""
global _dynamodb_lock_client
if _dynamodb_lock_client is None:
_dynamodb_lock_client = DynamoDBLockClient(
get_dynamodb_resource(),
table_name=LOCK_TABLE,
lease_duration=timedelta(seconds=20),
expiry_period=timedelta(
minutes=2
), # The Lambda function has a 120 second timeout by default
)
return _dynamodb_lock_client


# Create a lazy-loading module-level variable
class _LazyLockClient:
def __getattr__(self, name):
return getattr(get_dynamodb_lock_client(), name)


dynamodb_lock_client = _LazyLockClient()
Comment on lines +11 to +40
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was suggested by cursor, and I decided to leave it. From my understanding it is a best-practice to initialize things like that lazily, so it's a small improvement to current implementation.

Let me know if you think it's better to remove it.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense and looks good to keep - though I don't think it'll be huge improvement since we always need to lock client to be fully set up whenever we access it.

2 changes: 1 addition & 1 deletion src/aws/s3_client.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import boto3 # type: ignore
import json
from contextlib import closing
from typing import Optional
from typing import Optional, List

from src.config import GITHUB_USERNAMES_TO_ASANA_GIDS_S3_PATH, AWS_REGION
from src.utils import memoize
Expand Down
5 changes: 1 addition & 4 deletions src/aws/sqs_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@
import json
from typing import List, Optional

from src.config import (
AWS_REGION,
SQS_URL,
)
from src.config import SQS_URL, AWS_REGION
from src.logger import logger


Expand Down
2 changes: 1 addition & 1 deletion src/github/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def upsert_pull_request(pull_request: PullRequest):

logger.info(f"Task created for pull request {pull_request_id}: {task_id}")
dynamodb_client.insert_github_node_to_asana_id_mapping(pull_request_id, task_id)
asana_helpers.create_attachments(pull_request.body(), task_id)
asana_helpers.create_attachments(pull_request.body_html(), task_id)
_add_asana_task_to_pull_request(pull_request, task_id)
else:
logger.info(
Expand Down
1 change: 1 addition & 0 deletions src/github/graphql/fragments/FullComment.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
}
}
body
bodyHTML
... on IssueComment {
url
}
Expand Down
1 change: 1 addition & 0 deletions src/github/graphql/fragments/FullPullRequest.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
}
publishedAt
body
bodyHTML
url
}
}
Expand Down
1 change: 0 additions & 1 deletion src/github/logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

GITHUB_USERNAME_MENTION_REGEX = r"\B@([A-Za-z0-9_\-]+)(?![A-Za-z0-9_\-]*/)"
GITHUB_TEAM_MENTION_REGEX = r"\B@([a-zA-Z0-9_\-]+/[a-zA-Z0-9_\-]+)"
GITHUB_ATTACHMENT_REGEX = r"!\[(.*?)\]\((.+?(\.png|\.jpg|\.jpeg|\.gif))"

AUTOMERGE_COMMENT_WARNING_AFTER_TESTS_AND_APPROVAL = (
"**:warning: Reviewer:** If you approve this PR, it will be auto-merged as soon as"
Expand Down
3 changes: 3 additions & 0 deletions src/github/models/comment.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ def published_at(self) -> datetime:
def body(self) -> str:
return self._raw["body"]

def body_html(self) -> str:
return self._raw["bodyHTML"]

def author_handle(self) -> str:
return self.author().login()

Expand Down
3 changes: 3 additions & 0 deletions src/github/models/pull_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ def author_handle(self) -> str:
def body(self) -> str:
return self._raw["body"]

def body_html(self) -> str:
return self._raw["bodyHTML"]

def set_body(self, body: str):
self._raw = copy.deepcopy(self._raw)
self._raw["body"] = copy.deepcopy(body)
Expand Down
Loading