Skip to content
This repository has been archived by the owner on Aug 25, 2024. It is now read-only.

Commit

Permalink
scripts: dump discussion: Incorporate advanced patterns for GitHub Gr…
Browse files Browse the repository at this point in the history
…aphQL API

Youtube: https://www.youtube.com/watch?v=i5pIszu9MeM&t=719s
Signed-off-by: John Andersen <[email protected]>
  • Loading branch information
pdxjohnny authored and aliceoa-intel committed Jan 5, 2024
1 parent 6e5462b commit 02c7d36
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 40 deletions.
3 changes: 2 additions & 1 deletion scripts/discussion_dump_to_markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,12 +175,13 @@ async def main():
for comment in input_data["comments"]:
discussion.comments.append(
Comment(
id=comment["id"],
body=comment["body"],
replies=[],
)
)
for reply in comment["replies"]:
discussion.comments[-1].replies.append(Reply(body=reply["body"]))
discussion.comments[-1].replies.append(Reply(id=reply["id"], body=reply["body"]))
output_markdown(discussion, pathlib.Path(__file__).parents[1])
# os.system(f"rm -rf 'docs/tutorials/alice/'")

Expand Down
120 changes: 81 additions & 39 deletions scripts/dump_discussion.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""
r"""
Usage
*****
Expand All @@ -12,14 +12,19 @@
import json
from dataclasses import dataclass
from typing import List
import logging
import argparse

# Module-level logger. Use __name__ (not __file__): __file__ yields a filesystem
# path, which breaks the conventional dotted logger hierarchy that logging
# configuration and filtering rely on.
logger = logging.getLogger(__name__)

@dataclass
class Reply:
    """One reply to a discussion comment, built from a GitHub GraphQL API node."""

    id: str  # GraphQL node id of the reply (the `id` field requested in the query)
    body: str  # Raw body text of the reply as returned by the API

@dataclass
class Comment:
    """One top-level discussion comment plus its collected replies."""

    id: str  # GraphQL node id of the comment; also used as the key when batching reply queries
    body: str  # Raw body text of the comment as returned by the API
    replies: List[Reply]  # Replies appended as reply pages are fetched

Expand All @@ -29,34 +34,47 @@ class Discussion:
title: str
comments: List[Comment]

async def fetch_discussion_data(session, token, owner, repo, discussion_number):
async def fetch_discussion_data(session, graphql_url, token, owner, repo, discussion_number):
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/json"
}

query = """
query($owner: String!, $repo: String!, $discussionNumber: Int!, $commentsCursor: String, $repliesCursor: String) {
comments_query = """
query($owner: String!, $repo: String!, $discussionNumber: Int!, $commentsCursor: String) {
repository(owner: $owner, name: $repo) {
discussion(number: $discussionNumber) {
title
body
comments(first: 100, after: $commentsCursor) {
totalCount
pageInfo {
hasNextPage
endCursor
}
nodes {
id
body
}
}
}
}
}
"""
replies_query = """
query($discussionCommentIds: [ID!]!){
nodes(ids: $discussionCommentIds) {
... on DiscussionComment {
id
replies(first: 10) {
totalCount
pageInfo {
hasNextPage
endCursor
}
nodes {
id
body
replies(first: 100, after: $repliesCursor) {
pageInfo {
hasNextPage
endCursor
}
nodes {
body
}
}
}
}
}
Expand All @@ -72,45 +90,66 @@ async def fetch_discussion_data(session, token, owner, repo, discussion_number):

discussion_data = []
has_next_page = True
comments_cursor = None
comments_by_id = {}
comments_by_id_lock = asyncio.Lock()
discussion_title = None
discussion_body = None

while has_next_page:
variables["commentsCursor"] = comments_cursor
response = await session.post("https://api.github.com/graphql", headers=headers, json={"query": query, "variables": variables})
async def paginate_replies(tg, batch_comment_ids):
nonlocal comments_by_id
nonlocal comments_by_id_lock

logger.debug("Sending nested replies pagination query: %r: %s", variables, replies_query)
response = await session.post(graphql_url, headers=headers, json={"query": replies_query, "variables": {"discussionCommentIds": batch_comment_ids}})
result = await response.json()
logger.debug("Received nested replies comments pagination query result: %s", json.dumps(result, indent=4, sort_keys=True))

if "data" not in result:
raise Exception(json.dumps(result, indent=4, sort_keys=True))
for comment in result["data"]["nodes"]:
reply_nodes = comment["replies"]["nodes"]
has_next_page = comment["replies"]["pageInfo"]["hasNextPage"]
replies_cursor = comment["replies"]["pageInfo"]["endCursor"]

async with comments_by_id_lock:
for reply in reply_nodes:
comments_by_id[comment["id"]].replies.append(Reply(id=reply["id"], body=reply["body"]))

if has_next_page:
raise NotImplementedError()

async def paginate_comments(tg, comments_cursor = None):
nonlocal comments_by_id
nonlocal comments_by_id_lock
nonlocal discussion_title
nonlocal discussion_body

variables["commentsCursor"] = comments_cursor
logger.debug("Sending top level comments pagination query: %r: %s", variables, comments_query)
response = await session.post(graphql_url, headers=headers, json={"query": comments_query, "variables": variables})
result = await response.json()
logger.debug("Received top level comments pagination query result: %s", json.dumps(result, indent=4, sort_keys=True))

discussion_title = result["data"]["repository"]["discussion"]["title"]
discussion_body = result["data"]["repository"]["discussion"]["body"]
comments = result["data"]["repository"]["discussion"]["comments"]["nodes"]
has_next_page = result["data"]["repository"]["discussion"]["comments"]["pageInfo"]["hasNextPage"]
comments_cursor = result["data"]["repository"]["discussion"]["comments"]["pageInfo"]["endCursor"]

for comment in comments:
comment_body = comment["body"]
replies = []

has_next_reply_page = True
replies_cursor = None
batch_comment_ids = []

while has_next_reply_page:
variables["repliesCursor"] = replies_cursor
response = await session.post("https://api.github.com/graphql", headers=headers, json={"query": query, "variables": variables})
reply_result = await response.json()
async with comments_by_id_lock:
for comment in comments:
comment = Comment(id=comment["id"], body=comment["body"], replies=[])
comments_by_id[comment.id] = comment
batch_comment_ids.append(comment.id)
discussion_data.append(comment)

if "replies" not in reply_result:
raise Exception(json.dumps(reply_result, indent=4, sort_keys=True))
tg.create_task(paginate_replies(tg, batch_comment_ids))

reply_nodes = comment["replies"]["nodes"]
has_next_reply_page = comment["replies"]["pageInfo"]["hasNextPage"]
replies_cursor = comment["replies"]["pageInfo"]["endCursor"]
if has_next_page:
tg.create_task(paginate_comments(tg, comments_cursor))

for reply in reply_nodes:
replies.append(Reply(body=reply["body"]))

discussion_data.append(Comment(body=comment_body, replies=replies))
async with asyncio.TaskGroup() as tg:
tg.create_task(paginate_comments(tg, None))

return Discussion(title=discussion_title, body=discussion_body, comments=discussion_data)

Expand All @@ -120,10 +159,13 @@ async def main():
parser.add_argument("--owner", help="GitHub Repository Owner")
parser.add_argument("--repo", help="GitHub Repository Name")
parser.add_argument("--discussion-number", type=int, help="GitHub Discussion Number")
parser.add_argument("--api", help="GitHub GraphQL endpoint", default="https://api.github.com/graphql")
args = parser.parse_args()

logging.basicConfig(level=logging.DEBUG)

async with aiohttp.ClientSession(trust_env=True) as session:
discussion_data = await fetch_discussion_data(session, args.token, args.owner, args.repo, args.discussion_number)
discussion_data = await fetch_discussion_data(session, args.api, args.token, args.owner, args.repo, args.discussion_number)
print(json.dumps(discussion_data, default=lambda x: x.__dict__, indent=2))

if __name__ == "__main__":
Expand Down

0 comments on commit 02c7d36

Please sign in to comment.