diff --git a/docs/commands/search.sql b/docs/commands/search.sql new file mode 100644 index 0000000000..f7465cfa80 --- /dev/null +++ b/docs/commands/search.sql @@ -0,0 +1,487 @@ +SELECT count() FROM gha WHERE + id LIKE '%in case you have any feedback 😊%' +OR type LIKE '%in case you have any feedback 😊%' +OR actor.login LIKE '%in case you have any feedback 😊%' +OR actor.display_login LIKE '%in case you have any feedback 😊%' +OR actor.gravatar_id LIKE '%in case you have any feedback 😊%' +OR actor.url LIKE '%in case you have any feedback 😊%' +OR actor.avatar_url LIKE '%in case you have any feedback 😊%' +OR repo.name LIKE '%in case you have any feedback 😊%' +OR repo.url LIKE '%in case you have any feedback 😊%' +OR payload.ref LIKE '%in case you have any feedback 😊%' +OR payload.ref_type LIKE '%in case you have any feedback 😊%' +OR payload.pusher_type LIKE '%in case you have any feedback 😊%' +OR payload.head LIKE '%in case you have any feedback 😊%' +OR payload.before LIKE '%in case you have any feedback 😊%' +OR payload.master_branch LIKE '%in case you have any feedback 😊%' +OR payload.description LIKE '%in case you have any feedback 😊%' +OR payload.action LIKE '%in case you have any feedback 😊%' +OR org.login LIKE '%in case you have any feedback 😊%' +OR org.gravatar_id LIKE '%in case you have any feedback 😊%' +OR org.url LIKE '%in case you have any feedback 😊%' +OR org.avatar_url LIKE '%in case you have any feedback 😊%' +OR payload.review.node_id LIKE '%in case you have any feedback 😊%' +OR payload.review.user.login LIKE '%in case you have any feedback 😊%' +OR payload.review.user.node_id LIKE '%in case you have any feedback 😊%' +OR payload.review.user.avatar_url LIKE '%in case you have any feedback 😊%' +OR payload.review.user.gravatar_id LIKE '%in case you have any feedback 😊%' +OR payload.review.user.url LIKE '%in case you have any feedback 😊%' +OR payload.review.user.html_url LIKE '%in case you have any feedback 😊%' +OR payload.review.user.followers_url LIKE '%in case 
you have any feedback 😊%' +OR payload.review.user.following_url LIKE '%in case you have any feedback 😊%' +OR payload.review.user.gists_url LIKE '%in case you have any feedback 😊%' +OR payload.review.user.starred_url LIKE '%in case you have any feedback 😊%' +OR payload.review.user.subscriptions_url LIKE '%in case you have any feedback 😊%' +OR payload.review.user.organizations_url LIKE '%in case you have any feedback 😊%' +OR payload.review.user.repos_url LIKE '%in case you have any feedback 😊%' +OR payload.review.user.events_url LIKE '%in case you have any feedback 😊%' +OR payload.review.user.received_events_url LIKE '%in case you have any feedback 😊%' +OR payload.review.user.type LIKE '%in case you have any feedback 😊%' +OR payload.review.body LIKE '%in case you have any feedback 😊%' +OR payload.review.commit_id LIKE '%in case you have any feedback 😊%' +OR payload.review.state LIKE '%in case you have any feedback 😊%' +OR payload.review.html_url LIKE '%in case you have any feedback 😊%' +OR payload.review.pull_request_url LIKE '%in case you have any feedback 😊%' +OR payload.review.author_association LIKE '%in case you have any feedback 😊%' +OR payload.review._links.html.href LIKE '%in case you have any feedback 😊%' +OR payload.review._links.pull_request.href LIKE '%in case you have any feedback 😊%' +OR payload.comment.url LIKE '%in case you have any feedback 😊%' +OR payload.comment.html_url LIKE '%in case you have any feedback 😊%' +OR payload.comment.node_id LIKE '%in case you have any feedback 😊%' +OR payload.comment.user.login LIKE '%in case you have any feedback 😊%' +OR payload.comment.user.node_id LIKE '%in case you have any feedback 😊%' +OR payload.comment.user.avatar_url LIKE '%in case you have any feedback 😊%' +OR payload.comment.user.gravatar_id LIKE '%in case you have any feedback 😊%' +OR payload.comment.user.url LIKE '%in case you have any feedback 😊%' +OR payload.comment.user.html_url LIKE '%in case you have any feedback 😊%' +OR 
payload.comment.user.followers_url LIKE '%in case you have any feedback 😊%' +OR payload.comment.user.following_url LIKE '%in case you have any feedback 😊%' +OR payload.comment.user.gists_url LIKE '%in case you have any feedback 😊%' +OR payload.comment.user.starred_url LIKE '%in case you have any feedback 😊%' +OR payload.comment.user.subscriptions_url LIKE '%in case you have any feedback 😊%' +OR payload.comment.user.organizations_url LIKE '%in case you have any feedback 😊%' +OR payload.comment.user.repos_url LIKE '%in case you have any feedback 😊%' +OR payload.comment.user.events_url LIKE '%in case you have any feedback 😊%' +OR payload.comment.user.received_events_url LIKE '%in case you have any feedback 😊%' +OR payload.comment.user.type LIKE '%in case you have any feedback 😊%' +OR payload.comment.path LIKE '%in case you have any feedback 😊%' +OR payload.comment.commit_id LIKE '%in case you have any feedback 😊%' +OR payload.comment.author_association LIKE '%in case you have any feedback 😊%' +OR payload.comment.body LIKE '%in case you have any feedback 😊%' +OR payload.comment.reactions.url LIKE '%in case you have any feedback 😊%' +OR payload.comment.issue_url LIKE '%in case you have any feedback 😊%' +OR payload.comment.diff_hunk LIKE '%in case you have any feedback 😊%' +OR payload.comment.original_commit_id LIKE '%in case you have any feedback 😊%' +OR payload.comment.pull_request_url LIKE '%in case you have any feedback 😊%' +OR payload.comment.start_side LIKE '%in case you have any feedback 😊%' +OR payload.comment.side LIKE '%in case you have any feedback 😊%' +OR payload.issue.url LIKE '%in case you have any feedback 😊%' +OR payload.issue.repository_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.labels_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.comments_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.events_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.html_url LIKE '%in case you have any feedback 
😊%' +OR payload.issue.node_id LIKE '%in case you have any feedback 😊%' +OR payload.issue.title LIKE '%in case you have any feedback 😊%' +OR payload.issue.user.login LIKE '%in case you have any feedback 😊%' +OR payload.issue.user.node_id LIKE '%in case you have any feedback 😊%' +OR payload.issue.user.avatar_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.user.gravatar_id LIKE '%in case you have any feedback 😊%' +OR payload.issue.user.url LIKE '%in case you have any feedback 😊%' +OR payload.issue.user.html_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.user.followers_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.user.following_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.user.gists_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.user.starred_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.user.subscriptions_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.user.organizations_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.user.repos_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.user.events_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.user.received_events_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.user.type LIKE '%in case you have any feedback 😊%' +OR payload.issue.state LIKE '%in case you have any feedback 😊%' +OR payload.issue.assignee.login LIKE '%in case you have any feedback 😊%' +OR payload.issue.assignee.node_id LIKE '%in case you have any feedback 😊%' +OR payload.issue.assignee.avatar_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.assignee.gravatar_id LIKE '%in case you have any feedback 😊%' +OR payload.issue.assignee.url LIKE '%in case you have any feedback 😊%' +OR payload.issue.assignee.html_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.assignee.followers_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.assignee.following_url LIKE '%in 
case you have any feedback 😊%' +OR payload.issue.assignee.gists_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.assignee.starred_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.assignee.subscriptions_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.assignee.organizations_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.assignee.repos_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.assignee.events_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.assignee.received_events_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.assignee.type LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.url LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.html_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.labels_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.node_id LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.title LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.description LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.creator.login LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.creator.node_id LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.creator.avatar_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.creator.gravatar_id LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.creator.url LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.creator.html_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.creator.followers_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.creator.following_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.creator.gists_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.creator.starred_url LIKE '%in case you have any 
feedback 😊%' +OR payload.issue.milestone.creator.subscriptions_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.creator.organizations_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.creator.repos_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.creator.events_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.creator.received_events_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.creator.type LIKE '%in case you have any feedback 😊%' +OR payload.issue.milestone.state LIKE '%in case you have any feedback 😊%' +OR payload.issue.author_association LIKE '%in case you have any feedback 😊%' +OR payload.issue.active_lock_reason LIKE '%in case you have any feedback 😊%' +OR payload.issue.body LIKE '%in case you have any feedback 😊%' +OR payload.issue.reactions.url LIKE '%in case you have any feedback 😊%' +OR payload.issue.timeline_url LIKE '%in case you have any feedback 😊%' +OR payload.issue.state_reason LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.node_id LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.html_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.diff_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.patch_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.issue_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.state LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.title LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.user.login LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.user.node_id LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.user.avatar_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.user.gravatar_id LIKE '%in case you have any feedback 😊%' +OR 
payload.pull_request.user.url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.user.html_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.user.followers_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.user.following_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.user.gists_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.user.starred_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.user.subscriptions_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.user.organizations_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.user.repos_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.user.events_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.user.received_events_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.user.type LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.body LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.merge_commit_sha LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.commits_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.review_comments_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.review_comment_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.comments_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.statuses_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.label LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.ref LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.sha LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.user.login LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.user.node_id LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.user.avatar_url LIKE 
'%in case you have any feedback 😊%' +OR payload.pull_request.head.user.gravatar_id LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.user.url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.user.html_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.user.followers_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.user.following_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.user.gists_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.user.starred_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.user.subscriptions_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.user.organizations_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.user.repos_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.user.events_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.user.received_events_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.user.type LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.node_id LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.name LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.full_name LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.owner.login LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.owner.node_id LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.owner.avatar_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.owner.gravatar_id LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.owner.url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.owner.html_url LIKE '%in case you have any feedback 😊%' +OR 
payload.pull_request.head.repo.owner.followers_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.owner.following_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.owner.gists_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.owner.starred_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.owner.subscriptions_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.owner.organizations_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.owner.repos_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.owner.events_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.owner.received_events_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.owner.type LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.html_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.description LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.forks_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.keys_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.collaborators_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.teams_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.hooks_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.issue_events_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.events_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.assignees_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.branches_url LIKE '%in case you have any feedback 😊%' 
+OR payload.pull_request.head.repo.tags_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.blobs_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.git_tags_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.git_refs_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.trees_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.statuses_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.languages_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.stargazers_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.contributors_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.subscribers_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.subscription_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.commits_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.git_commits_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.comments_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.issue_comment_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.contents_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.compare_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.merges_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.archive_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.downloads_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.issues_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.pulls_url LIKE '%in case you have any feedback 😊%' +OR 
payload.pull_request.head.repo.milestones_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.notifications_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.labels_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.releases_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.deployments_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.git_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.ssh_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.clone_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.svn_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.homepage LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.language LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.mirror_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.visibility LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.head.repo.default_branch LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.label LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.ref LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.sha LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.user.login LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.user.node_id LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.user.avatar_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.user.gravatar_id LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.user.url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.user.html_url LIKE '%in case you have any feedback 😊%' +OR 
payload.pull_request.base.user.followers_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.user.following_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.user.gists_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.user.starred_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.user.subscriptions_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.user.organizations_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.user.repos_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.user.events_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.user.received_events_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.user.type LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.node_id LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.name LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.full_name LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.owner.login LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.owner.node_id LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.owner.avatar_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.owner.gravatar_id LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.owner.url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.owner.html_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.owner.followers_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.owner.following_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.owner.gists_url LIKE '%in case you have any feedback 😊%' +OR 
payload.pull_request.base.repo.owner.starred_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.owner.subscriptions_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.owner.organizations_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.owner.repos_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.owner.events_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.owner.received_events_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.owner.type LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.html_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.description LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.forks_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.keys_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.collaborators_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.teams_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.hooks_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.issue_events_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.events_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.assignees_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.branches_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.tags_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.blobs_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.git_tags_url LIKE '%in case you have any feedback 😊%' +OR 
payload.pull_request.base.repo.git_refs_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.trees_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.statuses_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.languages_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.stargazers_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.contributors_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.subscribers_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.subscription_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.commits_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.git_commits_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.comments_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.issue_comment_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.contents_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.compare_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.merges_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.archive_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.downloads_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.issues_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.pulls_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.milestones_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.notifications_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.labels_url LIKE '%in case you have any feedback 😊%' +OR 
payload.pull_request.base.repo.releases_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.deployments_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.git_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.ssh_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.clone_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.svn_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.homepage LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.language LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.mirror_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.visibility LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.base.repo.default_branch LIKE '%in case you have any feedback 😊%' +OR payload.pull_request._links.self.href LIKE '%in case you have any feedback 😊%' +OR payload.pull_request._links.html.href LIKE '%in case you have any feedback 😊%' +OR payload.pull_request._links.issue.href LIKE '%in case you have any feedback 😊%' +OR payload.pull_request._links.comments.href LIKE '%in case you have any feedback 😊%' +OR payload.pull_request._links.review_comments.href LIKE '%in case you have any feedback 😊%' +OR payload.pull_request._links.review_comment.href LIKE '%in case you have any feedback 😊%' +OR payload.pull_request._links.commits.href LIKE '%in case you have any feedback 😊%' +OR payload.pull_request._links.statuses.href LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.author_association LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.active_lock_reason LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.mergeable_state LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.merged_by.login LIKE '%in case you have any feedback 😊%' +OR 
payload.pull_request.merged_by.node_id LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.merged_by.avatar_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.merged_by.gravatar_id LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.merged_by.url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.merged_by.html_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.merged_by.followers_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.merged_by.following_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.merged_by.gists_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.merged_by.starred_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.merged_by.subscriptions_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.merged_by.organizations_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.merged_by.repos_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.merged_by.events_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.merged_by.received_events_url LIKE '%in case you have any feedback 😊%' +OR payload.pull_request.merged_by.type LIKE '%in case you have any feedback 😊%' +OR payload.forkee.node_id LIKE '%in case you have any feedback 😊%' +OR payload.forkee.name LIKE '%in case you have any feedback 😊%' +OR payload.forkee.full_name LIKE '%in case you have any feedback 😊%' +OR payload.forkee.owner.login LIKE '%in case you have any feedback 😊%' +OR payload.forkee.owner.node_id LIKE '%in case you have any feedback 😊%' +OR payload.forkee.owner.avatar_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.owner.gravatar_id LIKE '%in case you have any feedback 😊%' +OR payload.forkee.owner.url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.owner.html_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.owner.followers_url 
LIKE '%in case you have any feedback 😊%' +OR payload.forkee.owner.following_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.owner.gists_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.owner.starred_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.owner.subscriptions_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.owner.organizations_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.owner.repos_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.owner.events_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.owner.received_events_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.owner.type LIKE '%in case you have any feedback 😊%' +OR payload.forkee.html_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.description LIKE '%in case you have any feedback 😊%' +OR payload.forkee.url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.forks_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.keys_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.collaborators_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.teams_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.hooks_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.issue_events_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.events_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.assignees_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.branches_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.tags_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.blobs_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.git_tags_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.git_refs_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.trees_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.statuses_url 
LIKE '%in case you have any feedback 😊%' +OR payload.forkee.languages_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.stargazers_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.contributors_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.subscribers_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.subscription_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.commits_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.git_commits_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.comments_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.issue_comment_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.contents_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.compare_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.merges_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.archive_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.downloads_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.issues_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.pulls_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.milestones_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.notifications_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.labels_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.releases_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.deployments_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.git_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.ssh_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.clone_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.svn_url LIKE '%in case you have any feedback 😊%' +OR payload.forkee.homepage LIKE '%in case you have any feedback 😊%' +OR payload.forkee.visibility LIKE '%in case you have any 
feedback 😊%' +OR payload.forkee.default_branch LIKE '%in case you have any feedback 😊%' +OR payload.release.url LIKE '%in case you have any feedback 😊%' +OR payload.release.assets_url LIKE '%in case you have any feedback 😊%' +OR payload.release.upload_url LIKE '%in case you have any feedback 😊%' +OR payload.release.html_url LIKE '%in case you have any feedback 😊%' +OR payload.release.author.login LIKE '%in case you have any feedback 😊%' +OR payload.release.author.node_id LIKE '%in case you have any feedback 😊%' +OR payload.release.author.avatar_url LIKE '%in case you have any feedback 😊%' +OR payload.release.author.gravatar_id LIKE '%in case you have any feedback 😊%' +OR payload.release.author.url LIKE '%in case you have any feedback 😊%' +OR payload.release.author.html_url LIKE '%in case you have any feedback 😊%' +OR payload.release.author.followers_url LIKE '%in case you have any feedback 😊%' +OR payload.release.author.following_url LIKE '%in case you have any feedback 😊%' +OR payload.release.author.gists_url LIKE '%in case you have any feedback 😊%' +OR payload.release.author.starred_url LIKE '%in case you have any feedback 😊%' +OR payload.release.author.subscriptions_url LIKE '%in case you have any feedback 😊%' +OR payload.release.author.organizations_url LIKE '%in case you have any feedback 😊%' +OR payload.release.author.repos_url LIKE '%in case you have any feedback 😊%' +OR payload.release.author.events_url LIKE '%in case you have any feedback 😊%' +OR payload.release.author.received_events_url LIKE '%in case you have any feedback 😊%' +OR payload.release.author.type LIKE '%in case you have any feedback 😊%' +OR payload.release.node_id LIKE '%in case you have any feedback 😊%' +OR payload.release.tag_name LIKE '%in case you have any feedback 😊%' +OR payload.release.target_commitish LIKE '%in case you have any feedback 😊%' +OR payload.release.name LIKE '%in case you have any feedback 😊%' +OR payload.release.tarball_url LIKE '%in case you have any feedback 😊%' +OR 
payload.release.zipball_url LIKE '%in case you have any feedback 😊%' +OR payload.release.body LIKE '%in case you have any feedback 😊%' +OR payload.release.short_description_html LIKE '%in case you have any feedback 😊%' +OR payload.release.discussion_url LIKE '%in case you have any feedback 😊%' +OR payload.member.login LIKE '%in case you have any feedback 😊%' +OR payload.member.node_id LIKE '%in case you have any feedback 😊%' +OR payload.member.avatar_url LIKE '%in case you have any feedback 😊%' +OR payload.member.gravatar_id LIKE '%in case you have any feedback 😊%' +OR payload.member.url LIKE '%in case you have any feedback 😊%' +OR payload.member.html_url LIKE '%in case you have any feedback 😊%' +OR payload.member.followers_url LIKE '%in case you have any feedback 😊%' +OR payload.member.following_url LIKE '%in case you have any feedback 😊%' +OR payload.member.gists_url LIKE '%in case you have any feedback 😊%' +OR payload.member.starred_url LIKE '%in case you have any feedback 😊%' +OR payload.member.subscriptions_url LIKE '%in case you have any feedback 😊%' +OR payload.member.organizations_url LIKE '%in case you have any feedback 😊%' +OR payload.member.repos_url LIKE '%in case you have any feedback 😊%' +OR payload.member.events_url LIKE '%in case you have any feedback 😊%' +OR payload.member.received_events_url LIKE '%in case you have any feedback 😊%' +OR payload.member.type LIKE '%in case you have any feedback 😊%' diff --git a/docs/commands/super.md b/docs/commands/super.md index 1faec39c6a..41eaa2d839 100644 --- a/docs/commands/super.md +++ b/docs/commands/super.md @@ -7,13 +7,10 @@ sidebar_label: super > **TL;DR** `super` is a command-line tool that uses [SuperSQL](../language/README.md) > to query a variety of data formats in files, over HTTP, or in [S3](../integrations/amazon-s3.md) -> storage. 
It is particularly fast when operating on data in binary formats such as -> [Super Binary](../formats/bsup.md), [Super Columnar](../formats/csup.md), and -> [Parquet](https://github.com/apache/parquet-format). -> -> The `super` design philosophy blends the command-line, embedded database -> approach of SQLite and DuckDB with the query/search-tool approach -> of `jq`, `awk`, and `grep`. +> storage. Best performance is achieved when operating on data in binary formats such as +> [Super Binary](../formats/bsup.md), [Super Columnar](../formats/csup.md), +> [Parquet](https://github.com/apache/parquet-format), or +> [Arrow](https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format). ## Usage @@ -22,54 +19,64 @@ super [ options ] [ -c query ] input [ input ... ] ``` `super` is a command-line tool for processing data in diverse input -formats, powering data wrangling, search, analytics, and extensive transformations -using the [SuperSQL language](../language/README.md). A SuperSQL query may be extended with -[pipe syntax](https://research.google/pubs/sql-has-problems-we-can-fix-them-pipe-syntax-in-sql/) -to apply Boolean logic or keyword search to filter the input, transform, and/or analyze -the filtered stream. Output is written to one or more files or to -standard output. - -Each `input` argument must be a file path, an HTTP or HTTPS URL, -an S3 URL, or standard input specified with `-`. - -For built-in command help and a listing of all available options, -simply run `super` with no arguments. - -`super` supports a number of [input](#input-formats) and [output](#output-formats) formats, but [Super Binary](../formats/bsup.md) -tends to be the most space-efficient and most performant. Super Binary has efficiency similar to -[Avro](https://avro.apache.org) -and [Protocol Buffers](https://developers.google.com/protocol-buffers) -but its comprehensive [type system](../formats/zed.md) obviates -the need for schema specification or registries. 
-Also, the [Super JSON](../formats/jsup.md) format is human-readable and entirely one-to-one with Super Binary -so there is no need to represent non-readable formats like Avro or Protocol Buffers -in a clunky JSON encapsulated form. - -`super` typically operates on Super Binary-encoded data and when you want to inspect -human-readable bits of output, you merely format it as Super JSON, which is the -default format when output is directed to the terminal. Super Binary is the default -when redirecting to a non-terminal output like a file or pipe. +formats, providing data wrangling, search, analytics, and extensive transformations +using the [SuperSQL](../language/README.md) dialect of SQL. Any SQL query expression +may be extended with [pipe syntax](https://research.google/pubs/sql-has-problems-we-can-fix-them-pipe-syntax-in-sql/) +to filter, transform, and/or analyze input data. +Super's SQL pipes dialect is extensive, so much so that it can resemble +a log-search experience despite its SQL foundation. -When run with input arguments, each input's format is [automatically inferred](#auto-detection) -and each input is scanned -in the order appearing on the command line forming the input stream. +The `super` command works with data from ephemeral sources like files and URLs. +If you want to persist your data into a data lake for persistent storage, +check out the [`super db`](super-db.md) set of commands. By invoking the `-c` option, a query expressed in the [SuperSQL language](../language/README.md) may be specified and applied to the input stream. -If no query is specified, the inputs are scanned without modification -and output in the desired format as [described below](#input-formats). This latter approach -provides a convenient means to convert files from one format to another. +Super's data model is based on super-structured data, meaning that all data +is both strongly _and_ dynamically typed and need not conform to a homogeneous +schema. 
The type structure is self-describing so it's easy to daisy-chain +queries and inspect data at any point in a complex query or data pipeline. +For example, there's no need for a set of Parquet input files to all be +schema-compatible and it's easy to mix and match Parquet with JSON across +queries. -When `super` is run with a query and no input arguments, then the query must -begin with -* a [`from`, `file`, or `get` operator](../language/operators/from.md), or -* an explicit or implied [`yield` operator](../language/operators/yield.md). +When processing JSON data, all values are converted to strongly typed values +that fit naturally alongside relational data so there is no need for a separate +"JSON type". Unlike SQL systems that integrate JSON data, +there isn't a JSON way to do things and a separate relational way +to do things. -In the case of a `yield` with no inputs, the query is run with -a single input value of `null`. This provides a convenient means to run in a -"calculator mode" where input is produced by the `yield` and can be operated upon -by the query, e.g., +Because there are no schemas, there is no schema inference, so inferred schemas +do not haphazardly change when input data changes in subtle ways. + +Each `input` argument to `super` must be a file path, an HTTP or HTTPS URL, +an S3 URL, or standard input specified with `-`. +These input arguments are treated as if a SQL "from" operator precedes +the provided query, e.g., +``` +super -c "from example.json | select typeof(this)" +``` +is equivalent to +``` +super -c "select typeof(this)" example.json +``` +Output is written to one or more files or to standard output in the format specified. + +When multiple input files are specified, they are processed in the order given as +if the data were provided by a single, concatenated "from" clause. 
+ +If no query is specified with `-c`, the inputs are scanned without modification +and output in the desired format as [described below](#input-formats), +providing a convenient means to convert files from one format to another, e.g., +``` +super -f arrows file1.json file2.parquet file3.csv > file-combined.arrows +``` +When `super` is run with a query that has no "from" operator and no input arguments, +the SuperSQL query is fed a single `null` value analogous to SQL's default +input of a single empty row of an unnamed table. +This provides a convenient means to explore examples or run in a +"calculator mode", e.g., ```mdtest-command super -z -c '1+1' ``` @@ -77,10 +84,40 @@ emits ```mdtest-output 2 ``` -Note here that the query `1+1` [implies](../language/pipeline-model.md#implied-operators) -`yield 1+1`. +Note that SuperSQL has syntactic shortcuts for interactive data exploration and +an expression that stands alone is a shortcut for `select value`, e.g., the query text +``` +1+1 +``` +is equivalent to +``` +select value 1+1 +``` +To learn more about shortcuts, refer to the SuperSQL +[documentation on shortcuts](../language/pipeline-model.md#implied-operators). + +For built-in command help and a listing of all available options, +simply run `super` with no arguments. + +## Data Formats + +`super` supports a number of [input](#input-formats) and [output](#output-formats) formats, but the super formats +([Super Binary](../formats/bsup.md), +[Super Columnar](../formats/csup.md), +and [Super JSON](../formats/jsup.md)) tend to be the most versatile and +easy to work with. + +`super` typically operates on binary-encoded data and when you want to inspect +human-readable bits of output, you merely format it as Super JSON, which is the +default format when output is directed to the terminal. Super Binary is the default +when redirecting to a non-terminal output like a file or pipe.
+ +Unless the `-i` option specifies a specific input format, +each input's format is [automatically inferred](#auto-detection) +and each input is scanned +in the order appearing on the command line forming the input stream. -## Input Formats +### Input Formats `super` currently supports the following input formats: @@ -138,8 +175,8 @@ would produce this output in the default Super JSON format ### JSON Auto-detection: Super vs. Plain -Since [Super JSON](../formats/jsup.md) is a superset of plain JSON, `super` must be careful in whether it -interprets input as either format. While you can always clarify your intent +Since [Super JSON](../formats/jsup.md) is a superset of plain JSON, `super` must be careful how it distinguishes the two cases when performing auto-inference. +While you can always clarify your intent with the `-i jsup` or `-i json`, `super` attempts to "just do the right thing" when you run it with Super JSON vs. plain JSON. @@ -164,7 +201,7 @@ as an outer object or as a value nested somewhere within a JSON array. This heuristic almost always works in practice because Super JSON records typically omit quotes around field names. -## Output Formats +### Output Formats `super` currently supports the following output formats: @@ -270,8 +307,8 @@ or register schemas or "protos" with the downstream entities. 
In particular, Super Binary data can simply be concatenated together, e.g., ```mdtest-command -super -f bsup -c 'yield 1,[1,2,3]' > a.bsup -super -f bsup -c 'yield {s:"hello"},{s:"world"}' > b.bsup +super -f bsup -c 'select value 1, [1,2,3]' > a.bsup +super -f bsup -c 'select value {s:"hello"}, {s:"world"}' > b.bsup cat a.bsup b.bsup | super -z - ``` produces @@ -283,7 +320,7 @@ produces ``` And while this Super JSON output is human readable, the Super Binary files are binary, e.g., ```mdtest-command -super -f bsup -c 'yield 1,[1,2,3]' > a.bsup +super -f bsup -c 'select value 1,[ 1,2,3]' > a.bsup hexdump -C a.bsup ``` produces @@ -545,7 +582,7 @@ have many examples, but here are a few more simple `super` use cases. _Hello, world_ ```mdtest-command -echo '"hello, world"' | super -z -c 'yield this' - +super -z -c "select value 'hello, world'" ``` produces this Super JSON output ```mdtest-output @@ -554,7 +591,7 @@ produces this Super JSON output _Some values of available [data types](../language/data-types.md)_ ```mdtest-command -echo '1 1.5 [1,"foo"] |["apple","banana"]|' | super -z -c 'yield this' - +echo '1 1.5 [1,"foo"] |["apple","banana"]|' | super -z - ``` produces ```mdtest-output @@ -565,7 +602,7 @@ produces ``` _The types of various data_ ```mdtest-command -echo '1 1.5 [1,"foo"] |["apple","banana"]|' | super -z -c 'yield typeof(this)' - +echo '1 1.5 [1,"foo"] |["apple","banana"]|' | super -z -c 'select value typeof(this)' - ``` produces ```mdtest-output @@ -616,200 +653,728 @@ produces ## Performance -Your mileage may vary, but many new users of `super` are surprised by its speed -compared to tools like `jq`, `grep`, `awk`, or `sqlite` especially when running -`super` over files in the Super Binary format. - -### Fast Pattern Matching - -One important technique that helps `super` run fast is to take advantage of queries -that involve fine-grained searches. 
+You might think that the overhead involved in managing super-structured types +and the generality of heterogeneous data would confound the performance of +the `super` command, but it turns out that `super` can hold its own when +compared to other analytics systems. -When a query begins with a logical expression containing either a search -or a predicate match with a constant value, and presuming the input data format -is Super Binary, then the runtime optimizes the query by performing an efficient, -byte-oriented "pre-search" of the values required in the predicate. This pre-search -scans the bytes that comprise a large buffer of values and looks for these values -and, if they are not present, the entire buffer is discarded knowing no individual -value in that buffer could match because the required serialized -values were not present in the buffer. +To illustrate comparative performance, we'll present some informal performance +measurements among `super`, +[`DuckDB`](https://duckdb.org/), +[`ClickHouse`](https://clickhouse.com/), and +[`DataFusion`](https://datafusion.apache.org/). -For example, if the query is -``` -"http error" and ipsrc==10.0.0.1 | count() -``` -then the pre-search would look for the string "http error" and the encoding -of the IP address 10.0.0.1 and unless both those values are present, then the -buffer is discarded. +We'll use the Parquet format to compare apples to apples +and also report results for the custom columnar database format of DuckDB +and the Super Binary format used by `super`. +We tried loading our test data into a ClickHouse table using its +[new experimental JSON type](https://clickhouse.com/blog/a-new-powerful-json-data-type-for-clickhouse) +but those attempts failed with "too many open files". -Moreover, Super Binary data is compressed and arranged into frames that can be decompressed -and processed in parallel. 
This allows the decompression and pre-search to -run in parallel very efficiently across a large number of threads. When searching -for sparse results, many frames are discarded without their uncompressed bytes -having to be processed any further. +As of this writing in November 2024, we're using the latest version 1.1.3 of `duckdb`, +version 24.11.1.1393 of `clickhouse`, and v43.0.0 of `datafusion-cli`. -### Efficient JSON Processing +### The Test Data -While processing data in the Super Binary format is far more efficient than JSON, -there is substantial JSON data in the world and it is important for JSON -input to perform well. +These tests are based on the data and exemplary queries +published by the DuckDB team on their blog +[Shredding Deeply Nested JSON, One Vector at a Time](https://duckdb.org/2023/03/03/json.html). We'll follow their script starting at the +[GitHub Archive Examples](https://duckdb.org/2023/03/03/json.html#github-archive-examples). -This proved a challenge as `super` is written in [Go](https://go.dev/) and Go's JSON package -is not particularly performant. To this end, `super` has its own lean and simple -[JSON tokenizer](https://pkg.go.dev/github.com/brimdata/super/pkg/jsonlexer), -which performs quite well, -and is -[integrated tightly](https://github.com/brimdata/super/blob/main/zio/jsonio/reader.go) -with SuperDB's internal data representation. -Moreover, like `jq`, -`super`'s JSON parser does not require objects to be newline delimited and can -incrementally parse the input to minimize memory overhead and improve -processor cache performance. - -The net effect is a JSON parser that is typically a bit faster than the -native C implementation in `jq`. - -### Performance Comparisons - -To provide a rough sense of the performance tradeoffs between `super` and -other tooling, this section provides results of a few simple speed tests. 
- -#### Test Data +If you want to reproduce these results for yourself, +you can fetch the 2.2GB of gzipped JSON data: +``` +wget https://data.gharchive.org/2023-02-08-0.json.gz +wget https://data.gharchive.org/2023-02-08-1.json.gz +... +wget https://data.gharchive.org/2023-02-08-23.json.gz +``` +We downloaded these files into a directory called `gharchive_gz` +and created a duckdb database file called `gha.db` and a table called `gha` +using this command: +``` +duckdb gha.db -c "CREATE TABLE gha AS FROM read_json('gharchive_gz/*.json.gz', union_by_name=true)" +``` +To create a relational table from the input JSON, we utilized DuckDB's +`union_by_name` parameter to fuse all of the different shapes of JSON encountered +into a single monolithic schema. -These tests are easy to reproduce. The input data comes from a -[repository of sample security log data](https://github.com/brimdata/zed-sample-data), -where we used a semi-structured Zeek "conn" log from the `zeek-default` directory. +We then created a Parquet file called `gha.parquet` with this command: +``` +duckdb gha.db -c "COPY (from gha) TO 'gha.parquet'" +``` +To create a super-structured file for the `super` command, there is no need to +fuse the data into a single schema (though `super` can still work with the fused +schema in the Parquet file), and we simply ran this command to create a Super Binary +file: +``` +super gharchive_gz/*.json.gz > gha.bsup +``` +This code path in `super` is not multi-threaded so not particularly performant but, +on our test machine, it takes about the same time as the `duckdb` method of creating +a schema-fused table. 
-It is easy to convert the Zeek logs to a local Super Binary file using -`super`'s built-in [`get` operator](../language/operators/get.md): +Here are the resulting file sizes: ``` -super -o conn.bsup -c 'get https://raw.githubusercontent.com/brimdata/zed-sample-data/main/zeek-default/conn.log.gz' +% du -h gha.db gha.parquet gha.bsup gharchive_gz +9.3G gha.db +4.6G gha.parquet +2.8G gha.bsup +2.2G gharchive_gz ``` -This creates a new file `conn.bsup` from the Zeek log file fetched from GitHub. -Note that this data is a gzip'd file in the Zeek format and `super`'s auto-detector -figures out both that it is gzip'd and that the uncompressed format is Zeek. -There's no need to specify flags for this. +### The Test Queries + +The test queries involve these patterns: +* simple search (single and multicolumn) +* count-where aggregation +* count by field aggregation +* rank over union of disparate field types -Next, a JSON file can be converted from Super Binary using: +We will call these tests `search`, `search+`, `count`, `agg`, and `union`, respectively + +#### Search + +For the search test, we'll search for the string pattern +``` + "in case you have any feedback 😊" +``` +in the field `payload.pull_request.body` +and we'll just count the number of matches found. +The number of matches is small (3) so the query performance is dominated +by the search. + +The SQL for this query is +```sql +SELECT count() +FROM 'gha.parquet' -- or gha +WHERE payload.pull_request.body LIKE '%in case you have any feedback 😊%' +``` +SuperSQL has a function called `grep` that is similar to the SQL `LIKE` clause but +can operate over specified fields or default to all the string fields in any value. 
+The SuperSQL query is +```sql +SELECT count() +FROM 'gha.bsup' +WHERE grep('in case you have any feedback 😊', payload.pull_request.body) ``` -super -f json conn.bsup > conn.json + +#### Search+ + +For search across multiple columns, SQL doesn't have a `grep` function so +we must enumerate all the fields of such a query. The SQL for a string search +over our GitHub Archive dataset involves the following fields: +```sql +SELECT count() FROM gha +WHERE id LIKE '%in case you have any feedback 😊%' + OR type LIKE '%in case you have any feedback 😊%' + OR actor.login LIKE '%in case you have any feedback 😊%' + OR actor.display_login LIKE '%in case you have any feedback 😊%' + ... + OR payload.member.type LIKE '%in case you have any feedback 😊%' ``` -Note here that we lose information in this conversion because the rich data types -of the [super data model](../formats/zed.md) (that were [translated from the Zeek format](../integrations/zeek/data-type-compatibility.md)) are lost. +There are 486 such fields. You can review the entire query in +[docs/commands/search.sql](search.sql). + +#### Count -We'll also make a SQLite database in the file `conn.db` as the table named `conn`. -One easy way to do this is to install -[sqlite-utils](https://sqlite-utils.datasette.io/en/stable/) -and run +In the `count` test, we filter the input with a WHERE clause and count the results. +We chose a random GitHub user name for the filter. +This query has the form: ``` -sqlite-utils insert conn.db conn conn.json --nl +SELECT count() +FROM 'gha.parquet' -- or gha or 'gha.bsup' +WHERE actor.login='johnbieren'" ``` -(If you need a cup of coffee, a good time to get it would be when -loading the JSON into SQLite.) -#### File Sizes +#### Agg -Note the resulting file sizes: +In the `agg` test, we filter the input and count the results grouped by the field `type` +as in the DuckDB blog. 
+This query has the form: ``` -% du -h conn.json conn.db conn.bsup -416M conn.json -192M conn.db - 38M conn.bsup +SELECT count(),type +FROM 'gha.parquet' -- or 'gha' or 'gha.bsup' +WHERE repo.name='duckdb/duckdb' +GROUP BY type ``` -Much of the performance of Super Binary derives from an efficient, parallelizable -structure where frames of data are compressed -(currently with [LZ4](http://lz4.github.io/lz4/) though the -specification supports multiple algorithms) and the sequence of values -can be processed with only partial deserialization. -That said, there are quite a few more opportunities to further improve -the performance of `super` and the SuperDB system and we have a number of projects -forthcoming on this front. +#### Union + +The `union` test is straight out of the DuckDB blog at the end of +[this section](https://duckdb.org/2023/03/03/json.html#handling-inconsistent-json-schemas). +This query computes the GitHub users that were assigned as a PR reviewer the most often +and returns the top 5 such users. +Because the assignees can appear in either a list of strings +or within a single string field, the relational model requires that two different +subqueries run for the two cases and the result unioned together; then, +this intermediary table can be counted using the unnested +assignee as the group-by key. 
+This query is: +```sql +WITH assignees AS ( + SELECT payload.pull_request.assignee.login assignee + FROM 'gha.parquet' + UNION ALL + SELECT unnest(payload.pull_request.assignees).login assignee + FROM 'gha.parquet' +) +SELECT assignee, count(*) count +FROM assignees +WHERE assignee IS NOT NULL +GROUP BY assignee +ORDER BY count DESC +LIMIT 5 +``` +For DataFusion, we needed to rewrite this SELECT +```sql +SELECT unnest(payload.pull_request.assignees).login +FROM 'gha.parquet' +``` +as +```sql +SELECT rec.login as assignee FROM ( + SELECT unnest(payload.pull_request.assignees) rec + FROM 'gha.parquet' +) +``` +and for ClickHouse, we had to use `arrayJoin` instead of `unnest`. + +SuperSQL's data model does not require these sorts of gymnastics as +everything does not have to be jammed into a table. Instead, we can use the +`UNNEST` pipe operator combined with the spread operator applied to the array of +string fields to easily produce a stream of string values representing the +assignees. 
Then we simply aggregate the assignee stream: +``` +FROM 'gha.bsup' +| UNNEST [...payload.pull_request.assignees, payload.pull_request.assignee] +| WHERE this IS NOT NULL +| AGGREGATE count() BY assignee:=login +| ORDER BY count DESC +| LIMIT 5 +``` + +### The Test Results + +The following table summarizes the results of each test as a column and +each tool as a row with the speed-up factor shown in parentheses: + +| tool | format | search | search+ | count | agg | union | +|--------------|---------------|---------------|---------------|----|------|-------| +| `super` | `bsup` | 3.2 (2.6X) | 6.7 (3.6X) | 3.2 (0.04X) | 3.1 (0.04X) | 3.8 (117X) | +| `super` | `parquet` | note 1 | note 1 | 0.18 (0.7X) | 0.27 (0.4X) | note 2 | +| `duckdb` | `db` | 8.2 | 24 | 0.13 | 0.12 | 446 | +| `duckdb` | `parquet` | 8.4 (1) | 23 (1X) | 0.26 (0.5X) | 0.21 (0.6X) | 419 (1.1X) | +| `datafusion` | `parquet` | 9.1 (0.9X) | 18 (1.3X) | 0.24 (0.5X) | 0.24 (0.5X) | 40 (11x) | +| `clickhouse` | `parquet` | 56 (0.1X) | 463 (0.1X) | 1 (0.1X) | 0.91 (0.1X) | 66 (7X) | + +_Note 1: the `super` vectorized runtime does not yet support `grep`_ + +_Note 2: the `super` vectorized runtime does not yet support array expressions_ -#### Tests +Since DuckDB with its native format is overall the best performing, +we used it as the baseline for all of the speedup factors. -We ran three styles of tests on a Mac quad-core 2.3GHz i7: -* `count` - compute the number of values present -* `search` - find a value in a field -* `agg` - sum a field grouped by another field +To summarize, +`super` with Super Binary is substantially faster than the relational systems for +the search use cases and performs on par with the others for traditional OLAP queries, +except for the union query, where the super-structured data model trounces the relational +model (by over 100X!) for stitching together disparate data types for analysis in an aggregation. 
-Each test was run for `jq`, `super` on JSON, `sqlite3`, and `super` on Super Binary. +## Appendix 1: Preparing the Test Data -We used the Bash `time` command to measure elapsed time. +For our tests, we diverged a bit from the methodology in the DuckDB blog and wanted +to put all the JSON data in a single table. It wasn't obvious how to go about this +and this section documents the difficulties we encountered trying to do so. -The command lines for the `count` test were: +First, we simply tried this: ``` -jq -s length conn.json -sqlite3 conn.db 'select count(*) from conn' -super -c 'count()' conn.bsup -super -c 'count()' conn.json +duckdb gha.db -c "CREATE TABLE gha AS FROM 'gharchive_gz/*.json.gz'" ``` -The command lines for the `search` test were: +which fails with ``` -jq 'select(.id.orig_h=="10.47.23.5")' conn.json -sqlite3 conn.db 'select * from conn where json_extract(id, "$.orig_h")=="10.47.23.5"' -super -c 'id.orig_h==10.47.23.5' conn.bsup -super -c 'id.orig_h==10.47.23.5' conn.json +Invalid Input Error: JSON transform error in file "gharchive_gz/2023-02-08-10.json.gz", in line 4903: Object {"url":"https://api.github.com/repos/aws/aws-sam-c... has unknown key "reactions" +Try increasing 'sample_size', reducing 'maximum_depth', specifying 'columns', 'format' or 'records' manually, setting 'ignore_errors' to true, or setting 'union_by_name' to true when reading multiple files with a different structure. ``` -Here, we look for an IP address (10.47.23.5) in a specific -field `id.orig_h` in the semi-structured data. Note when using Super Binary, -the IP is a native type whereas for `jq` and SQLite it is a string. -Note that `sqlite` must use its `json_extract` function since nested JSON objects -are stored as minified JSON text. +Clearly the schema inference algorithm relies upon sampling and the sample doesn't +cover enough data to capture all of its variations. 
-The command lines for the `agg` test were: +Okay, maybe there is a reason the blog first explores the structure of +the data to specify `columns` arguments to `read_json` as suggested by the error +message above. To this end, you can run this query: ``` -jq -n -f agg.jq conn.json -sqlite3 conn.db 'select sum(orig_bytes),json_extract(id, "$.orig_h") as orig_h from conn group by orig_h' -super -c "sum(orig_bytes) by id.orig_h" conn.bsup -super -c "sum(orig_bytes) by id.orig_h" conn.json +SELECT json_group_structure(json) +FROM ( + SELECT * + FROM read_ndjson_objects('gharchive_gz/*.json.gz') + LIMIT 2048 +); ``` -where the `agg.jq` script is: +Unfortunately, if you use the resulting structure to create the `columns` argument +then `duckdb` fails also because the first 2048 records don't have enough coverage. +So let's try removing the `LIMIT` clause: ``` -def adder(stream): - reduce stream as $s ({}; .[$s.key] += $s.val); -adder(inputs | {key:.id.orig_h,val:.orig_bytes}) -| to_entries[] -| {orig_h: (.key), sum: .value} +SELECT json_group_structure(json) +FROM ( + SELECT * + FROM read_ndjson_objects('gharchive_gz/*.json.gz') +); ``` +Hmm, now `duckdb` runs out of memory. -#### Results +We then thought we'd see if the sampling algorithm of `read_json` is more efficient, +so we ran tried this command with successively larger sample sizes: +``` +duckdb scratch -c "CREATE TABLE gha AS FROM read_json('gharchive_gz/*.json.gz', sample_size=1000000)" +``` +even with a million rows as the sample, `duckdb` fails with +``` +Invalid Input Error: JSON transform error in file "gharchive_gz/2023-02-08-14.json.gz", in line 49745: Object {"issues":"write","metadata":"read","pull_requests... has unknown key "repository_hooks" +Try increasing 'sample_size', reducing 'maximum_depth', specifying 'columns', 'format' or 'records' manually, setting 'ignore_errors' to true, or setting 'union_by_name' to true when reading multiple files with a different structure. 
+``` +Ok, there are 4,434,953 JSON objects in the input, so let's try this: +``` +duckdb gha.db -c "CREATE TABLE gha AS FROM read_json('gharchive_gz/*.json.gz', sample_size=4434953)" +``` +And again `duckdb` runs out of memory. -The following table summarizes the results of each test as a column and -each tool as a row with the speed-up factor (relative to `jq`) -shown in parentheses: - -| | `count` | `search` | `agg` | -|------|---------------|---------------|---------------| -| `jq` | 11,540ms (1X) | 10,730ms (1X) | 20,175ms (1X) | -| `super-json` | 7,150ms (1.6X) | 7,230ms (1.5X) | 7,390ms (2.7X) | -| `sqlite` | 100ms (115X) | 620ms (17X) | 1,475ms (14X) | -| `super-bsup` | 110ms (105X) | 135ms (80X) | 475ms (42X) | - -To summarize, `super` with Super Binary is consistently fastest though `sqlite` -was a bit faster counting rows. - -In particular, `super` is substantially faster (40-100X) than `jq` with the efficient -Super Binary format but more modestly faster (50-170%) when processing the bulky JSON input. -This is expected because parsing JSON becomes the bottleneck. - -While SQLite is much faster than `jq`, it is not as fast as `super`. The primary -reason for this is that SQLite stores its semi-structured columns as minified JSON text, -so it must scan and parse the JSON when executing the _where_ clause above -as well as the aggregated fields. - -Also, note that the inferior performance of `sqlite` is in areas where databases -perform extraordinarily well if you do the work to -(1) transform semi-structured columns to relational columns by flattening -nested JSON objects (which are not indexable by `sqlite`) and -(2) configuring database indexes. - -In fact, if you implement these changes, `sqlite` performs better than `super` on these tests. - -However, the benefit of SuperDB is that no flattening is required. And unlike `sqlite`, -`super` is not intended to be a database. 
That said, there is no reason why database -performance techniques cannot be applied to the super data model and this is precisely what the -open-source SuperDB project intends to do. - -Stay tuned! +So we looked at the other options suggested by the error message and +`union_by_name` appeared promising. Enabling this option causes DuckDB +to combine all the JSON objects into a single fused schema. +Maybe this would work better? + +Sure enough, this works: +``` +duckdb gha.db -c "CREATE TABLE gha AS FROM read_json('gharchive_gz/*.json.gz', union_by_name=true)" +``` +We now have the `duckdb` database file for our GitHub Archive data called `gha.db` +containing a single table called `gha` embedded in that database. +What about the super-structured +format for the `super` command? There is no need to futz with sample sizes, +schema inference, or union by name; just run this to create a Super Binary file: +``` +super gharchive_gz/*.json.gz > gha.bsup +``` + +## Appendix 2: Running the Tests + +This appendix provides the raw tests and output that we ran on a MacBook Pro to generate +the table of results above. 
+ +### Search Test + +``` +; time super -c " + SELECT count() + FROM 'gha.bsup' + WHERE grep('in case you have any feedback 😊', payload.pull_request.body) +" +{count:2(uint64)} +super -c 12.70s user 0.69s system 415% cpu 3.223 total + +time duckdb gha.db -c " + SELECT count() + FROM gha + WHERE payload.pull_request.body LIKE '%in case you have any feedback 😊%' +" +┌──────────────┐ +│ count_star() │ +│ int64 │ +├──────────────┤ +│ 2 │ +└──────────────┘ +duckdb gha.db -c 26.66s user 6.90s system 406% cpu 8.266 total + +; time duckdb -c " + SELECT count() + FROM gha.parquet + WHERE payload.pull_request.body LIKE '%in case you have any feedback 😊%' +" +┌──────────────┐ +│ count_star() │ +│ int64 │ +├──────────────┤ +│ 2 │ +└──────────────┘ +duckdb -c 42.71s user 6.06s system 582% cpu 8.380 total + +; time datafusion-cli -c " + SELECT count() + FROM 'gha.parquet' + WHERE payload.pull_request.body LIKE '%in case you have any feedback 😊%' +" +DataFusion CLI v43.0.0 ++---------+ +| count() | ++---------+ +| 2 | ++---------+ +1 row(s) fetched. +Elapsed 8.819 seconds. 
+ +datafusion-cli -c 40.75s user 6.72s system 521% cpu 9.106 total + +; time clickhouse -q " + SELECT count() + FROM 'gha.parquet' + WHERE payload.pull_request.body LIKE '%in case you have any feedback 😊%' +" +2 +clickhouse -q 50.81s user 1.83s system 94% cpu 55.994 total +``` + +### Search+ Test + +``` +; time super -c " + SELECT count() + FROM 'gha.bsup' + WHERE grep('in case you have any feedback 😊') +" +{count:3(uint64)} +super -c 43.80s user 0.71s system 669% cpu 6.653 total + +; time duckdb gha.db < search.sql +┌──────────────┐ +│ count_star() │ +│ int64 │ +├──────────────┤ +│ 3 │ +└──────────────┘ +duckdb gha.db < search.sql 73.60s user 33.29s system 435% cpu 24.563 total + +; time duckdb < search-parquet.sql +┌──────────────┐ +│ count_star() │ +│ int64 │ +├──────────────┤ +│ 3 │ +└──────────────┘ +duckdb < search-parquet.sql 89.57s user 29.21s system 513% cpu 23.113 total + +; time datafusion-cli -f search-parquet.sql +DataFusion CLI v43.0.0 ++---------+ +| count() | ++---------+ +| 3 | ++---------+ +1 row(s) fetched. +Elapsed 18.184 seconds. 
+datafusion-cli -f search-parquet.sql 83.84s user 11.13s system 513% cpu 18.494 total + +; time clickhouse --queries-file search-parquet.sql +3 +clickhouse --queries-file search-parquet.sql 515.68s user 5.50s system 112% cpu 7:43.37 total +``` + +### Count Test + +``` +; time super -c " + SELECT count() + FROM 'gha.bsup' + WHERE actor.login='johnbieren' +" +{count:879(uint64)} +super -c 13.81s user 0.71s system 449% cpu 3.233 total + +; time SUPER_VAM=1 super -c " + SELECT count() + FROM 'gha.parquet' + WHERE actor.login='johnbieren' +" +{count:879(uint64)} +SUPER_VAM=1 super -c 0.43s user 0.08s system 277% cpu 0.182 total + +; time duckdb gha.db -c " + SELECT count() + FROM gha + WHERE actor.login='johnbieren' +" +┌──────────────┐ +│ count_star() │ +│ int64 │ +├──────────────┤ +│ 879 │ +└──────────────┘ +duckdb gha.db -c 0.64s user 0.06s system 517% cpu 0.134 total + +; time duckdb -c " + SELECT count() + FROM 'gha.parquet' + WHERE actor.login='johnbieren' +" +┌──────────────┐ +│ count_star() │ +│ int64 │ +├──────────────┤ +│ 879 │ +└──────────────┘ +duckdb -c 1.14s user 0.14s system 490% cpu 0.261 total + +; time datafusion-cli -c " + SELECT count() + FROM 'gha.parquet' + WHERE actor.login='johnbieren' +" +DataFusion CLI v43.0.0 ++---------+ +| count() | ++---------+ +| 879 | ++---------+ +1 row(s) fetched. +Elapsed 0.203 seconds. 
+ +datafusion-cli -c 0.93s user 0.15s system 453% cpu 0.238 total + +; time clickhouse -q " + SELECT count() + FROM 'gha.parquet' + WHERE actor.login='johnbieren' +" +879 +clickhouse -q 0.86s user 0.07s system 93% cpu 1.001 total +``` + +### Agg Test + +``` +; time super -c " + SELECT count(),type + FROM 'gha.bsup' + WHERE repo.name='duckdb/duckdb' + GROUP BY type +" +{type:"PullRequestReviewEvent",count:14(uint64)} +{type:"IssueCommentEvent",count:30(uint64)} +{type:"WatchEvent",count:29(uint64)} +{type:"PullRequestEvent",count:35(uint64)} +{type:"PushEvent",count:15(uint64)} +{type:"IssuesEvent",count:9(uint64)} +{type:"ForkEvent",count:3(uint64)} +{type:"PullRequestReviewCommentEvent",count:7(uint64)} +super -c 12.24s user 0.68s system 413% cpu 3.129 total + +; time SUPER_VAM=1 super -c " + SELECT count(),type + FROM 'gha.parquet' + WHERE repo.name='duckdb/duckdb' + GROUP BY type +" +{type:"IssueCommentEvent",count:30(uint64)} +{type:"PullRequestEvent",count:35(uint64)} +{type:"PushEvent",count:15(uint64)} +{type:"WatchEvent",count:29(uint64)} +{type:"PullRequestReviewEvent",count:14(uint64)} +{type:"ForkEvent",count:3(uint64)} +{type:"PullRequestReviewCommentEvent",count:7(uint64)} +{type:"IssuesEvent",count:9(uint64)} +SUPER_VAM=1 super -c 1.01s user 0.13s system 421% cpu 0.271 total + +; time duckdb gha.db -c " + SELECT count(),type + FROM gha + WHERE repo.name='duckdb/duckdb' + GROUP BY type +" +┌──────────────┬───────────────────────────────┐ +│ count_star() │ type │ +│ int64 │ varchar │ +├──────────────┼───────────────────────────────┤ +│ 3 │ ForkEvent │ +│ 35 │ PullRequestEvent │ +│ 29 │ WatchEvent │ +│ 7 │ PullRequestReviewCommentEvent │ +│ 15 │ PushEvent │ +│ 9 │ IssuesEvent │ +│ 14 │ PullRequestReviewEvent │ +│ 30 │ IssueCommentEvent │ +└──────────────┴───────────────────────────────┘ +duckdb gha.db -c 0.49s user 0.06s system 466% cpu 0.119 total + +; time duckdb -c " + SELECT count(),type + FROM 'gha.parquet' + WHERE repo.name='duckdb/duckdb' + GROUP 
BY type +" +┌──────────────┬───────────────────────────────┐ +│ count_star() │ type │ +│ int64 │ varchar │ +├──────────────┼───────────────────────────────┤ +│ 9 │ IssuesEvent │ +│ 7 │ PullRequestReviewCommentEvent │ +│ 15 │ PushEvent │ +│ 14 │ PullRequestReviewEvent │ +│ 3 │ ForkEvent │ +│ 29 │ WatchEvent │ +│ 35 │ PullRequestEvent │ +│ 30 │ IssueCommentEvent │ +└──────────────┴───────────────────────────────┘ +duckdb -c 0.73s user 0.14s system 413% cpu 0.211 total + +; time datafusion-cli -c " + SELECT count(),type + FROM 'gha.parquet' + WHERE repo.name='duckdb/duckdb' + GROUP BY type +" +DataFusion CLI v43.0.0 ++---------+-------------------------------+ +| count() | type | ++---------+-------------------------------+ +| 15 | PushEvent | +| 35 | PullRequestEvent | +| 7 | PullRequestReviewCommentEvent | +| 14 | PullRequestReviewEvent | +| 30 | IssueCommentEvent | +| 9 | IssuesEvent | +| 29 | WatchEvent | +| 3 | ForkEvent | ++---------+-------------------------------+ +8 row(s) fetched. +Elapsed 0.200 seconds. 
+ +datafusion-cli -c 0.80s user 0.15s system 398% cpu 0.238 total + +; time clickhouse -q " + SELECT count(),type + FROM 'gha.parquet' + WHERE repo.name='duckdb/duckdb' + GROUP BY type +" +30 IssueCommentEvent +14 PullRequestReviewEvent +15 PushEvent +29 WatchEvent +9 IssuesEvent +7 PullRequestReviewCommentEvent +3 ForkEvent +35 PullRequestEvent +clickhouse -q 0.77s user 0.11s system 97% cpu 0.908 total +``` + +### Union Test + +``` +time super -c " + FROM 'gha.bsup' + | SELECT VALUE payload.pull_request + | WHERE this IS NOT NULL + | UNNEST [...assignees, assignee] + | WHERE this IS NOT NULL + | AGGREGATE count() BY assignee:=login + | ORDER BY count DESC + | LIMIT 5 +" +{assignee:"poad",count:1966(uint64)} +{assignee:"vinayakkulkarni",count:508(uint64)} +{assignee:"tmtmtmtm",count:356(uint64)} +{assignee:"AMatutat",count:260(uint64)} +{assignee:"danwinship",count:208(uint64)} +super -c 12.39s user 0.95s system 351% cpu 3.797 total + +; time duckdb gha.db -c " + WITH assignees AS ( + SELECT payload.pull_request.assignee.login assignee + FROM gha + UNION ALL + SELECT unnest(payload.pull_request.assignees).login assignee + FROM gha + ) + SELECT assignee, count(*) count + FROM assignees + WHERE assignee NOT NULL + GROUP BY assignee + ORDER BY count DESC + LIMIT 5 +" +┌─────────────────┬───────┐ +│ assignee │ count │ +│ varchar │ int64 │ +├─────────────────┼───────┤ +│ poad │ 1966 │ +│ vinayakkulkarni │ 508 │ +│ tmtmtmtm │ 356 │ +│ AMatutat │ 260 │ +│ danwinship │ 208 │ +└─────────────────┴───────┘ +duckdb gha.db -c 3119.93s user 90.86s system 719% cpu 7:26.22 total + +time duckdb -c " + WITH assignees AS ( + SELECT payload.pull_request.assignee.login assignee + FROM 'gha.parquet' + UNION ALL + SELECT unnest(payload.pull_request.assignees).login assignee + FROM 'gha.parquet' + ) + SELECT assignee, count(*) count + FROM assignees + WHERE assignee NOT NULL + GROUP BY assignee + ORDER BY count DESC + LIMIT 5 +" +┌─────────────────┬───────┐ +│ assignee │ count │ +│ 
varchar │ int64 │ +├─────────────────┼───────┤ +│ poad │ 1966 │ +│ vinayakkulkarni │ 508 │ +│ tmtmtmtm │ 356 │ +│ AMatutat │ 260 │ +│ danwinship │ 208 │ +└─────────────────┴───────┘ +duckdb -c 2914.72s user 107.15s system 721% cpu 6:58.68 total + +time datafusion-cli -c " + WITH assignees AS ( + SELECT payload.pull_request.assignee.login assignee + FROM 'gha.parquet' + UNION ALL + SELECT object.login as assignee FROM ( + SELECT unnest(payload.pull_request.assignees) object + FROM 'gha.parquet' + ) + ) + SELECT assignee, count() count + FROM assignees + WHERE assignee IS NOT NULL + GROUP BY assignee + ORDER BY count DESC + LIMIT 5 +" +DataFusion CLI v43.0.0 ++-----------------+-------+ +| assignee | count | ++-----------------+-------+ +| poad | 1966 | +| vinayakkulkarni | 508 | +| tmtmtmtm | 356 | +| AMatutat | 260 | +| danwinship | 208 | ++-----------------+-------+ +5 row(s) fetched. +Elapsed 39.012 seconds. + +datafusion-cli -c 116.97s user 44.50s system 408% cpu 39.533 total + +; time clickhouse -q " + WITH assignees AS ( + SELECT payload.pull_request.assignee.login assignee + FROM 'gha.parquet' + UNION ALL + SELECT arrayJoin(payload.pull_request.assignees).login assignee + FROM 'gha.parquet' + ) + SELECT assignee, count(*) count + FROM assignees + WHERE assignee IS NOT NULL + GROUP BY assignee + ORDER BY count DESC + LIMIT 5 +" +poad 1966 +vinayakkulkarni 508 +tmtmtmtm 356 +AMatutat 260 +danwinship 208 +clickhouse -q 105.49s user 6.54s system 169% cpu 1:06.27 total +```