From ee824ec77bcb616eddb1559b590e793277d9f281 Mon Sep 17 00:00:00 2001 From: "jace.chun" Date: Tue, 27 Jun 2023 17:47:21 +0800 Subject: [PATCH 1/5] updated forms.py to get questions info from google form --- ggvlib/google/forms.py | 57 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/ggvlib/google/forms.py b/ggvlib/google/forms.py index 4854966..0381dfe 100644 --- a/ggvlib/google/forms.py +++ b/ggvlib/google/forms.py @@ -74,9 +74,9 @@ def get_responses_as_df(form_id: str) -> pd.DataFrame: if "value" in list(answer[0].keys()): return_list.append(answer[0]["value"]) - return_df = pd.concat( - [return_df, pd.DataFrame(return_list).T] - ).reset_index(drop=True) + return_df = pd.concat([return_df, pd.DataFrame(return_list).T]).reset_index( + drop=True + ) return_df = return_df.rename( columns={ 0: "response_id", @@ -86,3 +86,54 @@ def get_responses_as_df(form_id: str) -> pd.DataFrame: } ) return return_df + + +def get_questions(form_id: str) -> Dict[str, str]: + """Get questions for a given form_id + + Args: + form_id (str): A form_id + + Returns: + Dict[str, str]: A dictionary of the form's questions + """ + logger.info(f"Getting questions from form {form_id}") + return _client().forms().get(formId=form_id).execute() + + +def get_questions_as_df(form_id: str) -> pd.DataFrame: + """Get question for a given form_id as a Pandas DataFrame + + Args: + form_id (str): A form_id + + Returns: + pd.DataFrame: A DataFrame composed of the form's questions + """ + return_df = pd.DataFrame() + data = get_questions(form_id) + logger.info(f"Getting questions as df from form {form_id}") + # print(data["items"]) + return_df = pd.DataFrame() + for row in data["items"]: + if "questionItem" in row: + pair = list() + pair.append(row["title"]) + pair.append(row["questionItem"]["question"]["questionId"]) + pair.append("none") + return_df = pd.concat([return_df, pd.DataFrame(pair).T]).reset_index( + drop=True + ) + elif "questionGroupItem" in row: + for item in row["questionGroupItem"]["questions"]: + pair = list() + pair.append(row["title"]) + pair.append(item["questionId"]) + pair.append(item["rowQuestion"]["title"]) + return_df = pd.concat([return_df, pd.DataFrame(pair).T]).reset_index( + drop=True + ) + return_df = return_df.rename( + columns={0: "question_title", 1: "question_id", 2: "sub_question"} + ) + return return_df From 9c016a10ecd405acc5bf266db37364d23dbc642e Mon Sep 17 00:00:00 2001 From: "jace.chun" Date: Tue, 27 Jun 2023 18:21:21 +0800 Subject: [PATCH 2/5] updated forms.py to get the reponses question in the response df --- ggvlib/google/forms.py | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/ggvlib/google/forms.py b/ggvlib/google/forms.py index 0381dfe..81068a7 100644 --- a/ggvlib/google/forms.py +++ b/ggvlib/google/forms.py @@ -3,6 +3,7 @@ import google.auth from googleapiclient.discovery import build, Resource import pandas as pd +import np DEFAULT_SCOPES = [ "https://www.googleapis.com/auth/drive", @@ -38,7 +39,7 @@ def get_client() -> Resource: return _client() -def get_responses(form_id: str) -> Dict[str, str]: +def get_raw_responses(form_id: str) -> Dict[str, str]: """Get responses for a given form_id Args: @@ -51,7 +52,7 @@ def get_responses(form_id: str) -> Dict[str, str]: return _client().forms().responses().list(formId=form_id).execute() -def get_responses_as_df(form_id: str) -> pd.DataFrame: +def get_raw_responses_as_df(form_id: str) -> pd.DataFrame: """Get responses for a given form_id as a Pandas DataFrame Args: @@ -62,29 +63,24 @@ def get_responses_as_df(form_id: str) -> pd.DataFrame: """ return_df = pd.DataFrame() logger.info(f"Getting form responses as df from form {form_id}") - responses = get_responses(form_id) + responses = get_raw_responses(form_id) for row in responses["responses"]: return_list = list() return_list.append(row["responseId"]) return_list.append(row["createTime"]) return_list.append(row["lastSubmittedTime"]) return_list.append(row["answers"]) + col_names = ["responseId", "createTime", "LastSubmittedTime", "answer"] for val in row["answers"].values(): for answer in list(val.values())[1].values(): if "value" in list(answer[0].keys()): + col_names.append(val["questionId"]) return_list.append(answer[0]["value"]) return_df = pd.concat([return_df, pd.DataFrame(return_list).T]).reset_index( drop=True ) - return_df = return_df.rename( - columns={ - 0: "response_id", - 1: "created_time", - 2: "last_submitted_time", - 3: "answers", - } - ) + return_df.columns = col_names return return_df @@ -113,7 +109,6 @@ def get_questions_as_df(form_id: str) -> pd.DataFrame: return_df = pd.DataFrame() data = get_questions(form_id) logger.info(f"Getting questions as df from form {form_id}") - # print(data["items"]) return_df = pd.DataFrame() for row in data["items"]: if "questionItem" in row: @@ -137,3 +132,26 @@ def get_questions_as_df(form_id: str) -> pd.DataFrame: columns={0: "question_title", 1: "question_id", 2: "sub_question"} ) return return_df + + +def get_responses_as_df(form_id: str) -> pd.DataFrame: + """Get responses for a given form_id as a Pandas DataFrame + + Args: + form_id (str): A form_id + + Returns: + pd.DataFrame: A DataFrame composed of the form's responses mapped with questions as column + """ + question = get_questions_as_df(form_id) + response = get_raw_responses_as_df(form_id) + question["name"] = np.where( + question["sub_question"] == "none", + question["question_title"], + question["question_title"] + "-" + question["sub_question"], + ) + name_dict = dict() + for id, name in zip(question["question_id"], question["name"]): + name_dict[id] = name + return_df = response.rename(columns=name_dict) + return return_df From 48ffe40b8d9348cfda6b07ad5261ec2df4d28909 Mon Sep 17 00:00:00 2001 From: "jace.chun" Date: Tue, 27 Jun 2023 20:37:31 +0800 Subject: [PATCH 3/5] updated forms.py to get the reponses question in the response df --- ggvlib/google/forms.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggvlib/google/forms.py b/ggvlib/google/forms.py index 81068a7..145d958 100644 --- a/ggvlib/google/forms.py +++ b/ggvlib/google/forms.py @@ -113,7 +113,7 @@ def get_questions_as_df(form_id: str) -> pd.DataFrame: for row in data["items"]: if "questionItem" in row: pair = list() - pair.append(row["title"]) + pair.append(row["title"].strip()) pair.append(row["questionItem"]["question"]["questionId"]) pair.append("none") return_df = pd.concat([return_df, pd.DataFrame(pair).T]).reset_index( @@ -122,7 +122,7 @@ def get_questions_as_df(form_id: str) -> pd.DataFrame: elif "questionGroupItem" in row: for item in row["questionGroupItem"]["questions"]: pair = list() - pair.append(row["title"]) + pair.append(row["title"].strip()) pair.append(item["questionId"]) pair.append(item["rowQuestion"]["title"]) return_df = pd.concat([return_df, pd.DataFrame(pair).T]).reset_index( From bfc5db03811b890ef22494560d978b2f65f6c8bf Mon Sep 17 00:00:00 2001 From: "jace.chun" Date: Wed, 28 Jun 2023 16:05:56 +0800 Subject: [PATCH 4/5] updated forms.py to get the reponses question in the response df --- ggvlib/google/forms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggvlib/google/forms.py b/ggvlib/google/forms.py index 145d958..2e0bb7b 100644 --- a/ggvlib/google/forms.py +++ b/ggvlib/google/forms.py @@ -3,7 +3,7 @@ import google.auth from googleapiclient.discovery import build, Resource import pandas as pd -import np +import numpy as np DEFAULT_SCOPES = [ "https://www.googleapis.com/auth/drive", From 7445961df9607b229a33b3dcbb318dcd589c01be Mon Sep 17 00:00:00 2001 From: "jace.chun" Date: Mon, 3 Jul 2023 17:26:05 +0800 Subject: [PATCH 5/5] updated form.py when there is no responses --- ggvlib/google/forms.py | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/ggvlib/google/forms.py b/ggvlib/google/forms.py index 2e0bb7b..58c888b 100644 --- a/ggvlib/google/forms.py +++ b/ggvlib/google/forms.py @@ -64,23 +64,26 @@ def get_raw_responses_as_df(form_id: str) -> pd.DataFrame: return_df = pd.DataFrame() logger.info(f"Getting form responses as df from form {form_id}") responses = get_raw_responses(form_id) - for row in responses["responses"]: - return_list = list() - return_list.append(row["responseId"]) - return_list.append(row["createTime"]) - return_list.append(row["lastSubmittedTime"]) - return_list.append(row["answers"]) - col_names = ["responseId", "createTime", "LastSubmittedTime", "answer"] - for val in row["answers"].values(): - for answer in list(val.values())[1].values(): - if "value" in list(answer[0].keys()): - col_names.append(val["questionId"]) - return_list.append(answer[0]["value"]) - - return_df = pd.concat([return_df, pd.DataFrame(return_list).T]).reset_index( - drop=True - ) - return_df.columns = col_names + if "responses" in responses.keys(): + for row in responses["responses"]: + return_list = list() + return_list.append(row["responseId"]) + return_list.append(row["createTime"]) + return_list.append(row["lastSubmittedTime"]) + return_list.append(row["answers"]) + col_names = ["responseId", "createTime", "LastSubmittedTime", "answer"] + for val in row["answers"].values(): + for answer in list(val.values())[1].values(): + if "value" in list(answer[0].keys()): + col_names.append(val["questionId"]) + return_list.append(answer[0]["value"]) + + return_df = pd.concat([return_df, pd.DataFrame(return_list).T]).reset_index( + drop=True + ) + return_df.columns = col_names + else: + logger.info(f"There is no responses in form {form_id}") return return_df @@ -155,3 +158,6 @@ def get_responses_as_df(form_id: str) -> pd.DataFrame: name_dict[id] = name return_df = response.rename(columns=name_dict) return return_df + + +get_responses_as_df("1QvSmtVTVPfY7jkU21scy-q3YmWjBQiLBZgfuvNJocFc")