diff --git a/.gitignore b/.gitignore index e7e215d..ac08a9f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ .idea -*/config.ini \ No newline at end of file +config.ini \ No newline at end of file diff --git a/observations/config.sample b/config.sample similarity index 53% rename from observations/config.sample rename to config.sample index 87a8609..ec5ce5a 100644 --- a/observations/config.sample +++ b/config.sample @@ -1,50 +1,26 @@ [COMMON] diksha_survey_app_name = + diksha_integrated_app_name = -observation_status_output_dir = [API_HEADERS] -# -------------------------------- -# API variables -# -------------------------------- - -headers_getkeyclock = {'Content-Type': 'application/x-www-form-urlencoded'} -client_id = -grant_type = refresh_token - -# -------------------- -# Header API variables -# -------------------- - content_type = application/json -authorization = -refresh_token = -# ----------------------- -# Channel Id -# ----------------------- - -channel-id = +authorization = +internal_access_token = [URL] base_url = http://: -url_getkeyclock = https:///auth/realms/sunbird/protocol/openid-connect/token url_entity_related = assessment/api/v1/entities/relatedEntities/ -url_entity_relatedapi = http://:/assessment/api/v1/entities/relatedEntities/ - -sunbird_api_base_url_ip = http://:/api -sunbird_api_url_readuser = user/v1/read -sunbird_api_url_syncuser = data/v1/index/sync url_user_profile_api = assessment/api/v1/userExtension/getProfile/ -sunbird_api_url_searchuser = user/v1/search - +evidence_base_url = [MONGO] @@ -60,63 +36,37 @@ mongo_url = mongodb://: database_name = -# ------------------------------------- -# Mongo observationSubmission Collection -# ------------------------------------- +# ------------------- +# Mongo Collections +# ------------------- observation_sub_collec = -# ------------------------- -# Mongo solutions Collection -# -------------------------- - solutions_collec = -# ---------------------------- -# Mongo observations 
Collection -# ----------------------------- - observations_collec = -# --------------------------- -# Mongo entityTypes Collection -# ---------------------------- - entity_type_collec = -# ------------------------- -# Mongo questions Collection -# ------------------------- - questions_collec = -# ------------------------ -# Mongo criteria Collection -# ----------------------- - criteria_collec = -# ----------------------- -# Mongo entities Collection -# ----------------------- - entities_collec = -# ----------------------- -# Mongo programs Collection -# ----------------------- - programs_collec = -# ----------------------- -# Mongo user_roles Collection -# ----------------------- - user_roles_collection = -[DRUID] +criteria_questions_collection = + +projects_collection = -urlQuery = http://:/druid/indexer/v1/supervisor +survey_submissions_collection = + +survey_collection = + +[DRUID] druid_end_point = http://:/druid/coordinator/v1/datasources/ @@ -124,19 +74,23 @@ druid_batch_end_point = http://:/druid/indexer/v1/task observation_status_spec = -observation_spec = - -observation_evidence_spec = +general_unnati_spec = [KAFKA] kafka_url = : -kafka_raw_data_topic = +kafka_raw_data_topic = + +kafka_druid_topic = + +kafka_evidence_druid_topic = -kafka_druid_topic = +kafka_evidence_survey_druid_topic = -kafka_evidence_druid_topic = +kafka_raw_survey_topic = + +kafka_survey_druid_topic = [LOGS] @@ -152,6 +106,18 @@ observation_status_success_log_filename = /status/success.log observation_status_error_log_filename = /status/error.log +project_success_log_filename = + +project_error_log_filename = + +survey_evidence_streaming_success_log_filename = + +survey_evidence_streaming_error_log_filename = + +survey_streaming_success_log_filename = + +survey_streaming_error_log_filename = + [ELASTICSEARCH] header = {'Content-Type': 'application/json'} @@ -172,4 +138,20 @@ sas_token = container_name = -blob_path = +blob_path = + +projects_blob_path = + +[REDIS] + +host = + +port = + 
+db_name = + +[OUTPUT_DIR] + +projects_folder = + +observation_status_output_dir = \ No newline at end of file diff --git a/observations/py_observation_evidence_streaming.py b/observations/py_observation_evidence_streaming.py index 4f1328f..e9cc906 100755 --- a/observations/py_observation_evidence_streaming.py +++ b/observations/py_observation_evidence_streaming.py @@ -6,27 +6,19 @@ # ----------------------------------------------------------------- from pymongo import MongoClient from bson.objectid import ObjectId -import csv,os -import json -import boto3 +import os, json import datetime -from datetime import date,time -import requests -import argparse -from kafka import KafkaConsumer -from kafka import KafkaProducer -import dateutil -from dateutil import parser as date_parser -from configparser import ConfigParser,ExtendedInterpolation +# from datetime import date, time +from kafka import KafkaConsumer, KafkaProducer +from configparser import ConfigParser, ExtendedInterpolation import faust import logging import logging.handlers -import time from logging.handlers import TimedRotatingFileHandler -config_path = os.path.dirname(os.path.abspath(__file__)) +config_path = os.path.split(os.path.dirname(os.path.abspath(__file__))) config = ConfigParser(interpolation=ExtendedInterpolation()) -config.read(config_path + "/config.ini") +config.read(config_path[0] + "/config.ini") formatter = logging.Formatter('%(asctime)s - %(levelname)s') @@ -34,141 +26,162 @@ successLogger.setLevel(logging.DEBUG) # Add the log message handler to the logger -successHandler = logging.handlers.\ - RotatingFileHandler(config.get('LOGS','observation_streaming_evidence_success_log_filename')) -successBackuphandler = TimedRotatingFileHandler(config.get('LOGS','observation_streaming_evidence_success_log_filename'), - when="w0",backupCount=1) +successHandler = logging.handlers.RotatingFileHandler( + config.get('LOGS','observation_streaming_evidence_success_log_filename') +) +successBackuphandler = 
TimedRotatingFileHandler( + config.get('LOGS','observation_streaming_evidence_success_log_filename'), + when="w0", + backupCount=1 +) successHandler.setFormatter(formatter) successLogger.addHandler(successHandler) successLogger.addHandler(successBackuphandler) errorLogger = logging.getLogger('error log') errorLogger.setLevel(logging.ERROR) -errorHandler = logging.handlers.\ - RotatingFileHandler(config.get('LOGS','observation_streaming_evidence_error_log_filename')) -errorBackuphandler = TimedRotatingFileHandler(config.get('LOGS','observation_streaming_evidence_error_log_filename'), - when="w0",backupCount=1) +errorHandler = logging.handlers.RotatingFileHandler( + config.get('LOGS','observation_streaming_evidence_error_log_filename') +) +errorBackuphandler = TimedRotatingFileHandler( + config.get('LOGS','observation_streaming_evidence_error_log_filename'), + when="w0", + backupCount=1 +) errorHandler.setFormatter(formatter) errorLogger.addHandler(errorHandler) errorLogger.addHandler(errorBackuphandler) try: - kafka_url = (config.get("KAFKA","kafka_url")) - app = faust.App('sl_observation_evidences_diksha_faust',broker='kafka://'+kafka_url,value_serializer='raw', - web_port=7002,broker_max_poll_records=500) - rawTopicName = app.topic(config.get("KAFKA","kafka_raw_data_topic")) - producer = KafkaProducer(bootstrap_servers=[kafka_url]) - - #db production - clientdev = MongoClient(config.get('MONGO','mongo_url')) - dbdev = clientdev[config.get('MONGO','database_name')] - - observationSubmissionsDevCollec = dbdev[config.get('MONGO','observation_sub_collec')] - solutionsDevCollec = dbdev[config.get('MONGO','solutions_collec')] - observationDevCollec = dbdev[config.get('MONGO','observations_collec')] - entityTypeDevCollec = dbdev[config.get('MONGO','entity_type_collec')] - questionsDevCollec = dbdev[config.get('MONGO','questions_collec')] - criteriaDevCollec = dbdev[config.get('MONGO','criteria_collec')] - entitiesDevCollec = dbdev[config.get('MONGO','entities_collec')] + 
kafka_url = config.get("KAFKA", "kafka_url") + app = faust.App( + 'sl_observation_evidences_diksha_faust', + broker='kafka://'+kafka_url, + value_serializer='raw', + web_port=7002, + broker_max_poll_records=500 + ) + rawTopicName = app.topic(config.get("KAFKA", "kafka_raw_data_topic")) + producer = KafkaProducer(bootstrap_servers=[kafka_url]) + #db production + clientdev = MongoClient(config.get('MONGO','mongo_url')) + db = clientdev[config.get('MONGO','database_name')] + obsSubCollec = db[config.get('MONGO','observation_sub_collec')] + quesCollec = db[config.get('MONGO','questions_collec')] except Exception as e: - errorLogger.error(e,exc_info=True) + errorLogger.error(e, exc_info=True) try : - def convert(lst): - return ','.join(lst) + def convert(lst): + return ','.join(lst) except Exception as e: - errorLogger.error(e,exc_info=True) + errorLogger.error(e, exc_info=True) try: - def evidence_extraction(msg_id): - for obSub in observationSubmissionsDevCollec.find({'_id':ObjectId(msg_id)}): - successLogger.debug("Observation Evidence Submission Id : " + str(msg_id)) - try: - completedDate = str(datetime.datetime.date(obSub['completedDate'])) + 'T' \ - + str(datetime.datetime.time(obSub['completedDate'])) + 'Z' - except KeyError: - pass - evidence_sub_count = 0 - try: - answersArr = [ v for v in obSub['answers'].values()] - except KeyError: - pass - for ans in answersArr: + def evidence_extraction(msg_id): + for obSub in obsSubCollec.find({'_id':ObjectId(msg_id)}): + successLogger.debug("Observation Evidence Submission Id : " + str(msg_id)) + try: + completedDate = str( + datetime.datetime.date(obSub['completedDate']) + ) + 'T' + str( + datetime.datetime.time(obSub['completedDate']) + ) + 'Z' + except KeyError: + completedDate = '' + evidence_sub_count = 0 + + try: + answersArr = [ v for v in obSub['answers'].values()] + except KeyError: + pass + + for ans in answersArr: try: - if len(ans['fileName']): - evidence_sub_count = evidence_sub_count + len(ans['fileName']) 
+ if len(ans['fileName']): + evidence_sub_count = evidence_sub_count + len(ans['fileName']) except KeyError: - if len(ans['instanceFileName']): - for instance in ans['instanceFileName']: - evidence_sub_count = evidence_sub_count + len(instance) - for answer in answersArr: - observationSubQuestionsObj = {} - observationSubQuestionsObj['completedDate'] = completedDate - observationSubQuestionsObj['total_evidences'] = evidence_sub_count - observationSubQuestionsObj['userName'] = obSub['evidencesStatus'][0]['submissions'][0]['submittedByName'] - observationSubQuestionsObj['userName'] = observationSubQuestionsObj['userName'].replace("null","") - observationSubQuestionsObj['observationSubmissionId'] = str(obSub['_id']) - observationSubQuestionsObj['school'] = str(obSub['entityId']) - observationSubQuestionsObj['schoolExternalId'] = obSub['entityExternalId'] - observationSubQuestionsObj['schoolName'] = obSub['entityInformation']['name'] - observationSubQuestionsObj['entityTypeId'] = str(obSub['entityTypeId']) - observationSubQuestionsObj['createdBy'] = obSub['createdBy'] - observationSubQuestionsObj['solutionExternalId'] = obSub['solutionExternalId'] - observationSubQuestionsObj['solutionId'] = str(obSub['solutionId']) - observationSubQuestionsObj['observationId'] = str(obSub['observationId']) - try : - observationSubQuestionsObj['appName'] = obSub["appInformation"]["appName"].lower() - except KeyError : - observationSubQuestionsObj['appName'] = config.get("COMMON","diksha_survey_app_name") - fileName = [] - fileSourcePath = [] - try: - observationSubQuestionsObj['remarks'] = answer['remarks'] - observationSubQuestionsObj['questionName'] = answer['payload']['question'][0] - except KeyError: - pass - observationSubQuestionsObj['questionId'] = str(answer['qid']) - for ques in questionsDevCollec.find({'_id':ObjectId(observationSubQuestionsObj['questionId'])}): - observationSubQuestionsObj['questionExternalId'] = ques['externalId'] - 
observationSubQuestionsObj['questionResponseType'] = answer['responseType'] - evidence = [] - evidenceCount = 0 - try: - if answer['fileName']: - evidence = answer['fileName'] - observationSubQuestionsObj['evidence_count'] = len(evidence) - evidenceCount = len(evidence) - except KeyError: - if answer['instanceFileName']: - for inst in answer['instanceFileName'] : + if len(ans['instanceFileName']): + for instance in ans['instanceFileName']: + evidence_sub_count = evidence_sub_count + len(instance) + + if completedDate : + for answer in answersArr: + if answer['qid']: + observationSubQuestionsObj = {} + observationSubQuestionsObj['completedDate'] = completedDate + observationSubQuestionsObj['total_evidences'] = evidence_sub_count + observationSubQuestionsObj['observationSubmissionId'] = str(obSub['_id']) + observationSubQuestionsObj['entity'] = str(obSub['entityId']) + observationSubQuestionsObj['entityExternalId'] = obSub['entityExternalId'] + observationSubQuestionsObj['entityName'] = obSub['entityInformation']['name'] + observationSubQuestionsObj['entityTypeId'] = str(obSub['entityTypeId']) + observationSubQuestionsObj['entityType'] = str(obSub['entityType']) + observationSubQuestionsObj['createdBy'] = obSub['createdBy'] + observationSubQuestionsObj['solutionExternalId'] = obSub['solutionExternalId'] + observationSubQuestionsObj['solutionId'] = str(obSub['solutionId']) + observationSubQuestionsObj['observationId'] = str(obSub['observationId']) + + try : + observationSubQuestionsObj['appName'] = obSub["appInformation"]["appName"].lower() + except KeyError : + observationSubQuestionsObj['appName'] = config.get("COMMON", "diksha_survey_app_name") + + fileName = [] + fileSourcePath = [] + + try: + observationSubQuestionsObj['remarks'] = answer['remarks'] + except KeyError: + observationSubQuestionsObj['remarks'] = '' + observationSubQuestionsObj['questionId'] = str(answer['qid']) + + questionsCollec = 
quesCollec.find({'_id':ObjectId(observationSubQuestionsObj['questionId'])}) + for ques in questionsCollec: + observationSubQuestionsObj['questionExternalId'] = ques['externalId'] + observationSubQuestionsObj['questionName'] = ques["question"][0] + observationSubQuestionsObj['questionResponseType'] = answer['responseType'] + evidence = [] + evidenceCount = 0 + + try: + if answer['fileName']: + evidence = answer['fileName'] + observationSubQuestionsObj['evidence_count'] = len(evidence) + evidenceCount = len(evidence) + except KeyError: + if answer['instanceFileName']: + for inst in answer['instanceFileName']: evidence.extend(inst) - observationSubQuestionsObj['evidence_count'] = len(evidence) - evidenceCount = len(evidence) - for evi in evidence: - fileName.append(evi['name']) - fileSourcePath.append(evi['sourcePath']) - observationSubQuestionsObj['fileName'] = convert(fileName) - observationSubQuestionsObj['fileSourcePath'] = convert(fileSourcePath) - if evidenceCount > 0: - producer.send((config.get("KAFKA","kafka_evidence_druid_topic")), json.dumps(observationSubQuestionsObj) - .encode('utf-8')) - producer.flush() - successLogger.debug("Send Obj to Kafka") -except Exception as e: - errorLogger.error(e,exc_info=True) + observationSubQuestionsObj['evidence_count'] = len(evidence) + evidenceCount = len(evidence) + for evi in evidence: + fileName.append(evi['name']) + fileSourcePath.append(evi['sourcePath']) + observationSubQuestionsObj['fileName'] = convert(fileName) + observationSubQuestionsObj['fileSourcePath'] = convert(fileSourcePath) + if evidenceCount > 0: + producer.send( + (config.get("KAFKA", "kafka_evidence_druid_topic")), + json.dumps(observationSubQuestionsObj).encode('utf-8') + ) + producer.flush() + successLogger.debug("Send Obj to Kafka") +except Exception as e: + errorLogger.error(e, exc_info=True) try: - @app.agent(rawTopicName) - async def observationEvidenceFaust(consumer) : - async for msg in consumer : + @app.agent(rawTopicName) + async def 
observationEvidenceFaust(consumer): + async for msg in consumer : msg_val = msg.decode('utf-8') msg_data = json.loads(msg_val) successLogger.debug("========== START OF OBSERVATION EVIDENCE SUBMISSION ========") obj_arr = evidence_extraction(msg_data['_id']) successLogger.debug("********* END OF OBSERVATION EVIDENCE SUBMISSION ***********") except Exception as e: - errorLogger.error(e,exc_info=True) + errorLogger.error(e, exc_info=True) if __name__ == '__main__': - app.main() + app.main() diff --git a/observations/py_observation_streaming.py b/observations/py_observation_streaming.py index 1abb054..a9768dd 100755 --- a/observations/py_observation_streaming.py +++ b/observations/py_observation_streaming.py @@ -1,647 +1,951 @@ # ----------------------------------------------------------------- # Name : sl_py_observation_streaming.py # Author : Ashwini.E , Shakthieshwari.A -# Description : -# This is streaming program -# Reads the data from Kafka topic process the observation submitted data +# Description : Program to read data from one kafka topic and +# produce it to another kafka topic # ----------------------------------------------------------------- -# Program to read data from one kafka topic and produce it to another kafka topic import faust from pymongo import MongoClient from bson.objectid import ObjectId -import csv,os -import json -import boto3 +import os, json import datetime -from datetime import date,time +# from datetime import date,time import requests -import argparse -from kafka import KafkaConsumer -from kafka import KafkaProducer +from kafka import KafkaConsumer, KafkaProducer from configparser import ConfigParser,ExtendedInterpolation import logging import logging.handlers import time from logging.handlers import TimedRotatingFileHandler +import redis -config_path = os.path.dirname(os.path.abspath(__file__)) +config_path = os.path.split(os.path.dirname(os.path.abspath(__file__))) config = ConfigParser(interpolation=ExtendedInterpolation()) 
-config.read(config_path + "/config.ini") +config.read(config_path[0] + "/config.ini") formatter = logging.Formatter('%(asctime)s - %(levelname)s') - successLogger = logging.getLogger('success log') successLogger.setLevel(logging.DEBUG) # Add the log message handler to the logger -successHandler = logging.handlers.\ - RotatingFileHandler(config.get('LOGS','observation_streaming_success_log_filename')) -successBackuphandler = TimedRotatingFileHandler(config.get('LOGS','observation_streaming_success_log_filename'), - when="w0",backupCount=1) +successHandler = logging.handlers.RotatingFileHandler( + config.get('LOGS', 'observation_streaming_success_log_filename') +) +successBackuphandler = TimedRotatingFileHandler( + config.get('LOGS', 'observation_streaming_success_log_filename'), + when="w0", + backupCount=1 +) successHandler.setFormatter(formatter) successLogger.addHandler(successHandler) successLogger.addHandler(successBackuphandler) errorLogger = logging.getLogger('error log') errorLogger.setLevel(logging.ERROR) -errorHandler = logging.handlers.\ - RotatingFileHandler(config.get('LOGS','observation_streaming_error_log_filename')) -errorBackuphandler = TimedRotatingFileHandler(config.get('LOGS','observation_streaming_error_log_filename'), - when="w0",backupCount=1) +errorHandler = logging.handlers.RotatingFileHandler( + config.get('LOGS', 'observation_streaming_error_log_filename') +) +errorBackuphandler = TimedRotatingFileHandler( + config.get('LOGS', 'observation_streaming_error_log_filename'), + when="w0", + backupCount=1 +) errorHandler.setFormatter(formatter) errorLogger.addHandler(errorHandler) errorLogger.addHandler(errorBackuphandler) -try: - kafka_url = (config.get("KAFKA","kafka_url")) - #consume the message from kafka topic - app = faust.App('sl_observation_diksha_faust',broker='kafka://'+kafka_url,value_serializer='raw', - web_port=7001,broker_max_poll_records=500) - rawTopicName = app.topic(config.get("KAFKA","kafka_raw_data_topic")) - producer = 
KafkaProducer(bootstrap_servers=[kafka_url]) - - #db production - clientdev = MongoClient(config.get('MONGO','mongo_url')) - dbdev = clientdev[config.get('MONGO','database_name')] - - observationSubmissionsDevCollec = dbdev[config.get('MONGO','observation_sub_collec')] - solutionsDevCollec = dbdev[config.get('MONGO','solutions_collec')] - observationDevCollec = dbdev[config.get('MONGO','observations_collec')] - entityTypeDevCollec = dbdev[config.get('MONGO','entity_type_collec')] - questionsDevCollec = dbdev[config.get('MONGO','questions_collec')] - criteriaDevCollec = dbdev[config.get('MONGO','criteria_collec')] - entitiesDevCollec = dbdev[config.get('MONGO','entities_collec')] - programsDevCollec = dbdev[config.get('MONGO','programs_collec')] -except Exception as e: - errorLogger.error(e,exc_info=True) +domArr = [] + +kafka_url = config.get("KAFKA", "kafka_url") +#consume the message from kafka topic +app = faust.App( + 'sl_observation_diksha_faust', + broker='kafka://'+kafka_url, + value_serializer='raw', + web_port=7001, + broker_max_poll_records=500 +) +rawTopicName = app.topic(config.get("KAFKA", "kafka_raw_data_topic")) +producer = KafkaProducer(bootstrap_servers=[kafka_url]) +#db production +client = MongoClient(config.get('MONGO', 'mongo_url')) +db = client[config.get('MONGO', 'database_name')] +obsSubCollec = db[config.get('MONGO', 'observation_sub_collec')] +solCollec = db[config.get('MONGO', 'solutions_collec')] +obsCollec = db[config.get('MONGO', 'observations_collec')] +questionsCollec = db[config.get('MONGO', 'questions_collec')] +entitiesCollec = db[config.get('MONGO', 'entities_collec')] +criteriaQuestionsCollec = db[config.get('MONGO', 'criteria_questions_collection')] +criteriaCollec = db[config.get('MONGO', 'criteria_collec')] +programsCollec = db[config.get('MONGO', 'programs_collec')] +# redis cache connection +redis_connection = redis.ConnectionPool( + host=config.get("REDIS", "host"), + decode_responses=True, + port=config.get("REDIS", 
"port"), + db=config.get("REDIS", "db_name") +) +datastore = redis.StrictRedis(connection_pool=redis_connection) try: - def removeduplicate(it): - seen = [] - for x in it: - if x not in seen: - yield x - seen.append(x) + def removeduplicate(it): + seen = [] + for x in it: + if x not in seen: + yield x + seen.append(x) except Exception as e: - errorLogger.error(e,exc_info=True) + errorLogger.error(e, exc_info=True) try: - #getKeyclock api to generate authentication token - def get_keyclock_accesstoken(): - url_getkeyclock = config.get("URL","url_getkeyclock") - headers_getkeyclock = {'Content-Type': 'application/x-www-form-urlencoded'} - body_getkeyclock = {"grant_type":config.get("API_HEADERS","grant_type"), - "client_id":config.get("API_HEADERS","client_id"), - "refresh_token":config.get("API_HEADERS","refresh_token")} - - responsegetkeyclock = requests.post(url_getkeyclock, data=body_getkeyclock,headers=headers_getkeyclock) - if responsegetkeyclock.status_code == 200: - successLogger.debug("getkeyclock api") - return responsegetkeyclock.json() + def getRelatedEntity(entityId): + urlEntityRelated = config.get( + "URL", "base_url" + ) + "/" + config.get( + "URL", "url_entity_related" + ) + str(entityId) + headersEntityRelated = { + 'Content-Type': config.get("API_HEADERS", "content_type"), + 'Authorization': "Bearer "+ config.get("API_HEADERS", "authorization"), + 'internal-access-token': config.get("API_HEADERS", "internal_access_token") + } + responseEntityRelated = requests.get(urlEntityRelated, headers=headersEntityRelated) + if responseEntityRelated.status_code == 200: + successLogger.debug("entityRelated api") + return responseEntityRelated.json() else: - errorLogger.error("Failure in getkeyclock API") - errorLogger.error(responsegetkeyclock) - errorLogger.error(responsegetkeyclock.text) + errorLogger.error(" Failure in EntityRelatedApi ") + errorLogger.error(responseEntityRelated) + errorLogger.error(responseEntityRelated.text) except Exception as e: - 
errorLogger.error(e,exc_info=True) + errorLogger.error(e, exc_info=True) try: - def getRelatedEntity(entityId,accessToken): - urlEntityRelated = config.get("URL","base_url") + "/" + config.get("URL","url_entity_related") + str(entityId) - headersEntityRelated ={ - 'Content-Type': config.get("API_HEADERS","content_type"), - 'Authorization': "Bearer "+ config.get("API_HEADERS","authorization"), - 'X-authenticated-user-token': accessToken, - 'X-Channel-id' : config.get("API_HEADERS","channel-id") - } - responseEntityRelated = requests.get(urlEntityRelated, headers=headersEntityRelated) - if responseEntityRelated.status_code == 200 : - successLogger.debug("entityRelated api") - return responseEntityRelated.json() - else: - errorLogger.error(" Failure in EntityRelatedApi ") - errorLogger.error(responseEntityRelated) - errorLogger.error(responseEntityRelated.text) -except Exception as e: - errorLogger.error(e,exc_info=True) - -try: - def syncUser(userId,accessToken): - urlSyncUser = config.get("URL","sunbird_api_base_url_ip") + "/" + config.get("URL","sunbird_api_url_syncuser") - headersSyncUser ={ - 'Content-Type': config.get("API_HEADERS","content_type"), - 'Authorization': "Bearer "+ config.get("API_HEADERS","authorization"), - 'X-authenticated-user-token': accessToken, - 'X-Channel-id' : config.get("API_HEADERS","channel-id") - } - body_sync_user = {"params": {},"request": {"objectType": "user","objectIds": [userId]}} - responseSyncUser = requests.post(urlSyncUser, headers=headersSyncUser,data=json.dumps(body_sync_user)) - if responseSyncUser.status_code == 200 : - successLogger.debug("user sync api") - return True - else : - errorLogger.error("user sync api failed") - errorLogger.error(responseSyncUser) - errorLogger.error(responseSyncUser.text) -except Exception as e: - errorLogger.error(e,exc_info=True) - -try: - def readUser(userId,accessToken,userSyncCnt): - queryStringReadUser = 
"?fields=completeness%2CmissingFields%2ClastLoginTime%2Ctopics%2Corganisations%2Croles%2Clocations%2Cdeclarations" - urlReadUser = config.get("URL","sunbird_api_base_url_ip") + "/" + config.get("URL","sunbird_api_url_readuser") \ - + "/" + str(userId) + queryStringReadUser - headersReadUser ={ - 'Content-Type': config.get("API_HEADERS","content_type"), - 'Authorization': "Bearer "+ config.get("API_HEADERS","authorization"), - 'X-authenticated-user-token': accessToken, - 'X-Channel-id' : config.get("API_HEADERS","channel-id") + def getUserRoles(userId): + urlUserRoles = config.get( + "URL", "base_url" + ) + "/" + config.get( + "URL", "url_user_profile_api" + ) + str(userId) + headersUserRoles ={ + 'Content-Type': config.get("API_HEADERS", "content_type"), + 'Authorization': "Bearer "+ config.get("API_HEADERS", "authorization"), + 'internal-access-token': config.get("API_HEADERS", "internal_access_token") } - responseReadUser = requests.get(urlReadUser, headers=headersReadUser) - if responseReadUser.status_code == 200 : - successLogger.debug("read user api") - return responseReadUser.json() + responseUserRoles = requests.get(urlUserRoles, headers=headersUserRoles) + if responseUserRoles.status_code == 200 : + successLogger.debug("user profile api") + return responseUserRoles.json() else: - errorLogger.error("read user api failed") - errorLogger.error(responseReadUser) - errorLogger.error(responseReadUser.text) - if responseReadUser.status_code == 404 : - responseReadUser = responseReadUser.json() - if responseReadUser["params"]["status"] == "USER_NOT_FOUND": - syncUserStatus = syncUser(userId,accessToken) - if syncUserStatus == True and userSyncCnt == 1: - userSyncCnt = userSyncCnt + 1 - readUser(userId,accessToken,userSyncCnt) + errorLogger.error("user profile api failed") + errorLogger.error(responseUserRoles) + errorLogger.error(responseUserRoles.text) except Exception as e: - errorLogger.error(e,exc_info=True) + errorLogger.error(e, exc_info=True) try: - def 
getUserRoles(userId,accessToken): - urlUserRoles = config.get("URL","base_url") + "/" + config.get("URL","url_user_profile_api") + str(userId) - headersUserRoles ={ - 'Content-Type': config.get("API_HEADERS","content_type"), - 'Authorization': "Bearer "+ config.get("API_HEADERS","authorization"), - 'X-authenticated-user-token': accessToken, - 'X-Channel-id' : config.get("API_HEADERS","channel-id") - } - responseUserRoles = requests.get(urlUserRoles, headers=headersUserRoles) - if responseUserRoles.status_code == 200 : - successLogger.debug("user profile api") - return responseUserRoles.json() - else: - errorLogger.error("user profile api failed") - errorLogger.error(responseUserRoles) - errorLogger.error(responseUserRoles.text) + #initialising the values + class node: + #Construction of Node with component,status and children + def _init_(self, type=None, externalId=None, name=None, children=None): + self.type = type + self.externalId = externalId + self.name = name + if children is None: + self.children = [] + else: + self.children = children + + + #Construction of tree through recursion + class implementation: + def buildnode(self, ob, parent, ansCriteriaId): + node1= node() + node1.type=ob['type'] + node1.externalId=ob['externalId'] + node1.name=ob['name'] + node1.parent = parent + node1.children=[] + + if (node1.type == 'criteria') and (node1.externalId == ansCriteriaId ): + criteriaObj = {} + criteriaObj['type'] = node1.type + criteriaObj['externalId'] = str(node1.externalId) + criteriaObj['name'] = node1.name + criteriaObj['parent'] = parent + domArr.append(criteriaObj) + + try: + for children in ob['children']: + parent = ob['name'] + node1.children.append(self.buildnode(children,parent,ansCriteriaId)) + except KeyError: + if ob['criteria']: + for cri in ob['criteria']: + if str(cri['criteriaId']) == ansCriteriaId : + criObj = {} + criObj['type'] = 'criteria' + criObj['externalId'] = str(cri['criteriaId']) + criObj['name']='' + criObj['parent']=ob['name'] + 
domArr.append(criObj) + val = len(domArr) + arr = domArr[0:val] + return arr except Exception as e: - errorLogger.error(e,exc_info=True) + errorLogger.error(e, exc_info=True) try: - def obj_creation(msg_id): - data_keyclock = {} - data_keyclock = get_keyclock_accesstoken() - tokenKeyCheck = None - tokenKeyCheck = "access_token" in data_keyclock - if tokenKeyCheck == True : - accessToken= data_keyclock['access_token'] - successLogger.debug("Observation Submission Id : " + str(msg_id)) - cursorMongo = observationSubmissionsDevCollec.find({'_id':ObjectId(msg_id)}, no_cursor_timeout=True) - for obSub in cursorMongo : - observationSubQuestionsArr = [] - completedDate = str(datetime.datetime.date(obSub['completedDate'])) + 'T' + \ - str(datetime.datetime.time(obSub['completedDate'])) + 'Z' - createdAt = str(datetime.datetime.date(obSub['createdAt'])) + 'T' + \ - str(datetime.datetime.time(obSub['createdAt'])) + 'Z' - updatedAt = str(datetime.datetime.date(obSub['updatedAt'])) + 'T' + \ - str(datetime.datetime.time(obSub['updatedAt'])) + 'Z' - evidencesArr = [ v for v in obSub['evidences'].values() ] - evidence_sub_count = 0 - entityId = obSub['entityId'] - - # fetch entity latitude and longitude from the database - entityLatitude = None - entityLongitude = None - for ent in entitiesDevCollec.find({'_id':ObjectId(entityId)}): - try : + def obj_creation(msg_id): + successLogger.debug("Observation Submission Id : " + str(msg_id)) + cursorMongo = obsSubCollec.find( + {'_id':ObjectId(msg_id)}, no_cursor_timeout=True + ) + for obSub in cursorMongo : + observationSubQuestionsArr = [] + completedDate = None + try: + completedDate = str(datetime.datetime.date(obSub['completedDate'])) + 'T' + str(datetime.datetime.time(obSub['completedDate'])) + 'Z' + except KeyError: + pass + createdAt = str(datetime.datetime.date(obSub['createdAt'])) + 'T' + str(datetime.datetime.time(obSub['createdAt'])) + 'Z' + updatedAt = str(datetime.datetime.date(obSub['updatedAt'])) + 'T' + 
str(datetime.datetime.time(obSub['updatedAt'])) + 'Z' + evidencesArr = [ v for v in obSub['evidences'].values() ] + evidence_sub_count = 0 + entityId = obSub['entityId'] + + # fetch entity latitude and longitude from the database + entityLatitude = None + entityLongitude = None + for ent in entitiesCollec.find({'_id':ObjectId(entityId)}): + try : if ent['metaInformation']['gpsLocation'] : - gpsLocation = (ent['metaInformation']['gpsLocation']).split(',') - entityLatitude = gpsLocation[0] - entityLongitude = gpsLocation[1] - except KeyError : - entityLatitude = None - entityLongitude = None - pass - userSyncCnt = 1 - # fetch user name from postgres with the help of keycloak id - queryJsonOutput = {} - queryJsonOutput = readUser(obSub["createdBy"],accessToken,userSyncCnt) - if queryJsonOutput : - if queryJsonOutput["result"]["response"]["userName"] : - userRoles = {} - obsAppName = None - try : - obsAppName = obSub["appInformation"]["appName"].lower() - except KeyError : - obsAppName = config.get("COMMON","diksha_survey_app_name") - if obsAppName == config.get("COMMON","diksha_survey_app_name") : - userRoles = getUserRoles(obSub["createdBy"],accessToken) - userRolesArr = [] - if userRoles: - userRoleKeyCheck = "result" in userRoles - if userRoleKeyCheck == True : - try : - if len(userRoles["result"]["roles"]) > 0 : - for rol in userRoles["result"]["roles"] : - for ent in rol["entities"]: - userEntityRelated = None - userEntityRelated = getRelatedEntity(ent["_id"],accessToken) - userEntityRelatedResultKeyCheck = None - roleObj = {} - roleObj["role_id"] = rol["_id"] - roleObj["role_externalId"] = rol["code"] - roleObj["role_title"] = rol["title"] - if userEntityRelated: - userEntityRelatedResultKeyCheck = "result" in userEntityRelated - if userEntityRelatedResultKeyCheck == True: - if userEntityRelated["result"]: - if (userEntityRelated["result"]["entityType"] == "district") or \ - (userEntityRelated["result"]["entityType"] == "block") or \ - 
(userEntityRelated["result"]["entityType"] == "cluster"): - roleObj['user_'+userEntityRelated["result"]["entityType"]+'Name'] = userEntityRelated["result"]["metaInformation"]["name"] - for usrEntityData in userEntityRelated["result"]["relatedEntities"]: - if (usrEntityData['entityType'] == "district") or \ - (usrEntityData['entityType'] == "block") or \ - (usrEntityData['entityType'] == "cluster") : - roleObj['user_'+usrEntityData['entityType']+'Name'] = usrEntityData['metaInformation']['name'] - userRolesArr.append(roleObj) - except KeyError : - pass - userRolesArrUnique = [] - if len(userRolesArr) > 0: - userRolesArrUnique = list(removeduplicate(userRolesArr)) - elif obsAppName == config.get("COMMON","diksha_integrated_app_name") : - userRolesArrUnique = [] - roleObj = {} - roleObj["role_id"] = "integrated_app" - roleObj["role_externalId"] = "integrated_app" - roleObj["role_title"] = queryJsonOutput["result"]["response"]["userSubType"] - try : - for usrLoc in queryJsonOutput["result"]["response"]["userLocations"]: - roleObj['user_'+usrLoc["type"]+'Name'] = usrLoc["name"] - userRolesArrUnique.append(roleObj) - except KeyError : - pass - entityRelated = None - entityRelated = getRelatedEntity(entityId,accessToken) - entityRelatedResultKeyCheck = None - entityRelatedData = None - if entityRelated: - entityRelatedResultKeyCheck = "result" in entityRelated - if entityRelatedResultKeyCheck == True: - entityRelatedData = entityRelated['result'] - - if 'answers' in obSub.keys() : + gpsLocation = (ent['metaInformation']['gpsLocation']).split(',') + entityLatitude = gpsLocation[0] + entityLongitude = gpsLocation[1] + except KeyError : + entityLatitude = '' + entityLongitude = '' + userObj = {} + userObj = datastore.hgetall("user:" + obSub["createdBy"]) + if userObj : + stateName = None + blockName = None + districtName = None + clusterName = None + rootOrgId = None + userSubType = None + userSchool = None + userSchoolUDISE = None + userSchoolName = None + + try: + 
userSchool = userObj["school"] + except KeyError : + userSchool = '' + + try: + userSchoolUDISE = userObj["schooludisecode"] + except KeyError : + userSchoolUDISE = '' + + try: + userSchoolName = userObj["schoolname"] + except KeyError : + userSchoolName = '' + + try: + userSubType = userObj["usersubtype"] + except KeyError : + userSubType = '' + + try: + stateName = userObj["state"] + except KeyError : + stateName = '' + + try: + blockName = userObj["block"] + except KeyError : + blockName = '' + + try: + districtName = userObj["district"] + except KeyError : + districtName = '' + + try: + clusterName = userObj["cluster"] + except KeyError : + clusterName = '' + + try: + rootOrgId = userObj["rootorgid"] + except KeyError : + rootOrgId = '' + + userRoles = {} + obsAppName = None + try : + obsAppName = obSub["appInformation"]["appName"].lower() + except KeyError : + obsAppName = config.get("COMMON", "diksha_survey_app_name") + userRolesArrUnique = [] + if obsAppName == config.get("COMMON", "diksha_survey_app_name") : + userRoles = getUserRoles(obSub["createdBy"]) + userRolesArr = [] + if userRoles: + userRoleKeyCheck = "result" in userRoles + if userRoleKeyCheck == True : + try : + if len(userRoles["result"]["roles"]) > 0 : + for rol in userRoles["result"]["roles"] : + for ent in rol["entities"]: + userEntityRelated = None + userEntityRelated = getRelatedEntity(ent["_id"]) + userEntityRelatedResultKeyCheck = None + roleObj = {} + roleObj["role_title"] = rol["title"] + if userEntityRelated: + userEntityRelatedResultKeyCheck = "result" in userEntityRelated + if userEntityRelatedResultKeyCheck == True: + if userEntityRelated["result"]: + if (userEntityRelated["result"]["entityType"] == "district") or (userEntityRelated["result"]["entityType"] == "block") or (userEntityRelated["result"]["entityType"] == "cluster") or (userEntityRelated["result"]["entityType"] == "state"): + roleObj['user_'+userEntityRelated["result"]["entityType"]+'Name'] = 
userEntityRelated["result"]["metaInformation"]["name"] + if userEntityRelated["result"]["entityType"] == "school" : + roleObj['user_schoolName'] = userEntityRelated["result"]["metaInformation"]["name"] + roleObj['user_schoolId'] = str(userEntityRelated["result"]["metaInformation"]["id"]) + roleObj['user_schoolUDISE_code'] = userEntityRelated["result"]["metaInformation"]["externalId"] + for usrEntityData in userEntityRelated["result"]["relatedEntities"]: + if (usrEntityData['entityType'] == "district") or (usrEntityData['entityType'] == "block") or (usrEntityData['entityType'] == "cluster") or (usrEntityData['entityType'] == "state"): + roleObj['user_'+usrEntityData['entityType']+'Name'] = usrEntityData['metaInformation']['name'] + if usrEntityData['entityType'] == "school" : + roleObj['user_schoolName'] = usrEntityData["metaInformation"]["name"] + roleObj['user_schoolId'] = str(usrEntityData["metaInformation"]["id"]) + roleObj['user_schoolUDISE_code'] = usrEntityData["metaInformation"]["externalId"] + userRolesArr.append(roleObj) + except KeyError : + userRolesArr = [] + + if len(userRolesArr) > 0: + userRolesArrUnique = list(removeduplicate(userRolesArr)) + elif obsAppName == config.get("COMMON", "diksha_integrated_app_name"): + roleObj = {} + roleObj["role_title"] = userSubType + roleObj["user_stateName"] = stateName + roleObj["user_blockName"] = blockName + roleObj["user_districtName"] = districtName + roleObj["user_clusterName"] = clusterName + roleObj["user_schoolName"] = userSchoolName + roleObj["user_schoolId"] = userSchool + roleObj["user_schoolUDISE_code"] = userSchoolUDISE + userRolesArrUnique.append(roleObj) + entityRelated = None + entityRelated = getRelatedEntity(entityId) + entityRelatedResultKeyCheck = None + entityRelatedData = None + if entityRelated: + entityRelatedResultKeyCheck = "result" in entityRelated + if entityRelatedResultKeyCheck == True: + entityRelatedData = entityRelated['result'] + + if 'answers' in obSub.keys() : answersArr = [ v 
for v in obSub['answers'].values()] for ans in answersArr: - try: - if len(ans['fileName']): - evidence_sub_count = evidence_sub_count + len(ans['fileName']) - except KeyError: - pass + try: + if len(ans['fileName']): + evidence_sub_count = evidence_sub_count + len(ans['fileName']) + except KeyError: + evidence_sub_count = 0 for ans in answersArr: - def sequenceNumber(externalId,answer): - for solu in solutionsDevCollec.find({'externalId':obSub['solutionExternalId']}): - section = [k for k in solu['sections'].keys()] - #parsing through questionSequencebyecm to get the sequence number - try: - for num in range(len(solu['questionSequenceByEcm'][answer['evidenceMethod']][section[0]])): - if solu['questionSequenceByEcm'][answer['evidenceMethod']][section[0]][num] == externalId: - return num + 1 - except KeyError: - pass - - def creatingObj(answer,quesexternalId,ans_val,instNumber,responseLabel,entityLatitudeCreateObjFn, - entityLongitudeCreateObjFn,usrRolFn): - observationSubQuestionsObj = {} - observationSubQuestionsObj['userName'] = obSub['evidencesStatus'][0]['submissions'][0]['submittedByName'] - observationSubQuestionsObj['userName'] = observationSubQuestionsObj['userName'].replace("null","") - observationSubQuestionsObj['observationSubmissionId'] = str(obSub['_id']) - observationSubQuestionsObj['appName'] = obsAppName - # geo tag validation , question answered within 200 meters of the selected entity - if entityLatitudeCreateObjFn and entityLongitudeCreateObjFn : - entityGeoFencing = (entityLatitudeCreateObjFn,entityLongitudeCreateObjFn) - answerGpsLocation = [] - try : - if answer['gpsLocation']: - answerGpsLocation = answer['gpsLocation'].split(',') - answerLatitude = None - answerLongitude = None - answerLatitude = answerGpsLocation[0] - answerLongitude = answerGpsLocation[1] - except KeyError : - answerGpsLocation = [] - pass - - if len(answerGpsLocation) > 0 : - answerGeoFencing = (answerLatitude,answerLongitude) - calcuGeoLocMtrs = 
(geodesic(entityGeoFencing, answerGeoFencing).km)*1000 - calcuGeoLocMtrsFloat = float(calcuGeoLocMtrs) - - if calcuGeoLocMtrsFloat <= float(200) : - observationSubQuestionsObj['location_validated_with_geotag'] = 'verified' - observationSubQuestionsObj['distance_in_meters'] = int(calcuGeoLocMtrsFloat) - - else : - observationSubQuestionsObj['location_validated_with_geotag'] = 'not verified' - observationSubQuestionsObj['distance_in_meters'] = int(calcuGeoLocMtrsFloat) - - else : - observationSubQuestionsObj['location_validated_with_geotag'] = 'gps location not found for question' - observationSubQuestionsObj['distance_in_meters'] = None - else : - observationSubQuestionsObj['location_validated_with_geotag'] = 'gps location not found for school' - observationSubQuestionsObj['distance_in_meters'] = None - - - observationSubQuestionsObj['entity'] = str(obSub['entityId']) - observationSubQuestionsObj['entityExternalId'] = obSub['entityExternalId'] - observationSubQuestionsObj['entityName'] = obSub['entityInformation']['name'] - - if entityRelatedData : - entityType = entityRelatedData["entityType"] - observationSubQuestionsObj[entityType] = entityRelatedData['_id'] - observationSubQuestionsObj[entityType+'Name'] = entityRelatedData['metaInformation']['name'] - observationSubQuestionsObj[entityType+'ExternalId'] = entityRelatedData['metaInformation']['externalId'] - for entityData in entityRelatedData["relatedEntities"]: - if entityData['entityType']: - entityType = entityData['entityType'] - observationSubQuestionsObj[entityType] = entityData['_id'] - observationSubQuestionsObj[entityType+'Name'] = entityData['metaInformation']['name'] - observationSubQuestionsObj[entityType+'ExternalId'] = entityData['metaInformation']['externalId'] - - observationSubQuestionsObj['entityTypeId'] = str(obSub['entityTypeId']) - try: - observationSubQuestionsObj['schoolTypes'] = obSub['entityInformation']['schoolTypes'] - observationSubQuestionsObj['administrationTypes'] = 
obSub['entityInformation']['administrationTypes'] - except KeyError: - pass - observationSubQuestionsObj['createdBy'] = obSub['createdBy'] - - try: - if obSub['isAPrivateProgram']: - observationSubQuestionsObj['isAPrivateProgram'] = obSub['isAPrivateProgram'] - else: - observationSubQuestionsObj['isAPrivateProgram'] = False - except KeyError: - observationSubQuestionsObj['isAPrivateProgram'] = False - pass - - try: - observationSubQuestionsObj['programExternalId'] = obSub['programExternalId'] - except KeyError : - observationSubQuestionsObj['programExternalId'] = None - try: - observationSubQuestionsObj['programId'] = str(obSub['programId']) - except KeyError : - observationSubQuestionsObj['programId'] = None - try: - for program in programsDevCollec.find({'externalId':obSub['programExternalId']}): - observationSubQuestionsObj['programName'] = program['name'] - except KeyError : - observationSubQuestionsObj['programName'] = None - - observationSubQuestionsObj['solutionExternalId'] = obSub['solutionExternalId'] - observationSubQuestionsObj['observationId'] = str(obSub['observationId']) - for solu in solutionsDevCollec.find({'externalId':obSub['solutionExternalId']}): - observationSubQuestionsObj['solutionName'] = solu['name'] - section = [k for k in solu['sections'].keys()] - observationSubQuestionsObj['section'] = section[0] - observationSubQuestionsObj['questionSequenceByEcm']= sequenceNumber(quesexternalId,answer) - - try: - if solu['scoringSystem'] == 'pointsBasedScoring': - observationSubQuestionsObj['totalScore'] = obSub['pointsBasedMaxScore'] - observationSubQuestionsObj['scoreAchieved'] = obSub['pointsBasedScoreAchieved'] - observationSubQuestionsObj['totalpercentage'] = obSub['pointsBasedPercentageScore'] - observationSubQuestionsObj['maxScore'] = answer['maxScore'] - observationSubQuestionsObj['minScore'] = answer['scoreAchieved'] - observationSubQuestionsObj['percentageScore'] = answer['percentageScore'] - 
observationSubQuestionsObj['pointsBasedScoreInParent'] = answer['pointsBasedScoreInParent'] - except KeyError: - pass - - for entTy in entityTypeDevCollec.find({'_id':obSub['entityTypeId']},{'name':1}): - observationSubQuestionsObj['entityType'] = entTy['name'] - for ob in observationDevCollec.find({'_id':obSub['observationId']}): - observationSubQuestionsObj['observationName'] = ob['name'] - observationSubQuestionsObj['questionId'] = str(answer['qid']) - observationSubQuestionsObj['questionAnswer'] = ans_val - observationSubQuestionsObj['questionResponseType'] = answer['responseType'] - if answer['responseType'] == 'number': - if answer['payload']['labels']: - observationSubQuestionsObj['questionResponseLabel_number'] = responseLabel - else: - observationSubQuestionsObj['questionResponseLabel_number'] = '' - if answer['payload']['labels']: - observationSubQuestionsObj['questionResponseLabel'] = responseLabel - else: - observationSubQuestionsObj['questionResponseLabel'] = '' - observationSubQuestionsObj['questionExternalId'] = quesexternalId - observationSubQuestionsObj['questionName'] = answer['payload']['question'][0] - observationSubQuestionsObj['questionECM'] = answer['evidenceMethod'] - observationSubQuestionsObj['criteriaId'] = str(answer['criteriaId']) - for crit in obSub["criteria"] : - if str(answer['criteriaId']) == str(crit["_id"]) : - try: - observationSubQuestionsObj['criteriaLevel'] = crit["score"] - except KeyError : - observationSubQuestionsObj['criteriaLevel'] = '' - try: - observationSubQuestionsObj['criteriaScore'] = crit["scoreAchieved"] - except KeyError : - observationSubQuestionsObj['criteriaScore'] = '' - for crit in criteriaDevCollec.find({'_id':ObjectId(answer['criteriaId'])}): - observationSubQuestionsObj['criteriaExternalId'] = crit['externalId'] - observationSubQuestionsObj['criteriaName'] = crit['name'] - observationSubQuestionsObj['completedDate'] = completedDate - observationSubQuestionsObj['createdAt'] = createdAt - 
observationSubQuestionsObj['updatedAt'] = updatedAt - observationSubQuestionsObj['remarks'] = answer['remarks'] - if len(answer['fileName']): - multipleFiles = None - fileCnt = 1 - for filedetail in answer['fileName']: - if fileCnt == 1: - multipleFiles = 'https://samikshaprod.blob.core.windows.net/samiksha/' + filedetail['sourcePath'] - fileCnt = fileCnt + 1 - else: - multipleFiles = multipleFiles + ' , ' + 'https://samikshaprod.blob.core.windows.net/samiksha/' + \ - filedetail['sourcePath'] - observationSubQuestionsObj['evidences'] = multipleFiles - observationSubQuestionsObj['evidence_count'] = len(answer['fileName']) - observationSubQuestionsObj['total_evidences'] = evidence_sub_count - # to fetch the parent question of matrix - if ans['responseType']=='matrix': - observationSubQuestionsObj['instanceParentQuestion'] = ans['payload']['question'][0] - observationSubQuestionsObj['instanceParentId'] = ans['qid'] - observationSubQuestionsObj['instanceParentResponsetype'] =ans['responseType'] - observationSubQuestionsObj['instanceParentCriteriaId'] =ans['criteriaId'] - for crit in criteriaDevCollec.find({'_id':ObjectId(ans['criteriaId'])}): - observationSubQuestionsObj['instanceParentCriteriaExternalId'] = crit['externalId'] - observationSubQuestionsObj['instanceParentCriteriaName'] = crit['name'] - observationSubQuestionsObj['instanceId'] = instNumber - for ques in questionsDevCollec.find({'_id':ObjectId(ans['qid'])}): - observationSubQuestionsObj['instanceParentExternalId'] = ques['externalId'] - observationSubQuestionsObj['instanceParentEcmSequence']= sequenceNumber(observationSubQuestionsObj['instanceParentExternalId'],answer) - else: - observationSubQuestionsObj['instanceParentQuestion'] = '' - observationSubQuestionsObj['instanceParentId'] = '' - observationSubQuestionsObj['instanceParentResponsetype'] ='' - observationSubQuestionsObj['instanceId'] = instNumber - observationSubQuestionsObj['instanceParentExternalId'] = '' - 
observationSubQuestionsObj['instanceParentEcmSequence'] = '' - observationSubQuestionsObj['user_id'] = queryJsonOutput["result"]["response"]["userName"] - observationSubQuestionsObj['channel'] = queryJsonOutput["result"]["response"]["rootOrgId"] - observationSubQuestionsObj['parent_channel'] = "SHIKSHALOKAM" - if usrRolFn : - observationSubQuestionsObj = { **usrRolFn , **observationSubQuestionsObj} - observationSubQuestionsObj["submissionNumber"] = obSub["submissionNumber"] - observationSubQuestionsObj["submissionTitle"] = obSub["title"] - - - return observationSubQuestionsObj - # fetching the question details from questions collection - def fetchingQuestiondetails(ansFn,instNumber,entityLatitudeQuesFn,entityLongitudeQuesFn): - for ques in questionsDevCollec.find({'_id':ObjectId(ansFn['qid'])}): - if len(ques['options']) == 0: + def sequenceNumber(externalId, answer, answerSection, solutionObj): + try: + for num in range( + len(solutionObj['questionSequenceByEcm'][answer['evidenceMethod']][answerSection]) + ): + if solutionObj['questionSequenceByEcm'][answer['evidenceMethod']][answerSection][num] == externalId: + return num + 1 + except KeyError: + return '' + + def creatingObj( + answer, quesexternalId, ans_val, instNumber, responseLabel, + entityLatitudeCreateObjFn, entityLongitudeCreateObjFn, usrRolFn + ): + observationSubQuestionsObj = {} + observationSubQuestionsObj['observationSubmissionId'] = str(obSub['_id']) + observationSubQuestionsObj['appName'] = obsAppName + # geo tag validation , question answered within 200 meters of the selected entity + if entityLatitudeCreateObjFn and entityLongitudeCreateObjFn : + entityGeoFencing = (entityLatitudeCreateObjFn,entityLongitudeCreateObjFn) + answerGpsLocation = [] + try : + if answer['gpsLocation']: + answerGpsLocation = answer['gpsLocation'].split(',') + answerLatitude = None + answerLongitude = None + answerLatitude = answerGpsLocation[0] + answerLongitude = answerGpsLocation[1] + except KeyError : + 
answerGpsLocation = [] + + if len(answerGpsLocation) > 0 : + answerGeoFencing = (answerLatitude,answerLongitude) + calcuGeoLocMtrs = (geodesic(entityGeoFencing, answerGeoFencing).km)*1000 + calcuGeoLocMtrsFloat = float(calcuGeoLocMtrs) + + if calcuGeoLocMtrsFloat <= float(200) : + observationSubQuestionsObj['location_validated_with_geotag'] = 'verified' + observationSubQuestionsObj['distance_in_meters'] = int(calcuGeoLocMtrsFloat) + else : + observationSubQuestionsObj['location_validated_with_geotag'] = 'not verified' + observationSubQuestionsObj['distance_in_meters'] = int(calcuGeoLocMtrsFloat) + else : + observationSubQuestionsObj['location_validated_with_geotag'] = 'gps location not found for question' + observationSubQuestionsObj['distance_in_meters'] = '' + else : + observationSubQuestionsObj['location_validated_with_geotag'] = 'gps location not found for school' + observationSubQuestionsObj['distance_in_meters'] = '' + + observationSubQuestionsObj['entity'] = str(obSub['entityId']) + observationSubQuestionsObj['entityExternalId'] = obSub['entityExternalId'] + observationSubQuestionsObj['entityName'] = obSub['entityInformation']['name'] + + if entityRelatedData : + entityType = entityRelatedData["entityType"] + observationSubQuestionsObj[entityType] = entityRelatedData['_id'] + observationSubQuestionsObj[entityType+'Name'] = entityRelatedData['metaInformation']['name'] + observationSubQuestionsObj[entityType+'ExternalId'] = entityRelatedData['metaInformation']['externalId'] + for entityData in entityRelatedData["relatedEntities"]: + if entityData['entityType']: + entityType = entityData['entityType'] + observationSubQuestionsObj[entityType] = entityData['_id'] + observationSubQuestionsObj[entityType+'Name'] = entityData['metaInformation']['name'] + observationSubQuestionsObj[entityType+'ExternalId'] = entityData['metaInformation']['externalId'] + + observationSubQuestionsObj['entityTypeId'] = str(obSub['entityTypeId']) + + try: + 
observationSubQuestionsObj['schoolTypes'] = obSub['entityInformation']['schoolTypes'] + except KeyError: + observationSubQuestionsObj['schoolTypes'] = '' + + try: + observationSubQuestionsObj['administrationTypes'] = obSub['entityInformation']['administrationTypes'] + except KeyError: + observationSubQuestionsObj['administrationTypes'] = '' + observationSubQuestionsObj['createdBy'] = obSub['createdBy'] + + try: + if obSub['isAPrivateProgram']: + observationSubQuestionsObj['isAPrivateProgram'] = obSub['isAPrivateProgram'] + else: + observationSubQuestionsObj['isAPrivateProgram'] = False + except KeyError: + observationSubQuestionsObj['isAPrivateProgram'] = False + + try: + observationSubQuestionsObj['programExternalId'] = obSub['programExternalId'] + except KeyError : + observationSubQuestionsObj['programExternalId'] = '' + + try: + observationSubQuestionsObj['programId'] = str(obSub['programId']) + except KeyError : + observationSubQuestionsObj['programId'] = '' + + try: + for pgm in programsCollec.find({"_id":ObjectId(obSub['programId'])}): + observationSubQuestionsObj['programName'] = pgm['name'] + observationSubQuestionsObj['programDescription'] = pgm['description'] + except KeyError : + observationSubQuestionsObj['programName'] = '' + observationSubQuestionsObj['programDescription'] = '' + + observationSubQuestionsObj['solutionExternalId'] = obSub['solutionExternalId'] + observationSubQuestionsObj['solutionId'] = str(obSub['solutionId']) + observationSubQuestionsObj['observationId'] = str(obSub['observationId']) + for critQues in criteriaQuestionsCollec.find({'_id':ObjectId(answer["criteriaId"])}): + observationSubQuestionsObj['criteriaExternalId'] = critQues['externalId'] + observationSubQuestionsObj['criteriaName'] = critQues['name'] + observationSubQuestionsObj['criteriaDescription'] = critQues['description'] + for eviCQ in critQues["evidences"] : + for secCQ in eviCQ["sections"] : + for quesCQ in secCQ["questions"] : + if str(quesCQ["_id"]) == answer["qid"] 
: + observationSubQuestionsObj['section'] = secCQ["code"] + for solu in solCollec.find({'_id':ObjectId(obSub['solutionId'])}): + solutionObj = {} + solutionObj = solu + + observationSubQuestionsObj['solutionName'] = solutionObj['name'] + observationSubQuestionsObj['scoringSystem'] = solutionObj['scoringSystem'] + observationSubQuestionsObj['solutionDescription'] = solutionObj['description'] + observationSubQuestionsObj['questionSequenceByEcm'] = sequenceNumber(quesexternalId,answer,observationSubQuestionsObj['section'],solutionObj) + + try: + if solutionObj['scoringSystem'] == 'pointsBasedScoring': + try: + observationSubQuestionsObj['totalScore'] = obSub['pointsBasedMaxScore'] + except KeyError : + observationSubQuestionsObj['totalScore'] = '' + try: + observationSubQuestionsObj['scoreAchieved'] = obSub['pointsBasedScoreAchieved'] + except KeyError : + observationSubQuestionsObj['scoreAchieved'] = '' + try: + observationSubQuestionsObj['totalpercentage'] = obSub['pointsBasedPercentageScore'] + except KeyError : + observationSubQuestionsObj['totalpercentage'] = '' + + try: + observationSubQuestionsObj['maxScore'] = answer['maxScore'] + except KeyError : + observationSubQuestionsObj['maxScore'] = '' + + try: + observationSubQuestionsObj['minScore'] = answer['scoreAchieved'] + except KeyError : + observationSubQuestionsObj['minScore'] = '' + + try: + observationSubQuestionsObj['percentageScore'] = answer['percentageScore'] + except KeyError : + observationSubQuestionsObj['percentageScore'] = '' + + try: + observationSubQuestionsObj['pointsBasedScoreInParent'] = answer['pointsBasedScoreInParent'] + except KeyError : + observationSubQuestionsObj['pointsBasedScoreInParent'] = '' + except KeyError: + observationSubQuestionsObj['totalScore'] = '' + observationSubQuestionsObj['scoreAchieved'] = '' + observationSubQuestionsObj['totalpercentage'] = '' + observationSubQuestionsObj['maxScore'] = '' + observationSubQuestionsObj['minScore'] = '' + 
observationSubQuestionsObj['percentageScore'] = '' + observationSubQuestionsObj['pointsBasedScoreInParent'] = '' + + observationSubQuestionsObj['entityType'] = obSub['entityType'] + + try: + for ob in obsCollec.find({'_id':obSub['observationId']},{'name':1}): + observationSubQuestionsObj['observationName'] = ob['name'] + except KeyError : + observationSubQuestionsObj['observationName'] = '' + + observationSubQuestionsObj['questionId'] = str(answer['qid']) + observationSubQuestionsObj['questionAnswer'] = ans_val + observationSubQuestionsObj['questionResponseType'] = answer['responseType'] + if answer['responseType'] == 'number': + if answer['payload']['labels']: + observationSubQuestionsObj['questionResponseLabel_number'] = responseLabel + else: + observationSubQuestionsObj['questionResponseLabel_number'] = 0 + if answer['payload']['labels']: + observationSubQuestionsObj['questionResponseLabel'] = responseLabel + else: + observationSubQuestionsObj['questionResponseLabel'] = '' + observationSubQuestionsObj['questionExternalId'] = quesexternalId + observationSubQuestionsObj['questionName'] = answer['payload']['question'][0] + observationSubQuestionsObj['questionECM'] = answer['evidenceMethod'] + observationSubQuestionsObj['criteriaId'] = str(answer['criteriaId']) + observationSubQuestionsObj['completedDate'] = completedDate + observationSubQuestionsObj['createdAt'] = createdAt + observationSubQuestionsObj['updatedAt'] = updatedAt + observationSubQuestionsObj['remarks'] = answer['remarks'] + if len(answer['fileName']): + multipleFiles = None + fileCnt = 1 + for filedetail in answer['fileName']: + if fileCnt == 1: + multipleFiles = config.get('URL', 'evidence_base_url') + filedetail['sourcePath'] + fileCnt = fileCnt + 1 + else: + multipleFiles = multipleFiles + ' , ' + config.get('URL', 'evidence_base_url') + filedetail['sourcePath'] + observationSubQuestionsObj['evidences'] = multipleFiles + observationSubQuestionsObj['evidence_count'] = len(answer['fileName']) + 
observationSubQuestionsObj['total_evidences'] = evidence_sub_count + + # to fetch the parent question of matrix + if ans['responseType']=='matrix': + observationSubQuestionsObj['instanceParentQuestion'] = ans['payload']['question'][0] + observationSubQuestionsObj['instanceParentId'] = ans['qid'] + observationSubQuestionsObj['instanceParentResponsetype'] =ans['responseType'] + observationSubQuestionsObj['instanceParentCriteriaId'] =ans['criteriaId'] + for critQuesInst in criteriaQuestionsCollec.find({'_id':ObjectId(ans["criteriaId"])}): + observationSubQuestionsObj['instanceParentCriteriaExternalId'] = critQuesInst['externalId'] + observationSubQuestionsObj['instanceParentCriteriaExternalId'] = critQuesInst['name'] + for eviCQInst in critQuesInst["evidences"] : + for secCQInst in eviCQInst["sections"] : + for quesCQInst in secCQInst["questions"] : + if str(quesCQInst["_id"]) == ans["qid"] : + observationSubQuestionsObj['instanceParentSection'] = secCQInst["code"] + observationSubQuestionsObj['instanceId'] = instNumber + for ques in questionsCollec.find({'_id':ObjectId(ans['qid'])}): + observationSubQuestionsObj['instanceParentExternalId'] = ques['externalId'] + observationSubQuestionsObj['instanceParentEcmSequence']= sequenceNumber( + observationSubQuestionsObj['instanceParentExternalId'], answer, + observationSubQuestionsObj['instanceParentSection'], solutionObj + ) + else: + observationSubQuestionsObj['instanceParentQuestion'] = '' + observationSubQuestionsObj['instanceParentId'] = '' + observationSubQuestionsObj['instanceParentResponsetype'] ='' + observationSubQuestionsObj['instanceId'] = instNumber + observationSubQuestionsObj['instanceParentExternalId'] = '' + observationSubQuestionsObj['instanceParentEcmSequence'] = '' + observationSubQuestionsObj['channel'] = rootOrgId + observationSubQuestionsObj['parent_channel'] = "SHIKSHALOKAM" + + ### Assessment Domain Logic - Start ### + domainArr = [] + for domain in solutionObj['themes']: + parent = None + builder = 
None + parent = domain['name'] + builder = implementation() + domObj = {} + domObj['name'] = domain['name'] + domObj['type'] = domain['type'] + domObj['externalId']=str(domain['externalId']) + + try: + if domain['criteria']: + domObj['theme']=builder.buildnode(domain, parent, str(answer['criteriaId'])) + except KeyError: + domObj['theme'] = builder.buildnode(domain, parent, str(answer['criteriaId'])) + + domainArr.append(domObj) + domArr.clear() + + for dom in domainArr: + if dom['theme']: + for obj in dom['theme']: + try: + if obj['type'] == 'criteria': + if (str(obj['externalId']) == str(answer['criteriaId'])): + for criteria in obSub['criteria'] : + if str(criteria["_id"]) == str(answer['criteriaId']) : + obj['name'] = criteria['name'] + obj['score'] = criteria['score'] + try: + obj['score_achieved'] = criteria['scoreAchieved'] + except KeyError : + obj['score_achieved'] = '' + obj['description'] = criteria['description'] + try: + levelArray = [] + levelArray = criteria['rubric']['levels'].values() + for labelValue in levelArray: + if (str((criteria['score'])) == labelValue['level']): + obj['label'] = labelValue['label'] + except Exception: + obj['label'] = '' + try: - if len(ansFn['payload']['labels']) > 0: - if(len(userRolesArrUnique)) > 0: - for usrRol in userRolesArrUnique : - finalObj = {} - finalObj = creatingObj(ansFn,ques['externalId'],ansFn['value'],instNumber, - ansFn['payload']['labels'][0], - entityLatitudeQuesFn,entityLongitudeQuesFn,usrRol) - producer.send((config.get("KAFKA","kafka_druid_topic")), json.dumps(finalObj) - .encode('utf-8')) - producer.flush() - successLogger.debug("Send Obj to Kafka") - else : - finalObj = {} - finalObj = creatingObj(ansFn,ques['externalId'],ansFn['value'],instNumber, - ansFn['payload']['labels'][0], - entityLatitudeQuesFn,entityLongitudeQuesFn,None) - producer.send((config.get("KAFKA","kafka_druid_topic")), json.dumps(finalObj) - .encode('utf-8')) - producer.flush() - successLogger.debug("Send Obj to Kafka") + 
prj_id = [] + title = [] + goal = [] + externalId =[] + for prj in criteria['improvement-projects']: + prj_id.append(str(prj['_id'])) + title.append(prj['title']) + goal.append(prj['goal']) + externalId.append(prj['externalId']) + obj['imp_project_id'] = prj_id + obj['imp_project_title'] = title + obj['imp_project_goal'] = goal + obj['imp_project_externalId'] = externalId except KeyError: - pass - else: - labelIndex = 0 - for quesOpt in ques['options']: - try: - if type(ansFn['value']) == str or type(ansFn['value']) == int: - if quesOpt['value'] == ansFn['value'] : - if(len(userRolesArrUnique)) > 0: - for usrRol in userRolesArrUnique : - finalObj = {} - finalObj = creatingObj(ansFn,ques['externalId'],ansFn['value'], - instNumber,ansFn['payload']['labels'][0], - entityLatitudeQuesFn,entityLongitudeQuesFn,usrRol) - producer.send((config.get("KAFKA","kafka_druid_topic")), json.dumps(finalObj) - .encode('utf-8')) - producer.flush() - successLogger.debug("Send Obj to Kafka") - else : - finalObj = {} - finalObj = creatingObj(ansFn,ques['externalId'],ansFn['value'], - instNumber,ansFn['payload']['labels'][0], - entityLatitudeQuesFn,entityLongitudeQuesFn,None) - producer.send((config.get("KAFKA","kafka_druid_topic")), json.dumps(finalObj) - .encode('utf-8')) - producer.flush() - successLogger.debug("Send Obj to Kafka") - - elif type(ansFn['value']) == list: - for ansArr in ansFn['value']: - if quesOpt['value'] == ansArr: - if(len(userRolesArrUnique)) > 0: - for usrRol in userRolesArrUnique : - finalObj = {} - finalObj = creatingObj(ansFn,ques['externalId'],ansArr,instNumber, - quesOpt['label'], - entityLatitudeQuesFn, - entityLongitudeQuesFn,usrRol) - producer.send((config.get("KAFKA","kafka_druid_topic")), json.dumps(finalObj) - .encode('utf-8')) - producer.flush() - successLogger.debug("Send Obj to Kafka") - else : - finalObj = {} - finalObj = creatingObj(ansFn,ques['externalId'],ansArr,instNumber, - quesOpt['label'], - entityLatitudeQuesFn, - entityLongitudeQuesFn,None) 
- producer.send((config.get("KAFKA","kafka_druid_topic")), json.dumps(finalObj) - .encode('utf-8')) - producer.flush() - successLogger.debug("Send Obj to Kafka") - labelIndex = labelIndex + 1 - except KeyError: - pass - #to check the value is null ie is not answered - try: - if type(ansFn['value']) == str and ansFn['value'] == '': - if(len(userRolesArrUnique)) > 0: - for usrRol in userRolesArrUnique : - finalObj = {} - finalObj = creatingObj(ansFn,ques['externalId'],ansFn['value'],instNumber,None, - entityLatitudeQuesFn, - entityLongitudeQuesFn,usrRol) - producer.send((config.get("KAFKA","kafka_druid_topic")), json.dumps(finalObj) - .encode('utf-8')) - producer.flush() - successLogger.debug("Send Obj to Kafka") - else : - finalObj = {} - finalObj = creatingObj(ansFn,ques['externalId'],ansFn['value'],instNumber,None, - entityLatitudeQuesFn,entityLongitudeQuesFn,None) - producer.send((config.get("KAFKA","kafka_druid_topic")), json.dumps(finalObj) - .encode('utf-8')) - producer.flush() - successLogger.debug("Send Obj to Kafka") - except KeyError: - pass - - if ans['responseType'] == 'text' or ans['responseType'] == 'radio' \ - or ans['responseType'] == 'multiselect' or ans['responseType'] == 'slider' \ - or ans['responseType'] == 'number' or ans['responseType'] == 'date': - inst_cnt = '' - fetchingQuestiondetails(ans,inst_cnt,entityLatitude,entityLongitude) - elif ans['responseType'] == 'matrix' and len(ans['value'])>0: - inst_cnt =0 - for instances in ans['value']: - inst_cnt = inst_cnt + 1 - for instance in instances.values(): - fetchingQuestiondetails(instance,inst_cnt,entityLatitude,entityLongitude) - - cursorMongo.close() + obj['imp_project_id'] = [] + obj['imp_project_title'] = [] + obj['imp_project_goal'] = [] + obj['imp_project_externalId'] = [] + if type(obj['externalId']) != str: + for cri in criteriaCollec.find({'_id':ObjectId(str(obj['externalId']))}): + obj['externalId'] = cri['externalId'] + obj['name']=cri['name'] + obj['score']=cri['score'] + 
obj['score_achieved'] = criteria['scoreAchieved'] + obj['description'] = cri['description'] + try: + levelArray = [] + levelArray = cri['rubric']['levels'].values() + for labelValue in levelArray: + if (str((cri['score'])) == labelValue['level']): + obj['label'] = labelValue['label'] + except Exception: + obj['label'] = '' + except KeyError: + pass + + for themes in domainArr: + for st in themes["theme"]: + if (st["type"] == "criteria") and (observationSubQuestionsObj['criteriaId'] == str(st["externalId"])): + observationSubQuestionsObj['domainName'] = themes['name'] + observationSubQuestionsObj['domainExternalId'] = themes['externalId'] + try : + for submTheme in obSub["themes"]: + if submTheme["externalId"] == themes['externalId'] : + observationSubQuestionsObj['domainLevel'] = submTheme["pointsBasedLevel"] + observationSubQuestionsObj['domainScore'] = submTheme["scoreAchieved"] + except KeyError : + observationSubQuestionsObj['domainLevel'] = '' + observationSubQuestionsObj['domainScore'] = '' + for theme in themes['theme']: + observationSubQuestionsObj['childName'] = theme['name'] + observationSubQuestionsObj['ancestorName'] = theme['parent'] + observationSubQuestionsObj['childType'] = theme['type'] + observationSubQuestionsObj['childExternalid'] = theme['externalId'] + + try: + observationSubQuestionsObj['level'] = theme['score'] + except KeyError: + observationSubQuestionsObj['level'] = '' + + try: + observationSubQuestionsObj['criteriaScore'] = theme['score_achieved'] + except KeyError: + observationSubQuestionsObj['criteriaScore'] = '' + + try: + observationSubQuestionsObj['label'] = theme['label'] + except KeyError: + observationSubQuestionsObj['label'] = '' + + try: + if (len(theme['imp_project_id']) >=0): + for i in range(len(theme['imp_project_id'])): + observationSubQuestionsObj['imp_project_id'] = theme['imp_project_id'][i] + observationSubQuestionsObj['imp_project_title'] = theme['imp_project_title'][i] + 
observationSubQuestionsObj['imp_project_goal'] = theme['imp_project_goal'][i] + observationSubQuestionsObj['imp_project_externalId'] = theme['imp_project_externalId'][i] + except KeyError: + observationSubQuestionsObj['imp_project_id'] = "" + observationSubQuestionsObj['imp_project_title'] = "" + observationSubQuestionsObj['imp_project_goal'] = "" + observationSubQuestionsObj['imp_project_externalId'] = "" + + if usrRolFn : + observationSubQuestionsObj = {**usrRolFn, **observationSubQuestionsObj} + observationSubQuestionsObj["submissionNumber"] = obSub["submissionNumber"] + observationSubQuestionsObj["submissionTitle"] = obSub["title"] + try: + observationSubQuestionsObj["criteriaLevelReport"] = obSub["criteriaLevelReport"] + except KeyError : + observationSubQuestionsObj["criteriaLevelReport"] = '' + + return observationSubQuestionsObj + + def fetchingQuestiondetails(ansFn, instNumber, entityLatitudeQuesFn, entityLongitudeQuesFn): + for ques in questionsCollec.find({'_id':ObjectId(ansFn['qid'])}): + if len(ques['options']) == 0: + try: + if len(ansFn['payload']['labels']) > 0: + if(len(userRolesArrUnique)) > 0: + for usrRol in userRolesArrUnique : + finalObj = {} + finalObj = creatingObj( + ansFn,ques['externalId'], + ansFn['value'],instNumber, + ansFn['payload']['labels'][0], + entityLatitudeQuesFn, + entityLongitudeQuesFn,usrRol + ) + if finalObj["completedDate"]: + producer.send( + (config.get("KAFKA", "kafka_druid_topic")), + json.dumps(finalObj).encode('utf-8') + ) + producer.flush() + successLogger.debug("Send Obj to Kafka") + else : + finalObj = {} + finalObj = creatingObj( + ansFn,ques['externalId'], + ansFn['value'], + instNumber, + ansFn['payload']['labels'][0], + entityLatitudeQuesFn, + entityLongitudeQuesFn, + None + ) + if finalObj["completedDate"]: + producer.send( + (config.get("KAFKA", "kafka_druid_topic")), + json.dumps(finalObj).encode('utf-8') + ) + producer.flush() + successLogger.debug("Send Obj to Kafka") + except KeyError: + pass + else: + 
labelIndex = 0 + for quesOpt in ques['options']: + try: + if type(ansFn['value']) == str or type(ansFn['value']) == int: + if quesOpt['value'] == ansFn['value'] : + if(len(userRolesArrUnique)) > 0: + for usrRol in userRolesArrUnique : + finalObj = {} + finalObj = creatingObj( + ansFn, + ques['externalId'], + ansFn['value'], + instNumber, + ansFn['payload']['labels'][0], + entityLatitudeQuesFn, + entityLongitudeQuesFn, + usrRol + ) + if finalObj["completedDate"]: + producer.send( + (config.get("KAFKA", "kafka_druid_topic")), + json.dumps(finalObj).encode('utf-8') + ) + producer.flush() + successLogger.debug("Send Obj to Kafka") + else : + finalObj = {} + finalObj = creatingObj( + ansFn,ques['externalId'], + ansFn['value'], + instNumber, + ansFn['payload']['labels'][0], + entityLatitudeQuesFn, + entityLongitudeQuesFn, + None + ) + if finalObj["completedDate"]: + producer.send( + (config.get("KAFKA", "kafka_druid_topic")), + json.dumps(finalObj).encode('utf-8') + ) + producer.flush() + successLogger.debug("Send Obj to Kafka") + + elif type(ansFn['value']) == list: + for ansArr in ansFn['value']: + if quesOpt['value'] == ansArr: + if(len(userRolesArrUnique)) > 0: + for usrRol in userRolesArrUnique : + finalObj = {} + finalObj = creatingObj( + ansFn, + ques['externalId'], + ansArr, + instNumber, + quesOpt['label'], + entityLatitudeQuesFn, + entityLongitudeQuesFn, + usrRol + ) + if finalObj["completedDate"]: + producer.send( + (config.get("KAFKA", "kafka_druid_topic")), + json.dumps(finalObj).encode('utf-8') + ) + producer.flush() + successLogger.debug("Send Obj to Kafka") + else : + finalObj = {} + finalObj = creatingObj( + ansFn, + ques['externalId'], + ansArr, + instNumber, + quesOpt['label'], + entityLatitudeQuesFn, + entityLongitudeQuesFn, + None + ) + if finalObj["completedDate"]: + producer.send( + (config.get("KAFKA", "kafka_druid_topic")), + json.dumps(finalObj).encode('utf-8') + ) + producer.flush() + successLogger.debug("Send Obj to Kafka") + labelIndex = 
labelIndex + 1 + except KeyError: + pass + #to check the value is null ie is not answered + try: + if type(ansFn['value']) == str and ansFn['value'] == '': + if(len(userRolesArrUnique)) > 0: + for usrRol in userRolesArrUnique : + finalObj = {} + finalObj = creatingObj( + ansFn, + ques['externalId'], + ansFn['value'], + instNumber, + None, + entityLatitudeQuesFn, + entityLongitudeQuesFn, + usrRol + ) + if finalObj["completedDate"]: + producer.send( + (config.get("KAFKA", "kafka_druid_topic")), + json.dumps(finalObj).encode('utf-8') + ) + producer.flush() + successLogger.debug("Send Obj to Kafka") + else : + finalObj = {} + finalObj = creatingObj( + ansFn, + ques['externalId'], + ansFn['value'], + instNumber, + None, + entityLatitudeQuesFn, + entityLongitudeQuesFn, + None + ) + if finalObj["completedDate"]: + producer.send( + (config.get("KAFKA", "kafka_druid_topic")), + json.dumps(finalObj).encode('utf-8') + ) + producer.flush() + successLogger.debug("Send Obj to Kafka") + except KeyError: + pass + + if ( + ans['responseType'] == 'text' or ans['responseType'] == 'radio' or + ans['responseType'] == 'multiselect' or ans['responseType'] == 'slider' or + ans['responseType'] == 'number' or ans['responseType'] == 'date' + ): + inst_cnt = '' + fetchingQuestiondetails(ans,inst_cnt, entityLatitude, entityLongitude) + elif ans['responseType'] == 'matrix' and len(ans['value']) > 0: + inst_cnt =0 + for instances in ans['value']: + inst_cnt = inst_cnt + 1 + for instance in instances.values(): + fetchingQuestiondetails(instance, inst_cnt, entityLatitude, entityLongitude) + cursorMongo.close() except Exception as e: - errorLogger.error(e,exc_info=True) + errorLogger.error(e, exc_info=True) try: - @app.agent(rawTopicName) - async def observationFaust(consumer) : - async for msg in consumer : + @app.agent(rawTopicName) + async def observationFaust(consumer) : + async for msg in consumer : msg_val = msg.decode('utf-8') msg_data = json.loads(msg_val) successLogger.debug("========== 
START OF OBSERVATION SUBMISSION ========") obj_arr = obj_creation(msg_data['_id']) successLogger.debug("********* END OF OBSERVATION SUBMISSION ***********") except Exception as e: - errorLogger.error(e,exc_info=True) + errorLogger.error(e, exc_info=True) if __name__ == '__main__': - app.main() + app.main() + diff --git a/observations/pyspark_observation_status_batch.py b/observations/pyspark_observation_status_batch.py index 1723cc5..73d3d14 100644 --- a/observations/pyspark_observation_status_batch.py +++ b/observations/pyspark_observation_status_batch.py @@ -2,14 +2,15 @@ # Name : pyspark_observation_status_batch.py # Author : Shakthieshwari.A # Description : Extracts the Status of the observation submissions -# either notStarted / In-Progress / Completed along with the users entity information - +# either notStarted / In-Progress / Completed along with the users +# entity information # ----------------------------------------------------------------- + import requests -import json,csv,sys,os,time +import json, csv, sys, os, time, redis import datetime from datetime import date -from configparser import ConfigParser,ExtendedInterpolation +from configparser import ConfigParser, ExtendedInterpolation from pymongo import MongoClient from bson.objectid import ObjectId from pyspark.sql import SparkSession @@ -17,21 +18,19 @@ import pyspark.sql.functions as F from pyspark.sql.types import * from pyspark.sql import Row -from collections import OrderedDict,Counter +from collections import OrderedDict, Counter from cassandra.cluster import Cluster -from cassandra.query import SimpleStatement,ConsistencyLevel +from cassandra.query import SimpleStatement, ConsistencyLevel import databricks.koalas as ks from azure.storage.blob import BlockBlobService, PublicAccess from azure.storage.blob import ContentSettings import logging import logging.handlers -import time from logging.handlers import TimedRotatingFileHandler - -config_path = os.path.dirname(os.path.abspath(__file__)) 
+config_path = os.path.split(os.path.dirname(os.path.abspath(__file__))) config = ConfigParser(interpolation=ExtendedInterpolation()) -config.read(config_path + "/config.ini") +config.read(config_path[0] + "/config.ini") formatter = logging.Formatter('%(asctime)s - %(levelname)s') @@ -39,208 +38,242 @@ successLogger.setLevel(logging.DEBUG) # Add the log message handler to the logger -successHandler = logging.handlers.RotatingFileHandler(config.get('LOGS','observation_status_success_log_filename')) -successBackuphandler = TimedRotatingFileHandler(config.get('LOGS','observation_status_success_log_filename'), - when="w0",backupCount=1) +successHandler = logging.handlers.RotatingFileHandler( + config.get('LOGS','observation_status_success_log_filename') +) +successBackuphandler = TimedRotatingFileHandler( + config.get('LOGS','observation_status_success_log_filename'), + when="w0", + backupCount=1 +) successHandler.setFormatter(formatter) successLogger.addHandler(successHandler) successLogger.addHandler(successBackuphandler) errorLogger = logging.getLogger('error log') errorLogger.setLevel(logging.ERROR) -errorHandler = logging.handlers.RotatingFileHandler(config.get('LOGS','observation_status_error_log_filename')) -errorBackuphandler = TimedRotatingFileHandler(config.get('LOGS','observation_status_error_log_filename'), - when="w0",backupCount=1) +errorHandler = logging.handlers.RotatingFileHandler( + config.get('LOGS','observation_status_error_log_filename') +) +errorBackuphandler = TimedRotatingFileHandler( + config.get('LOGS','observation_status_error_log_filename'), + when="w0", + backupCount=1 +) errorHandler.setFormatter(formatter) errorLogger.addHandler(errorHandler) errorLogger.addHandler(errorBackuphandler) try: - def get_keyclock_accesstoken(): - url_getkeyclock = config.get("URL","url_getkeyclock") - headers_getkeyclock = {'Content-Type': 'application/x-www-form-urlencoded'} - body_getkeyclock = {"grant_type":config.get("API_HEADERS","grant_type"), - 
"client_id":config.get("API_HEADERS","client_id"), - "refresh_token":config.get("API_HEADERS","refresh_token")} - - responsegetkeyclock = requests.post(url_getkeyclock, data=body_getkeyclock,headers=headers_getkeyclock) - if responsegetkeyclock.status_code == 200: - successLogger.debug("getkeyclock api") - return responsegetkeyclock.json() - else: - errorLogger.error("Failure in getkeyclock API") - errorLogger.error(responsegetkeyclock) - errorLogger.error(responsegetkeyclock.text) -except Exception as e: - errorLogger.error(e,exc_info=True) - -try: - def searchUser(accessToken,userId): - queryStringReadUser = "?fields=completeness%2CmissingFields%2ClastLoginTime%2Ctopics%2Corganisations%2Croles%2Clocations%2Cdeclarations" - urlReadUser = config.get("URL","sunbird_api_base_url_ip") + "/" + config.get("URL","sunbird_api_url_readuser") \ - + "/" + str(userId) + queryStringReadUser - headersReadUser ={ - 'Content-Type': config.get("API_HEADERS","content_type"), - 'Authorization': "Bearer "+ config.get("API_HEADERS","authorization"), - 'X-authenticated-user-token': accessToken - } - - try: - responseReadUser = requests.get(urlReadUser, headers=headersReadUser) - if responseReadUser.status_code == 200 : - return responseReadUser.json() - else: - successLogger.debug("Failure in Search User API") - successLogger.debug(responseReadUser.status_code) - successLogger.debug(responseReadUser.json()) - except Exception as e : - errorLogger.error("Search User API Failed") - errorLogger.error(e) - errorLogger.error(e,exc_info=True) -except Exception as e: - errorLogger.error(e,exc_info=True) - -get_keycloak_obj = get_keyclock_accesstoken() - -try: - def removeduplicate(it): - seen = [] - for x in it: - if x not in seen: - yield x - seen.append(x) + def removeduplicate(it): + seen = [] + for x in it: + if x not in seen: + yield x + seen.append(x) except Exception as e: - errorLogger.error(e,exc_info=True) + errorLogger.error(e, exc_info=True) try: - def chunks(l, n): - for i in 
range(0, len(l), n): - yield l[i:i + n] + def chunks(l, n): + for i in range(0, len(l), n): + yield l[i:i + n] except Exception as e: - errorLogger.error(e,exc_info=True) + errorLogger.error(e, exc_info=True) try: - def convert_to_row(d: dict) -> Row: - return Row(**OrderedDict(sorted(d.items()))) + def convert_to_row(d: dict) -> Row: + return Row(**OrderedDict(sorted(d.items()))) except Exception as e: - errorLogger.error(e,exc_info=True) + errorLogger.error(e,exc_info=True) -clientProd = MongoClient(config.get('MONGO','mongo_url')) -dbProd = clientProd[config.get('MONGO','database_name')] +clientProd = MongoClient(config.get('MONGO', 'mongo_url')) +db = clientProd[config.get('MONGO', 'database_name')] +obsSubmissionsCollec = db[config.get('MONGO', 'observation_sub_collec')] +solutionCollec = db[config.get('MONGO', 'solutions_collec')] +userRolesCollec = db[config.get("MONGO", 'user_roles_collection')] +programCollec = db[config.get("MONGO", 'programs_collec')] -obsSubmissionsCollec = dbProd[config.get('MONGO','observation_sub_collec')] - -solutionCollec = dbProd[config.get('MONGO','solutions_collec')] - -userRolesCollec = dbProd[config.get("MONGO","user_roles_collection")] - -programCollec = dbProd[config.get("MONGO","programs_collec")] +# redis cache connection +redis_connection = redis.ConnectionPool( + host=config.get("REDIS", "host"), + decode_responses=True, + port=config.get("REDIS", "port"), + db=config.get("REDIS", "db_name") +) +datastore = redis.StrictRedis(connection_pool=redis_connection) #observation submission dataframe -obs_sub_cursorMongo = obsSubmissionsCollec.aggregate([{"$project": {"_id": {"$toString": "$_id"}, - "entityId":{"$toString": "$entityId"},"status":1, - "entityExternalId":1,"entityInformation":{"name":1}, - "entityType":1,"createdBy":1, - "solutionId":{"$toString": "$solutionId"}, - "solutionExternalId":1,"updatedAt":1, - "programId":{"$toString": "$programId"}, - "programExternalId":1, - "appInformation":{"appName":1} - } - } - ] - 
) +obs_sub_cursorMongo = obsSubmissionsCollec.aggregate( + [{ + "$project": { + "_id": {"$toString": "$_id"}, + "entityId": {"$toString": "$entityId"}, + "status": 1, + "entityExternalId": 1, + "entityInformation": {"name": 1}, + "entityType": 1, + "createdBy": 1, + "solutionId": {"$toString": "$solutionId"}, + "solutionExternalId": 1, + "updatedAt": 1, + "programId": {"$toString": "$programId"}, + "programExternalId": 1, + "appInformation": {"appName": 1}, + "isAPrivateProgram": 1 + } + }] +) #schema for the observation submission dataframe -obs_sub_schema = StructType([ - StructField('status', StringType(), True), - StructField('entityExternalId', StringType(), True), - StructField('entityId', StringType(), True), - StructField('entityType', StringType(), True), - StructField('createdBy', StringType(), True), - StructField('solutionId', StringType(), True), - StructField('solutionExternalId', StringType(), True), - StructField('programId', StringType(), True), - StructField('programExternalId', StringType(), True), - StructField('_id', StringType(), True), - StructField('updatedAt', TimestampType(), True), - StructField('entityInformation',StructType([ - StructField('name', StringType(), True) - ])), - StructField('appInformation',StructType([ - StructField('appName', StringType(), True) - ])) - ]) -spark = SparkSession.builder.appName("obs_sub_status").config("spark.driver.memory", "50g")\ - .config("spark.executor.memory","100g")\ - .config("spark.memory.offHeap.enabled",True)\ - .config("spark.memory.offHeap.size","32g").getOrCreate() +obs_sub_schema = StructType( + [ + StructField('status', StringType(), True), + StructField('entityExternalId', StringType(), True), + StructField('entityId', StringType(), True), + StructField('entityType', StringType(), True), + StructField('createdBy', StringType(), True), + StructField('solutionId', StringType(), True), + StructField('solutionExternalId', StringType(), True), + StructField('programId', StringType(), True), + 
StructField('programExternalId', StringType(), True), + StructField('_id', StringType(), True), + StructField('updatedAt', TimestampType(), True), + StructField('isAPrivateProgram', BooleanType(), True), + StructField( + 'entityInformation', + StructType([StructField('name', StringType(), True)]) + ), + StructField( + 'appInformation', + StructType([StructField('appName', StringType(), True)]) + ) + ] +) + +spark = SparkSession.builder.appName( + "obs_sub_status" +).config( + "spark.driver.memory", "50g" +).config( + "spark.executor.memory", "100g" +).config( + "spark.memory.offHeap.enabled", True +).config( + "spark.memory.offHeap.size", "32g" +).getOrCreate() sc=spark.sparkContext -obs_sub_rdd = spark.sparkContext.parallelize(list(obs_sub_cursorMongo)); -obs_sub_df1 = spark.createDataFrame(obs_sub_rdd,obs_sub_schema); - -obs_sub_df1 = obs_sub_df1.withColumn("date_time", to_timestamp(obs_sub_df1["updatedAt"], 'yyyy-MM-dd HH:mm:ss')) - -obs_sub_df1 = obs_sub_df1.withColumn("date",F.split(obs_sub_df1["date_time"], ' ')[0]) -obs_sub_df1 = obs_sub_df1.withColumn("time",F.split(obs_sub_df1["date_time"], ' ')[1]) - -obs_sub_df1 = obs_sub_df1.withColumn("app_name",\ - F.when(obs_sub_df1["appInformation"]["appName"].isNull(), - F.lit(config.get("COMMON","diksha_survey_app_name"))) - .otherwise(lower(obs_sub_df1["appInformation"]["appName"]))) - -obs_sub_df1 = obs_sub_df1.withColumn("timestamp",F.concat(F.col("date"),F.lit("T"),F.col("time"),F.lit(".000Z"))) -obs_sub_df = obs_sub_df1.select("status",obs_sub_df1["entityExternalId"].alias("entity_externalId"), - obs_sub_df1["entityId"].alias("entity_id"), - obs_sub_df1["entityType"].alias("entity_type"), - obs_sub_df1["createdBy"].alias("user_id"), - obs_sub_df1["solutionId"].alias("solution_id"), - obs_sub_df1["solutionExternalId"].alias("solution_externalId"), - obs_sub_df1["_id"].alias("submission_id"), - obs_sub_df1["entityInformation"]["name"].alias("entity_name"), - 
"timestamp",obs_sub_df1["programId"].alias("program_id"), - obs_sub_df1["programExternalId"].alias("program_externalId"), - obs_sub_df1["app_name"]) +obs_sub_rdd = spark.sparkContext.parallelize(list(obs_sub_cursorMongo)) +obs_sub_df1 = spark.createDataFrame(obs_sub_rdd,obs_sub_schema) + +obs_sub_df1 = obs_sub_df1.withColumn( + "date_time", to_timestamp(obs_sub_df1["updatedAt"], 'yyyy-MM-dd HH:mm:ss') +) + +obs_sub_df1 = obs_sub_df1.withColumn( + "date", F.split(obs_sub_df1["date_time"], ' ')[0] +) +obs_sub_df1 = obs_sub_df1.withColumn( + "time", F.split(obs_sub_df1["date_time"], ' ')[1] +) + +obs_sub_df1 = obs_sub_df1.withColumn( + "app_name", + F.when( + obs_sub_df1["appInformation"]["appName"].isNull(), + F.lit(config.get("COMMON", "diksha_survey_app_name")) + ).otherwise( + lower(obs_sub_df1["appInformation"]["appName"]) + ) +) + +obs_sub_df1 = obs_sub_df1.withColumn( + "private_program", + F.when( + (obs_sub_df1["isAPrivateProgram"].isNotNull() == True) & + (obs_sub_df1["isAPrivateProgram"] == True), + "true" + ).when( + (obs_sub_df1["isAPrivateProgram"].isNotNull() == True) & + (obs_sub_df1["isAPrivateProgram"] == False), + "false" + ).otherwise("false") +) + +obs_sub_df1 = obs_sub_df1.withColumn( + "completedDate", + F.concat(F.col("date"), F.lit("T"), F.col("time"), F.lit(".000Z")) +) + +obs_sub_df = obs_sub_df1.select( + "status", + obs_sub_df1["entityExternalId"].alias("entity_externalId"), + obs_sub_df1["entityId"].alias("entity_id"), + obs_sub_df1["entityType"].alias("entity_type"), + obs_sub_df1["createdBy"].alias("user_id"), + obs_sub_df1["solutionId"].alias("solution_id"), + obs_sub_df1["solutionExternalId"].alias("solution_externalId"), + obs_sub_df1["_id"].alias("submission_id"), + obs_sub_df1["entityInformation"]["name"].alias("entity_name"), + "completedDate", + obs_sub_df1["programId"].alias("program_id"), + obs_sub_df1["programExternalId"].alias("program_externalId"), + obs_sub_df1["app_name"], + obs_sub_df1["private_program"] +) 
obs_sub_cursorMongo.close() #observation solution dataframe -obs_sol_cursorMongo = solutionCollec.aggregate([{"$match":{"type":"observation"}}, - {"$project": {"_id": {"$toString": "$_id"},"name":1}}]) +obs_sol_cursorMongo = solutionCollec.aggregate( + [ + {"$match": {"type":"observation"}}, + {"$project": {"_id": {"$toString": "$_id"}, "name":1}} + ] +) #schema for the observation solution dataframe obs_sol_schema = StructType([ - StructField('name', StringType(), True), - StructField('_id', StringType(), True) + StructField('name', StringType(), True), + StructField('_id', StringType(), True) ]) -obs_soln_rdd = spark.sparkContext.parallelize(list(obs_sol_cursorMongo)); -obs_soln_df = spark.createDataFrame(obs_soln_rdd,obs_sol_schema); +obs_soln_rdd = spark.sparkContext.parallelize(list(obs_sol_cursorMongo)) +obs_soln_df = spark.createDataFrame(obs_soln_rdd,obs_sol_schema) obs_sol_cursorMongo.close() #match solution id from solution df to submission df to fetch the solution name -obs_sub_soln_df = obs_sub_df.join(obs_soln_df,obs_sub_df.solution_id==obs_soln_df._id,'inner').drop(obs_soln_df["_id"]) -obs_sub_soln_df = obs_sub_soln_df.withColumnRenamed("name","solution_name") +obs_sub_soln_df = obs_sub_df.join( + obs_soln_df, + obs_sub_df.solution_id==obs_soln_df._id, + 'inner' +).drop(obs_soln_df["_id"]) +obs_sub_soln_df = obs_sub_soln_df.withColumnRenamed("name", "solution_name") #observation program dataframe -obs_pgm_cursorMongo = programCollec.aggregate([{"$project": {"_id": {"$toString": "$_id"},"name":1}}]) +obs_pgm_cursorMongo = programCollec.aggregate( + [{"$project": {"_id": {"$toString": "$_id"}, "name": 1}}] +) #schema for the observation program dataframe obs_pgm_schema = StructType([ - StructField('name', StringType(), True), - StructField('_id', StringType(), True) + StructField('name', StringType(), True), + StructField('_id', StringType(), True) ]) -obs_pgm_rdd = spark.sparkContext.parallelize(list(obs_pgm_cursorMongo)); -obs_pgm_df = 
spark.createDataFrame(obs_pgm_rdd,obs_pgm_schema); +obs_pgm_rdd = spark.sparkContext.parallelize(list(obs_pgm_cursorMongo)) +obs_pgm_df = spark.createDataFrame(obs_pgm_rdd,obs_pgm_schema) obs_pgm_cursorMongo.close() #match solution id from solution df to submission df to fetch the solution name -obs_sub_pgm_df = obs_sub_soln_df.join(obs_pgm_df,obs_sub_soln_df.program_id==obs_pgm_df._id,'inner')\ - .drop(obs_pgm_df["_id"]) -obs_sub_pgm_df = obs_sub_pgm_df.withColumnRenamed("name","program_name") +obs_sub_pgm_df = obs_sub_soln_df.join( + obs_pgm_df, + obs_sub_soln_df.program_id==obs_pgm_df._id, + 'inner' +).drop(obs_pgm_df["_id"]) +obs_sub_pgm_df = obs_sub_pgm_df.withColumnRenamed("name", "program_name") #user organisation dataframe obs_sub_soln_userid_df = obs_sub_pgm_df.select("user_id") @@ -250,73 +283,102 @@ def convert_to_row(d: dict) -> Row: userId_arr = [] uniqueuserId_arr = [] userId_obs_status_df_before = obs_sub_soln_userid_df.toJSON().map(lambda j: json.loads(j)).collect() -for uid in userId_obs_status_df_before : - userId_arr.append(uid["user_id"]) +for uid in userId_obs_status_df_before: + userId_arr.append(uid["user_id"]) uniqueuserId_arr = list(removeduplicate(userId_arr)) userIntegratedAppEntitiesArr = [] for ch in uniqueuserId_arr : - searchUserObj = {} - searchUserObj = searchUser(get_keycloak_obj["access_token"],ch) - if searchUserObj: - searchResult = False - searchResult = "result" in searchUserObj - if searchResult == True : - searchResponse = False - searchResponse = "response" in searchUserObj["result"] - if searchResponse == True : - userRelatedEntitiesObj = {} - userRoles = None - try : - userRoles = searchUserObj["result"]["response"]["userSubType"] - except KeyError : - userRoles = '' - try : - for usrLoc in searchUserObj["result"]["response"]["userLocations"]: - userRelatedEntitiesObj[usrLoc["type"]+'_name'] = usrLoc["name"] - userRelatedEntitiesObj[usrLoc["type"]+'_id'] = usrLoc["id"] - 
userRelatedEntitiesObj[usrLoc["type"]+'_externalId'] = usrLoc["code"] - userRelatedEntitiesObj["user_id"] = searchUserObj["result"]["response"]["id"] - if userRoles : - userRelatedEntitiesObj["role_id"] = "integrated_app" - userRelatedEntitiesObj["role_externalId"] = "integrated_app" - userRelatedEntitiesObj["role_title"] = userRoles - except KeyError : - pass - if userRelatedEntitiesObj : - userIntegratedAppEntitiesArr.append(userRelatedEntitiesObj) - - for usOg in searchUserObj["result"]["response"]["organisations"]: - searchObj = {} - searchObj["id"] = searchUserObj["result"]["response"]["id"] - searchObj["user_name"] = searchUserObj["result"]["response"]["userName"] - searchObj["first_name"] = searchUserObj["result"]["response"]["firstName"] - searchObj["channel"] = searchUserObj["result"]["response"]["rootOrgId"] - searchObj["parent_channel"] = "SHIKSHALOKAM" - try: - searchObj["organisation_id"] = usOg["organisationId"] - except KeyError : - searchObj["organisation_id"] = None - userId_obs_status_df_after.append(searchObj) - -df_user_org = ks.DataFrame(userId_obs_status_df_after); + userObj = {} + userObj = datastore.hgetall("user:"+ch) + if userObj : + stateName = None + blockName = None + districtName = None + clusterName = None + rootOrgId = None + userSubType = None + userSchool = None + userSchoolUDISE = None + userSchoolName = None + try: + userSchool = userObj["school"] + except KeyError : + userSchool = '' + try: + userSchoolUDISE = userObj["schooludisecode"] + except KeyError : + userSchoolUDISE = '' + try: + userSchoolName = userObj["schoolname"] + except KeyError : + userSchoolName = '' + try: + userSubType = userObj["usersubtype"] + except KeyError : + userSubType = '' + try: + stateName = userObj["state"] + except KeyError : + stateName = '' + try: + blockName = userObj["block"] + except KeyError : + blockName = '' + try: + districtName = userObj["district"] + except KeyError : + districtName = '' + try: + clusterName = userObj["cluster"] + 
except KeyError : + clusterName = '' + try: + rootOrgId = userObj["rootorgid"] + except KeyError : + rootOrgId = '' + userRelatedEntitiesObj = {} + try : + userRelatedEntitiesObj["state_name"] = stateName + userRelatedEntitiesObj["block_name"] = blockName + userRelatedEntitiesObj["district_name"] = districtName + userRelatedEntitiesObj["cluster_name"] = clusterName + userRelatedEntitiesObj["user_id"] = ch + userRelatedEntitiesObj["role_title"] = userSubType + userRelatedEntitiesObj["school_id"] = userSchool + userRelatedEntitiesObj["school_name"] = userSchoolName + userRelatedEntitiesObj["school_externalId"] = userSchoolUDISE + except KeyError : + pass + if userRelatedEntitiesObj : + userIntegratedAppEntitiesArr.append(userRelatedEntitiesObj) + + searchObj = {} + searchObj["id"] = ch + searchObj["channel"] = rootOrgId + searchObj["parent_channel"] = "SHIKSHALOKAM" + userId_obs_status_df_after.append(searchObj) + +df_user_org = ks.DataFrame(userId_obs_status_df_after) df_user_org = df_user_org.to_spark() if len(userIntegratedAppEntitiesArr) > 0 : - df_user_rel_entities = ks.DataFrame(userIntegratedAppEntitiesArr) - df_user_rel_entities = df_user_rel_entities.to_spark() + df_user_rel_entities = ks.DataFrame(userIntegratedAppEntitiesArr) + df_user_rel_entities = df_user_rel_entities.to_spark() # roles dataframe from mongodb -roles_cursorMongo = userRolesCollec.aggregate([{"$project": {"_id": {"$toString": "$_id"},"title":1}}]) +roles_cursorMongo = userRolesCollec.aggregate( + [{"$project": {"_id": {"$toString": "$_id"}, "title": 1}}] +) #schema for the observation solution dataframe roles_schema = StructType([ - StructField('title', StringType(), True), - StructField('_id', StringType(), True) + StructField('title', StringType(), True), + StructField('_id', StringType(), True) ]) -roles_rdd = spark.sparkContext.parallelize(list(roles_cursorMongo)); -roles_df = spark.createDataFrame(roles_rdd,roles_schema); +roles_rdd = 
spark.sparkContext.parallelize(list(roles_cursorMongo)) +roles_df = spark.createDataFrame(roles_rdd, roles_schema) roles_cursorMongo.close() @@ -324,161 +386,195 @@ def convert_to_row(d: dict) -> Row: userEntityRoleArray = [] try: - def elasticSearchJson(userEntityJson) : - for user in userEntityJson : - try: - if len(user["_source"]["data"]["roles"]) > 0 : - for roleObj in user["_source"]["data"]["roles"]: - try: - if len(roleObj["entities"]) > 0: - for ent in roleObj["entities"]: - entObj = {} - entObj["userId"] = user["_source"]["data"]["userId"] - entObj["roleId"] = roleObj["roleId"] - entObj["roleCode"] =roleObj["code"] - entObj["entityId"] = ent - userEntityRoleArray.append(entObj) - else : - entNoObj = {} - entNoObj["userId"] = user["_source"]["data"]["userId"] - entNoObj["roleId"] = roleObj["roleId"] - entNoObj["roleCode"] = roleObj["code"] - entNoObj["entityId"] = None - userEntityRoleArray.append(entNoObj) - except KeyError : - entNoEntObj = {} - entNoEntObj["userId"] = user["_source"]["data"]["userId"] - entNoEntObj["roleId"] = roleObj["roleId"] - entNoEntObj["roleCode"] = roleObj["code"] - entNoEntObj["entityId"] = None - userEntityRoleArray.append(entNoEntObj) - pass - except KeyError : - pass + def elasticSearchJson(userEntityJson) : + for user in userEntityJson : + try: + if len(user["_source"]["data"]["roles"]) > 0 : + for roleObj in user["_source"]["data"]["roles"]: + try: + if len(roleObj["entities"]) > 0: + for ent in roleObj["entities"]: + entObj = {} + entObj["userId"] = user["_source"]["data"]["userId"] + entObj["roleId"] = roleObj["roleId"] + entObj["roleCode"] =roleObj["code"] + entObj["entityId"] = ent + userEntityRoleArray.append(entObj) + else : + entNoObj = {} + entNoObj["userId"] = user["_source"]["data"]["userId"] + entNoObj["roleId"] = roleObj["roleId"] + entNoObj["roleCode"] = roleObj["code"] + entNoObj["entityId"] = None + userEntityRoleArray.append(entNoObj) + except KeyError : + entNoEntObj = {} + entNoEntObj["userId"] = 
user["_source"]["data"]["userId"] + entNoEntObj["roleId"] = roleObj["roleId"] + entNoEntObj["roleCode"] = roleObj["code"] + entNoEntObj["entityId"] = None + userEntityRoleArray.append(entNoEntObj) + pass + except KeyError : + pass except Exception as e: - errorLogger.error(e,exc_info=True) + errorLogger.error(e, exc_info=True) headers_user = {'Content-Type': 'application/json'} -url_getuserinfo = config.get("ELASTICSEARCH","url_user") -payload_user_elastic = {"size": 10000,"query":{"bool":{"must":[{"match":{"_type":"_doc"}}]}}} -user_response = requests.post(url_getuserinfo , headers = headers_user,data=json.dumps(payload_user_elastic)) +url_getuserinfo = config.get("ELASTICSEARCH", "url_user") +payload_user_elastic = { + "size": 10000, + "query": { + "bool": { + "must":[{"match":{"_type":"_doc"}}] + } + } +} +user_response = requests.post( + url_getuserinfo , headers=headers_user, data=json.dumps(payload_user_elastic) +) try: - if user_response.status_code == 200: - user_response = user_response.json() - user_data = user_response['hits']['hits'] - elasticSearchJson(user_data) - user_scroll_id = user_response['_scroll_id'] - else: - errorLogger.error(user_response) - errorLogger.error(user_response.text) - errorLogger.error("Failure in getting User Data From Elastic Search") + if user_response.status_code == 200: + user_response = user_response.json() + user_data = user_response['hits']['hits'] + elasticSearchJson(user_data) + user_scroll_id = user_response['_scroll_id'] + else: + errorLogger.error(user_response) + errorLogger.error(user_response.text) + errorLogger.error("Failure in getting User Data From Elastic Search") except KeyError as e: - user_hit = [] - user_scroll_id = None - errorLogger.error("user scroll id error") + user_hit = [] + user_scroll_id = None + errorLogger.error("user scroll id error") while user_data: - user_scroll_payload = json.dumps({ - 'scroll': '1m', - 'scroll_id': user_scroll_id - }) - user_scroll_api_url = 
config.get("ELASTICSEARCH","url_user_scroll") - user_scroll_response = requests.post(user_scroll_api_url,headers=headers_user,data = user_scroll_payload) - try: - if user_scroll_response.status_code == 200: - user_scroll_response = user_scroll_response.json() - user_data = user_scroll_response['hits']['hits'] - if len(user_data) > 0 : - elasticSearchJson(user_data) - user_scroll_id = user_scroll_response['_scroll_id'] - - else: - errorLogger.error("Failure in getting User Data From Elastic Search") - except KeyError : - user_entity_data = [] - user_entity_scroll_id = None + user_scroll_payload = json.dumps({ + 'scroll': '1m', + 'scroll_id': user_scroll_id + }) + user_scroll_api_url = config.get("ELASTICSEARCH", "url_user_scroll") + user_scroll_response = requests.post( + user_scroll_api_url, headers=headers_user, data = user_scroll_payload + ) + try: + if user_scroll_response.status_code == 200: + user_scroll_response = user_scroll_response.json() + user_data = user_scroll_response['hits']['hits'] + if len(user_data) > 0 : + elasticSearchJson(user_data) + user_scroll_id = user_scroll_response['_scroll_id'] + else: + errorLogger.error("Failure in getting User Data From Elastic Search") + except KeyError : + user_entity_data = [] + user_entity_scroll_id = None #schema for the observation solution dataframe user_roles_schema = StructType([ - StructField('roleId', StringType(), True), - StructField('userId', StringType(), True), - StructField('roleCode', StringType(), True), - StructField('entityId', StringType(), True) + StructField('roleId', StringType(), True), + StructField('userId', StringType(), True), + StructField('roleCode', StringType(), True), + StructField('entityId', StringType(), True) ]) -user_roles_rdd = spark.sparkContext.parallelize(list(userEntityRoleArray)); -user_roles_df = spark.createDataFrame(user_roles_rdd,user_roles_schema); +user_roles_rdd = spark.sparkContext.parallelize(list(userEntityRoleArray)) +user_roles_df = 
spark.createDataFrame(user_roles_rdd, user_roles_schema) # merge user_roles_df and roles_df to get role title -user_roles_title_df = user_roles_df.join(roles_df,user_roles_df.roleId==roles_df._id,'inner').drop(roles_df["_id"]) -user_roles_title_df = user_roles_title_df.select(user_roles_title_df["roleId"].alias("role_id"), - user_roles_title_df["userId"].alias("user_id"), - user_roles_title_df["roleCode"].alias("role_externalId"), - user_roles_title_df["entityId"], - user_roles_title_df["title"].alias("role_title")) +user_roles_title_df = user_roles_df.join( + roles_df, + user_roles_df.roleId==roles_df._id, + 'inner' +).drop(roles_df["_id"]) +user_roles_title_df = user_roles_title_df.select( + user_roles_title_df["userId"].alias("user_id"), + user_roles_title_df["entityId"], + user_roles_title_df["title"].alias("role_title") +) #entity elastic search dataframe entityArray = [] def entityElasticSearchJson(entityJsonData): - for ent_data in entityJsonData : - for tel in ent_data["_source"]["data"]["telemetry_entities"]: - tel["entity_id"] = ent_data["_source"]["data"]["_id"] - entityArray.append(tel) + for ent_data in entityJsonData : + for tel in ent_data["_source"]["data"]["telemetry_entities"]: + tel["entity_id"] = ent_data["_source"]["data"]["_id"] + entityArray.append(tel) + + headers_entity = {'Content-Type': 'application/json'} -url_getentityinfo = config.get("ELASTICSEARCH","url_entity") -payload_entity_elastic = {"size": 10000,"query":{"bool":{"must":[{"match":{"_type":"_doc"}}]}}} -entity_response = requests.post(url_getentityinfo , headers = headers_entity,data=json.dumps(payload_entity_elastic)) +url_getentityinfo = config.get("ELASTICSEARCH", "url_entity") +payload_entity_elastic = { + "size": 10000, + "query":{ + "bool": { + "must": [{"match": {"_type": "_doc"}}] + } + } +} +entity_response = requests.post( + url_getentityinfo , headers = headers_entity, data=json.dumps(payload_entity_elastic) +) try: - if entity_response.status_code == 200: - 
entity_response = entity_response.json() - entity_data = entity_response['hits']['hits'] - entityElasticSearchJson(entity_data) - entity_scroll_id = entity_response['_scroll_id'] - else: - errorLogger.error("Failure in getting Entity Data From Elastic Search") + if entity_response.status_code == 200: + entity_response = entity_response.json() + entity_data = entity_response['hits']['hits'] + entityElasticSearchJson(entity_data) + entity_scroll_id = entity_response['_scroll_id'] + else: + errorLogger.error("Failure in getting Entity Data From Elastic Search") except KeyError as e: - entity_hit = [] - entity_scroll_id = None - errorLogger.error("entity scroll id error") + entity_hit = [] + entity_scroll_id = None + errorLogger.error("entity scroll id error") while entity_data: - entity_scroll_payload = json.dumps({ - 'scroll': '1m', - 'scroll_id': entity_scroll_id - }) - entity_scroll_api_url = config.get("ELASTICSEARCH","url_user_scroll") - entity_scroll_response = requests.post(entity_scroll_api_url,headers=headers_entity,data = entity_scroll_payload) - try: - if entity_scroll_response.status_code == 200: - entity_scroll_response = entity_scroll_response.json() - entity_data = entity_scroll_response['hits']['hits'] - if len(entity_data) > 0 : - entityElasticSearchJson(entity_data) - entity_scroll_id = entity_scroll_response['_scroll_id'] - - else: - errorLogger.error("Failure in getting Entity Data From Elastic Search") - except KeyError : - entity_entity_data = [] - entity_entity_scroll_id = None - - -entity_df = ks.DataFrame(entityArray); + entity_scroll_payload = json.dumps({ + 'scroll': '1m', + 'scroll_id': entity_scroll_id + }) + entity_scroll_api_url = config.get("ELASTICSEARCH", "url_user_scroll") + entity_scroll_response = requests.post( + entity_scroll_api_url, headers=headers_entity, data=entity_scroll_payload + ) + try: + if entity_scroll_response.status_code == 200: + entity_scroll_response = entity_scroll_response.json() + entity_data = 
entity_scroll_response['hits']['hits'] + if len(entity_data) > 0 : + entityElasticSearchJson(entity_data) + entity_scroll_id = entity_scroll_response['_scroll_id'] + else: + errorLogger.error("Failure in getting Entity Data From Elastic Search") + except KeyError : + entity_entity_data = [] + entity_entity_scroll_id = None + +entity_df = ks.DataFrame(entityArray) entity_df = entity_df.to_spark() # merge user role title dataframe and entity dataframe -user_entity_info_df = user_roles_title_df.join(entity_df,user_roles_title_df.entityId==entity_df.entity_id,'inner')\ - .drop(user_roles_title_df["entityId"]) +user_entity_info_df = user_roles_title_df.join( + entity_df, + user_roles_title_df.entityId==entity_df.entity_id, + 'inner' +).drop(user_roles_title_df["entityId"]) # merge user entity dataframe and user org dataframe -user_df = df_user_org.join(user_entity_info_df,df_user_org.id==user_entity_info_df.user_id,'left')\ - .drop(user_entity_info_df["user_id"]).drop(user_entity_info_df["entity_id"]) - -user_df_integrated_app = df_user_org.join(df_user_rel_entities,df_user_org.id==df_user_rel_entities.user_id,'left') +user_df = df_user_org.join( + user_entity_info_df, + df_user_org.id==user_entity_info_df.user_id, + 'left' +).drop(user_entity_info_df["user_id"]).drop(user_entity_info_df["entity_id"]) + +user_df_integrated_app = df_user_org.join( + df_user_rel_entities, + df_user_org.id==df_user_rel_entities.user_id, + 'left' +) user_df_integrated_app = user_df_integrated_app.drop(user_df_integrated_app["user_id"]) - obs_sub_cursorMongo = [] obs_sol_cursorMongo = [] user_org_rows = [] @@ -497,15 +593,23 @@ def entityElasticSearchJson(entityJsonData): user_entity_info_df.cache() # merge user dataframe and observation submission dataframe -obs_sub_status_df_survey = obs_sub_pgm_df\ - .join(user_df,[obs_sub_pgm_df.user_id==user_df.id, - obs_sub_pgm_df.app_name==config.get("COMMON","diksha_survey_app_name")],'inner')\ - .drop(user_df["id"]).drop(user_df["entity_id"]) - 
-obs_sub_status_df_integrated_app = obs_sub_pgm_df\ - .join(user_df_integrated_app,[obs_sub_pgm_df.user_id==user_df_integrated_app.id, - obs_sub_pgm_df.app_name==config.get("COMMON","diksha_integrated_app_name")],'inner')\ - .drop(user_df_integrated_app["id"]) +obs_sub_status_df_survey = obs_sub_pgm_df.join( + user_df, + [ + obs_sub_pgm_df.user_id==user_df.id, + obs_sub_pgm_df.app_name==config.get("COMMON", "diksha_survey_app_name") + ], + 'inner' +).drop(user_df["id"]).drop(user_df["entity_id"]) + +obs_sub_status_df_integrated_app = obs_sub_pgm_df.join( + user_df_integrated_app, + [ + obs_sub_pgm_df.user_id==user_df_integrated_app.id, + obs_sub_pgm_df.app_name==config.get("COMMON", "diksha_integrated_app_name") + ], + 'inner' +).drop(user_df_integrated_app["id"]) integrated_app_column_list = [] survey_app_column_list = [] @@ -513,114 +617,135 @@ def entityElasticSearchJson(entityJsonData): survey_app_column_list = obs_sub_status_df_survey.columns missing_col_in_integrated_app_list = [] -missing_col_in_integrated_app_list = list(set(integrated_app_column_list) - set(survey_app_column_list)) +missing_col_in_integrated_app_list = list( + set(integrated_app_column_list) - set(survey_app_column_list) +) missing_col_in_survey_app_list = [] -missing_col_in_survey_app_list = list(set(survey_app_column_list) - set(integrated_app_column_list)) +missing_col_in_survey_app_list = list( + set(survey_app_column_list) - set(integrated_app_column_list) +) if len(missing_col_in_survey_app_list) : - for inte in missing_col_in_survey_app_list : - obs_sub_status_df_integrated_app = obs_sub_status_df_integrated_app.withColumn(inte, lit(None).cast(StringType())) + for inte in missing_col_in_survey_app_list : + obs_sub_status_df_integrated_app = obs_sub_status_df_integrated_app.withColumn( + inte, lit(None).cast(StringType()) + ) if len(missing_col_in_integrated_app_list) : - for sur in missing_col_in_integrated_app_list : - obs_sub_status_df_survey = 
obs_sub_status_df_survey.withColumn(sur, lit(None).cast(StringType())) + for sur in missing_col_in_integrated_app_list : + obs_sub_status_df_survey = obs_sub_status_df_survey.withColumn( + sur, lit(None).cast(StringType()) + ) final_df = obs_sub_status_df_integrated_app.unionByName(obs_sub_status_df_survey) final_df = final_df.dropDuplicates() -final_df.coalesce(1).write.format("json").mode("overwrite") \ - .save(config.get("COMMON","observation_status_output_dir")+"/") - -for filename in os.listdir(config.get("COMMON","observation_status_output_dir")+"/"): - if filename.endswith(".json"): - os.rename(config.get("COMMON","observation_status_output_dir")+"/"+filename, - config.get("COMMON","observation_status_output_dir")+"/sl_observation_status.json") -blob_service_client = BlockBlobService(account_name=config.get("AZURE","account_name"), - sas_token=config.get("AZURE","sas_token")) -container_name = config.get("AZURE","container_name") -local_path = config.get("COMMON","observation_status_output_dir") -blob_path = config.get("AZURE","blob_path") + +final_df.coalesce(1).write.format("json").mode("overwrite").save( + config.get("OUTPUT_DIR", "observation_status_output_dir")+"/" +) + +for filename in os.listdir(config.get("OUTPUT_DIR", "observation_status_output_dir")+"/"): + if filename.endswith(".json"): + os.rename( + config.get("OUTPUT_DIR", "observation_status_output_dir") + "/" + filename, + config.get("OUTPUT_DIR", "observation_status_output_dir") + "/sl_observation_status.json" + ) + +blob_service_client = BlockBlobService( + account_name=config.get("AZURE", "account_name"), + sas_token=config.get("AZURE", "sas_token") +) +container_name = config.get("AZURE", "container_name") +local_path = config.get("OUTPUT_DIR", "observation_status_output_dir") +blob_path = config.get("AZURE", "blob_path") for files in os.listdir(local_path): - if "sl_observation_status.json" in files: - 
blob_service_client.create_blob_from_path(container_name,os.path.join(blob_path,files),local_path + "/" + files) + if "sl_observation_status.json" in files: + blob_service_client.create_blob_from_path( + container_name, + os.path.join(blob_path,files), + local_path + "/" + files + ) -datasources = ["sl-observation-status"] +datasources = ["sl_observation_status"] -sl_status_spec = config.get("DRUID","observation_status_spec") +sl_status_spec = config.get("DRUID", "observation_status_spec") ingestion_specs = [sl_status_spec] for i,j in zip(datasources,ingestion_specs): - - druid_end_point = config.get("DRUID","druid_end_point")+i - - druid_batch_end_point = config.get("DRUID","druid_batch_end_point") - - headers = {'Content-Type' : 'application/json'} - - get_timestamp = requests.get(druid_end_point, headers=headers) - - successLogger.debug(get_timestamp) - if get_timestamp.status_code == 200 : - successLogger.debug("Successfully fetched time stamp of the datasource " + i ) - timestamp = get_timestamp.json() - #calculating interval from druid get api - minTime = timestamp["segments"]["minTime"] - maxTime = timestamp["segments"]["maxTime"] - min1 = datetime.datetime.strptime(minTime,"%Y-%m-%dT%H:%M:%S.%fZ") - max1 = datetime.datetime.strptime(maxTime,"%Y-%m-%dT%H:%M:%S.%fZ") - new_format = "%Y-%m-%d" - min1.strftime(new_format) - max1.strftime(new_format) - minmonth = "{:02d}".format(min1.month) - maxmonth = "{:02d}".format(max1.month) - min2 = str(min1.year) + "-" + minmonth + "-" + str(min1.day) - max2 = str(max1.year) + "-" + maxmonth + "-" + str(max1.day) - interval = min2 + "_" + max2 - successLogger.debug(interval) - - time.sleep(50) - - disable_datasource = requests.delete(druid_end_point, headers=headers) - if disable_datasource.status_code == 200: - successLogger.debug("successfully disabled the datasource " + i) - time.sleep(300) - - delete_segments = requests.delete(druid_end_point + "/intervals/" + interval, headers=headers) - if 
delete_segments.status_code == 200: - successLogger.debug("successfully deleted the segments " + i) - time.sleep(300) - - enable_datasource = requests.get(druid_end_point, headers=headers) - if enable_datasource.status_code == 204: - successLogger.debug("successfully enabled the datasource " + i) - - time.sleep(300) - - start_supervisor = requests.post(druid_batch_end_point,data=j, headers=headers) - successLogger.debug("ingest data") - if start_supervisor.status_code == 200: - successLogger.debug("started the batch ingestion task sucessfully for the datasource " + i) - time.sleep(50) - else: - errorLogger.error("failed to start batch ingestion task" + str(start_supervisor.status_code)) - - else: - errorLogger.error("failed to enable the datasource " + i) - else: - errorLogger.error("failed to delete the segments of the datasource " + i) - else: - errorLogger.error("failed to disable the datasource " + i) - - - - elif get_timestamp.status_code == 204: - start_supervisor = requests.post(druid_batch_end_point,data=j, headers=headers) - if start_supervisor.status_code == 200: - successLogger.debug("started the batch ingestion task sucessfully for the datasource " + i) - time.sleep(50) - else: - errorLogger.error("failed to start batch ingestion task" + str(start_supervisor.status_code)) - errorLogger.error(start_supervisor.json()) - else: - errorLogger.error("failed to get the timestamp of the datasource " + i) + druid_end_point = config.get("DRUID", "druid_end_point") + i + druid_batch_end_point = config.get("DRUID", "druid_batch_end_point") + headers = {'Content-Type': 'application/json'} + get_timestamp = requests.get(druid_end_point, headers=headers) + successLogger.debug(get_timestamp) + if get_timestamp.status_code == 200 : + successLogger.debug("Successfully fetched time stamp of the datasource " + i) + timestamp = get_timestamp.json() + #calculating interval from druid get api + minTime = timestamp["segments"]["minTime"] + maxTime = 
timestamp["segments"]["maxTime"] + min1 = datetime.datetime.strptime(minTime,"%Y-%m-%dT%H:%M:%S.%fZ") + max1 = datetime.datetime.strptime(maxTime,"%Y-%m-%dT%H:%M:%S.%fZ") + new_format = "%Y-%m-%d" + min1.strftime(new_format) + max1.strftime(new_format) + minmonth = "{:02d}".format(min1.month) + maxmonth = "{:02d}".format(max1.month) + min2 = str(min1.year) + "-" + minmonth + "-" + str(min1.day) + max2 = str(max1.year) + "-" + maxmonth + "-" + str(max1.day) + interval = min2 + "_" + max2 + successLogger.debug(interval) + + time.sleep(50) + + disable_datasource = requests.delete(druid_end_point, headers=headers) + if disable_datasource.status_code == 200: + successLogger.debug("successfully disabled the datasource " + i) + time.sleep(300) + + delete_segments = requests.delete( + druid_end_point + "/intervals/" + interval, headers=headers + ) + if delete_segments.status_code == 200: + successLogger.debug("successfully deleted the segments " + i) + time.sleep(300) + + enable_datasource = requests.get(druid_end_point, headers=headers) + if enable_datasource.status_code == 204: + successLogger.debug("successfully enabled the datasource " + i) + + time.sleep(300) + + start_supervisor = requests.post(druid_batch_end_point, data=j, headers=headers) + successLogger.debug("ingest data") + if start_supervisor.status_code == 200: + successLogger.debug( + "started the batch ingestion task sucessfully for the datasource " + i + ) + time.sleep(50) + else: + errorLogger.error( + "failed to start batch ingestion task" + str(start_supervisor.status_code) + ) + else: + errorLogger.error("failed to enable the datasource " + i) + else: + errorLogger.error("failed to delete the segments of the datasource " + i) + else: + errorLogger.error("failed to disable the datasource " + i) + + elif get_timestamp.status_code == 204: + start_supervisor = requests.post(druid_batch_end_point, data=j, headers=headers) + if start_supervisor.status_code == 200: + successLogger.debug( + "started the batch
ingestion task sucessfully for the datasource " + i + ) + time.sleep(50) + else: + errorLogger.error( + "failed to start batch ingestion task" + str(start_supervisor.status_code) + ) + errorLogger.error(start_supervisor.json()) + else: + errorLogger.error("failed to get the timestamp of the datasource " + i) diff --git a/projects/config.sample b/projects/config.sample deleted file mode 100644 index 0556a0b..0000000 --- a/projects/config.sample +++ /dev/null @@ -1,64 +0,0 @@ -# -------------------- -[MONGO] -# -------------------- - -url = mongodb://: -db = -collection = - -# -------------------- -[KEYCLOAK] -# -------------------- - -#url = https:///auth/realms/sunbird/protocol/openid-connect/token -url = http:///auth/realms/sunbird/protocol/openid-connect/token -#client_id = -#client_secret = -client_id = -grant_type = -refresh_token = - -# -------------------- -[API] -# -------------------- - -content_type = application/json -authorization = Bearer - -# -------------------- -[ENDPOINTS] -# -------------------- - -read_user = http:///api/user/v3/read -coordinator_v1_ds = coordinator/v1/datasources/ -indexer_v1_task = indexer/v1/task - -# -------------------- -[DRUID] -# -------------------- - -url = http:///druid/ - -# -------------------- -[SPECS] -# -------------------- - -sl_general_unnati_spec = {"type":"index","spec":{"ioConfig":{"type":"index","inputSource":{"type": "azure","uris": ["azure://samiksha/projects/sl_projects.json"]},"inputFormat":{"type":"json"}},"tuningConfig":{"type":"index","partitionsSpec":{"type":"dynamic"}},"dataSchema":{"dataSource":"sl-project","granularitySpec":{"type":"uniform","queryGranularity":"HOUR","rollup":true,"segmentGranularity":"DAY"},"timestampSpec":{"column":"project_updated_date","format":"auto"},"dimensionsSpec":{"dimensions":[]},"metricsSpec":[]}}} - -# -------------------- -[AZURE] -# -------------------- - -account_name = -sas_token = -container_name = -blob_path = - -# -------------------- -[FILE_PATHS] -# 
-------------------- - -projects_output_dir = -project_success_log_filename = -project_error_log_filename = - diff --git a/projects/pyspark_project_batch.py b/projects/pyspark_project_batch.py index d7b1cf6..d1b6b79 100644 --- a/projects/pyspark_project_batch.py +++ b/projects/pyspark_project_batch.py @@ -4,9 +4,11 @@ # Description : # # ----------------------------------------------------------------- + import json, sys, time -from configparser import ConfigParser, ExtendedInterpolation +from configparser import ConfigParser,ExtendedInterpolation from pymongo import MongoClient +from bson.objectid import ObjectId import os import requests from pyspark.sql import SparkSession @@ -14,16 +16,20 @@ import pyspark.sql.functions as F from pyspark.sql.types import * from pyspark.sql import Row -from collections import OrderedDict +from collections import OrderedDict, Counter import databricks.koalas as ks from azure.storage.blob import BlockBlobService, PublicAccess +from azure.storage.blob import ContentSettings +import logging import logging.handlers from logging.handlers import TimedRotatingFileHandler import datetime +from datetime import date +import redis -config_path = os.path.dirname(os.path.abspath(__file__)) +config_path = os.path.split(os.path.dirname(os.path.abspath(__file__))) config = ConfigParser(interpolation=ExtendedInterpolation()) -config.read(config_path + "/config.ini") +config.read(config_path[0] + "/config.ini") formatter = logging.Formatter('%(asctime)s - %(levelname)s') @@ -32,10 +38,10 @@ # Add the log message handler to the logger successHandler = logging.handlers.RotatingFileHandler( - config.get('FILE_PATHS', 'project_success_log_filename') + config.get('LOGS', 'project_success_log_filename') ) successBackuphandler = TimedRotatingFileHandler( - config.get('FILE_PATHS', 'project_success_log_filename'), + config.get('LOGS','project_success_log_filename'), when="w0", backupCount=1 ) @@ -46,10 +52,10 @@ errorLogger = logging.getLogger('error 
log') errorLogger.setLevel(logging.ERROR) errorHandler = logging.handlers.RotatingFileHandler( - config.get('FILE_PATHS', 'project_error_log_filename') + config.get('LOGS', 'project_error_log_filename') ) errorBackuphandler = TimedRotatingFileHandler( - config.get('FILE_PATHS', 'project_error_log_filename'), + config.get('LOGS', 'project_error_log_filename'), when="w0", backupCount=1 ) @@ -65,56 +71,18 @@ def convert_to_row(d: dict) -> Row: spark = SparkSession.builder.appName("projects").config("spark.driver.memory", "25g").getOrCreate() -clientProd = MongoClient(config.get('MONGO', 'url')) - -dbProd = clientProd[config.get('MONGO', 'db')] - -projectsCollec = dbProd[config.get('MONGO', 'collection')] - -# getKeyclock api to generate authentication token -try: - def get_keyclock_accesstoken(): - url_getkeyclock = config.get("KEYCLOAK", "url") - headers_getkeyclock = {'Content-Type': 'application/x-www-form-urlencoded'} - body_getkeyclock = { - "grant_type": config.get("KEYCLOAK", "grant_type"), - "client_id": config.get("KEYCLOAK", "client_id"), - "refresh_token": config.get("KEYCLOAK", "refresh_token") - } - - responsegetkeyclock = requests.post( - url_getkeyclock, data=body_getkeyclock, headers=headers_getkeyclock - ) - if responsegetkeyclock.status_code == 200: - successLogger.debug("getkeyclock api") - return responsegetkeyclock.json() - else: - errorLogger.error(" Failure in getkeyclock API ") - errorLogger.error(responsegetkeyclock) - errorLogger.error(responsegetkeyclock.text) -except Exception as e: - errorLogger.error(e, exc_info=True) +clientProd = MongoClient(config.get('MONGO', 'mongo_url')) +db = clientProd[config.get('MONGO', 'database_name')] +projectsCollec = db[config.get('MONGO', 'projects_collection')] -try: - def readUser(userId, accessToken): - queryStringReadUser = "?fields=completeness%2CmissingFields%2ClastLoginTime%2Ctopics%2Corganisations%2Croles%2Clocations%2Cdeclarations" - urlReadUser = config.get("ENDPOINTS", "read_user") + "/" + 
str(userId) + queryStringReadUser - headersReadUser = { - 'Content-Type': config.get("API", "content_type"), - 'Authorization': config.get("API", "authorization"), - 'X-authenticated-user-token': accessToken - } - responseReadUser = requests.get(urlReadUser, headers=headersReadUser) - if responseReadUser.status_code == 200: - successLogger.debug("read user api") - responseReadUser = responseReadUser.json() - return responseReadUser - else: - errorLogger.error("read user api failed") - errorLogger.error(responseReadUser) - errorLogger.error(responseReadUser.text) -except Exception as e: - errorLogger.error(e, exc_info=True) +# redis cache connection +redis_connection = redis.ConnectionPool( + host=config.get("REDIS", "host"), + decode_responses=True, + port=config.get("REDIS", "port"), + db=config.get("REDIS", "db_name") +) +datastore = redis.StrictRedis(connection_pool=redis_connection) try: def removeduplicate(it): @@ -128,110 +96,128 @@ def removeduplicate(it): spark = SparkSession.builder.appName("projects").config( "spark.driver.memory", "50g" -).config("spark.executor.memory", "100g").config( +).config( + "spark.executor.memory", "100g" +).config( "spark.memory.offHeap.enabled", True -).config("spark.memory.offHeap.size", "32g").getOrCreate() +).config( + "spark.memory.offHeap.size", "32g" +).getOrCreate() sc = spark.sparkContext projects_cursorMongo = projectsCollec.aggregate( - [ - { - "$project": { - "_id": {"$toString": "$_id"}, - "projectTemplateId": {"$toString": "$projectTemplateId"}, - "solutionInformation": {"name": 1}, "title": 1, - "programId": {"$toString": "$programId"}, - "programInformation": {"name": 1}, - "metaInformation": {"duration": 1}, "syncedAt": 1, - "updatedAt": 1, "isDeleted": 1, "categories": 1, - "tasks": 1, "status": 1, "userId": 1, "description": 1, - "createdAt": 1 - } + [{ + "$project": { + "_id": {"$toString": "$_id"}, + "projectTemplateId": {"$toString": "$projectTemplateId"}, + "solutionInformation": {"name": 1}, + "title": 
1, + "programId": {"$toString": "$programId"}, + "programInformation": {"name": 1}, + "metaInformation": {"duration": 1}, + "syncedAt": 1, + "updatedAt": 1, + "isDeleted": 1, + "categories": 1, + "tasks": 1, + "status": 1, + "userId": 1, + "description": 1, + "createdAt": 1, + "programExternalId": 1, + "isAPrivateProgram": 1, + "hasAcceptedTAndC": 1 } - ] + }] ) -projects_schema = StructType( - [ - StructField('_id', StringType(), True), - StructField('projectTemplateId', StringType(), True), - StructField( - 'solutionInformation', StructType( - [StructField('name', StringType(), True)] - ) - ), - StructField('title', StringType(), True), - StructField('programId', StringType(), True), - StructField( - 'programInformation', StructType( - [StructField('name', StringType(), True)] - ) - ), - StructField( - 'metaInformation', StructType( - [StructField('duration', StringType(), True)] - ) - ), - StructField('updatedAt', TimestampType(), True), - StructField('syncedAt', TimestampType(), True), - StructField('isDeleted', BooleanType(), True), - StructField('status', StringType(), True), - StructField('userId', StringType(), True), - StructField('description', StringType(), True), - StructField('createdAt', TimestampType(), True), - StructField( - 'categories', ArrayType( - StructType( - [StructField('name', StringType(), True)] - ) - ), True - ), - StructField( - 'tasks', ArrayType( - StructType( - [ - StructField('_id', StringType(), True), - StructField('name', StringType(), True), - StructField('assignee', StringType(), True), - StructField( - 'attachments', ArrayType( - StructType([StructField('sourcePath', StringType(), True)]) - ) - ), - StructField('startDate', StringType(), True), - StructField('endDate', StringType(), True), - StructField('syncedAt', TimestampType(), True), - StructField('status', StringType(), True), - StructField('children', ArrayType( - StructType([ - StructField('_id', StringType(), True), - StructField('name', StringType(), True), - 
StructField('startDate', StringType(), True), - StructField('endDate', StringType(), True), - StructField('syncedAt', TimestampType(), True), - StructField('status', StringType(), True) - ]) - )), - ] - ) - ), True - ) - ] -) +projects_schema = StructType([ + StructField('_id', StringType(), True), + StructField('projectTemplateId', StringType(), True), + StructField( + 'solutionInformation', + StructType([StructField('name', StringType(), True)]) + ), + StructField('title', StringType(), True), + StructField('programId', StringType(), True), + StructField('programExternalId', StringType(), True), + StructField( + 'programInformation', + StructType([StructField('name', StringType(), True)]) + ), + StructField( + 'metaInformation', + StructType([StructField('duration', StringType(), True)]) + ), + StructField('updatedAt', TimestampType(), True), + StructField('syncedAt', TimestampType(), True), + StructField('isDeleted', BooleanType(), True), + StructField('status', StringType(), True), + StructField('userId', StringType(), True), + StructField('description', StringType(), True), + StructField('createdAt', TimestampType(), True), + StructField('isAPrivateProgram', BooleanType(), True), + StructField('hasAcceptedTAndC', BooleanType(), True), + StructField( + 'categories', + ArrayType( + StructType([StructField('name', StringType(), True)]) + ), True + ), + StructField( + 'tasks', + ArrayType( + StructType([ + StructField('_id', StringType(), True), + StructField('name', StringType(), True), + StructField('assignee', StringType(), True), + StructField( + 'attachments', + ArrayType( + StructType([ + StructField('sourcePath', StringType(), True) + ]) + ) + ), + StructField('startDate', StringType(), True), + StructField('endDate', StringType(), True), + StructField('syncedAt', TimestampType(), True), + StructField('status', StringType(), True), + StructField('isDeleted', BooleanType(), True), + StructField( + 'children', + ArrayType( + StructType([ + StructField('_id', 
StringType(), True), + StructField('name', StringType(), True), + StructField('startDate',StringType(), True), + StructField('endDate', StringType(), True), + StructField('syncedAt', TimestampType(), True), + StructField('status', StringType(), True), + StructField('isDeleted', BooleanType(), True), + ]) + ) + ), + ]) + ), True + ) +]) projects_rdd = spark.sparkContext.parallelize(list(projects_cursorMongo)) -projects_df = spark.createDataFrame(projects_rdd, projects_schema) +projects_df = spark.createDataFrame(projects_rdd,projects_schema) projects_df = projects_df.withColumn( "project_created_type", F.when( - projects_df["projectTemplateId"].isNotNull() == True, "project imported from library" + projects_df["projectTemplateId"].isNotNull() == True , + "project imported from library" ).otherwise("user created project") ) projects_df = projects_df.withColumn( - "project_title", F.when( + "project_title", + F.when( projects_df["solutionInformation"]["name"].isNotNull() == True, projects_df["solutionInformation"]["name"] ).otherwise(projects_df["title"]) @@ -241,43 +227,90 @@ def removeduplicate(it): "date_time", to_timestamp(projects_df["updatedAt"], 'yyyy-MM-dd HH:mm:ss') ) -projects_df = projects_df.withColumn("date", F.split(projects_df["date_time"], ' ')[0]) -projects_df = projects_df.withColumn("time", F.split(projects_df["date_time"], ' ')[1]) +projects_df = projects_df.withColumn("date",F.split(projects_df["date_time"], ' ')[0]) +projects_df = projects_df.withColumn("time",F.split(projects_df["date_time"], ' ')[1]) projects_df = projects_df.withColumn( - "project_updated_date", F.concat( - F.col("date"), F.lit("T"), F.col("time"), F.lit(".000Z") - ) + "project_updated_date", F.concat(F.col("date"), + F.lit("T"), F.col("time"),F.lit(".000Z")) +) + +projects_df = projects_df.withColumn( + "project_deleted_flag", + F.when( + (projects_df["isDeleted"].isNotNull() == True) & + (projects_df["isDeleted"] == True), + "true" + ).when( + 
(projects_df["isDeleted"].isNotNull() == True) & + (projects_df["isDeleted"] == False), + "false" + ).otherwise("false") +) + +projects_df = projects_df.withColumn( + "private_program", + F.when( + (projects_df["isAPrivateProgram"].isNotNull() == True) & + (projects_df["isAPrivateProgram"] == True), + "true" + ).when( + (projects_df["isAPrivateProgram"].isNotNull() == True) & + (projects_df["isAPrivateProgram"] == False), + "false" + ).otherwise("false") ) projects_df = projects_df.withColumn( - "deleted_flag", F.when( - (projects_df["isDeleted"].isNotNull() == True) & - (projects_df["isDeleted"] == True), "true" + "project_terms_and_condition", + F.when( + (projects_df["hasAcceptedTAndC"].isNotNull() == True) & + (projects_df["hasAcceptedTAndC"] == True), + "true" ).when( - (projects_df["isDeleted"].isNotNull() == True) & - (projects_df["isDeleted"] == False), "false" + (projects_df["hasAcceptedTAndC"].isNotNull() == True) & + (projects_df["hasAcceptedTAndC"] == False), + "false" ).otherwise("false") ) -projects_df = projects_df.withColumn("exploded_categories", F.explode_outer(F.col("categories"))) +projects_df = projects_df.withColumn( + "exploded_categories", F.explode_outer(F.col("categories")) +) projects_df = projects_df.withColumn("parent_channel", F.lit("SHIKSHALOKAM")) projects_df = projects_df.withColumn("exploded_tasks", F.explode_outer(F.col("tasks"))) projects_df = projects_df.withColumn( - "exploded_tasks_attachments", F.explode_outer(projects_df["exploded_tasks"]["attachments"]) + "exploded_tasks_attachments", + F.explode_outer(projects_df["exploded_tasks"]["attachments"]) ) projects_df = projects_df.withColumn( - "task_evidence_status", F.when( - projects_df["exploded_tasks_attachments"]["sourcePath"].isNotNull() == True, True + "task_evidence_status", + F.when( + projects_df["exploded_tasks_attachments"]["sourcePath"].isNotNull() == True, + True ).otherwise(False) ) projects_df = projects_df.withColumn( - "task_evidence", F.when( + 
"task_deleted_flag", + F.when( + (projects_df["exploded_tasks"]["isDeleted"].isNotNull() == True) & + (projects_df["exploded_tasks"]["isDeleted"] == True), + "true" + ).when( + (projects_df["exploded_tasks"]["isDeleted"].isNotNull() == True) & + (projects_df["exploded_tasks"]["isDeleted"] == False), + "false" + ).otherwise("false") +) + +projects_df = projects_df.withColumn( + "task_evidence", + F.when( projects_df["exploded_tasks_attachments"]["sourcePath"].isNotNull() == True, F.concat( F.lit("https://samikshaprod.blob.core.windows.net/samiksha/"), @@ -286,165 +319,226 @@ def removeduplicate(it): ) ) -projects_df = projects_df.withColumn("exploded_sub_tasks", F.explode_outer(projects_df["exploded_tasks"]["children"])) +projects_df = projects_df.withColumn( + "exploded_sub_tasks", F.explode_outer(projects_df["exploded_tasks"]["children"]) +) + +projects_df = projects_df.withColumn( + "sub_task_deleted_flag", + F.when(( + projects_df["exploded_sub_tasks"]["isDeleted"].isNotNull() == True) & + (projects_df["exploded_sub_tasks"]["isDeleted"] == True), + "true" + ).when( + (projects_df["exploded_sub_tasks"]["isDeleted"].isNotNull() == True) & + (projects_df["exploded_sub_tasks"]["isDeleted"] == False), + "false" + ).otherwise("false") +) projects_df_cols = projects_df.select( - projects_df["_id"].alias("project_id"), projects_df["project_created_type"], + projects_df["_id"].alias("project_id"), + projects_df["project_created_type"], projects_df["project_title"], projects_df["title"].alias("project_title_editable"), projects_df["programId"].alias("program_id"), + projects_df["programExternalId"].alias("program_externalId"), projects_df["programInformation"]["name"].alias("program_name"), projects_df["metaInformation"]["duration"].alias("project_duration"), projects_df["syncedAt"].alias("project_last_sync"), - projects_df["project_updated_date"], projects_df["deleted_flag"], + projects_df["project_updated_date"], + projects_df["project_deleted_flag"], 
projects_df["exploded_categories"]["name"].alias("area_of_improvement"), projects_df["status"].alias("status_of_project"), projects_df["userId"].alias("createdBy"), - projects_df["description"].alias("project_goal"), projects_df["parent_channel"], + projects_df["description"].alias("project_goal"), + projects_df["parent_channel"], projects_df["createdAt"].alias("project_created_date"), projects_df["exploded_tasks"]["_id"].alias("task_id"), projects_df["exploded_tasks"]["name"].alias("tasks"), projects_df["exploded_tasks"]["assignee"].alias("task_assigned_to"), projects_df["exploded_tasks"]["startDate"].alias("task_start_date"), projects_df["exploded_tasks"]["endDate"].alias("task_end_date"), - projects_df["exploded_tasks"]["syncedAt"].alias("tasks_date"), - projects_df["exploded_tasks"]["status"].alias("tasks_status"), - projects_df["task_evidence"], projects_df["task_evidence_status"], + projects_df["exploded_tasks"]["syncedAt"].alias("tasks_date"),projects_df["exploded_tasks"]["status"].alias("tasks_status"), + projects_df["task_evidence"], + projects_df["task_evidence_status"], projects_df["exploded_sub_tasks"]["_id"].alias("sub_task_id"), projects_df["exploded_sub_tasks"]["name"].alias("sub_task"), projects_df["exploded_sub_tasks"]["status"].alias("sub_task_status"), projects_df["exploded_sub_tasks"]["syncedAt"].alias("sub_task_date"), projects_df["exploded_sub_tasks"]["startDate"].alias("sub_task_start_date"), - projects_df["exploded_sub_tasks"]["endDate"].alias("sub_task_end_date") + projects_df["exploded_sub_tasks"]["endDate"].alias("sub_task_end_date"), + projects_df["private_program"], + projects_df["task_deleted_flag"], + projects_df["sub_task_deleted_flag"], + projects_df["project_terms_and_condition"] ) projects_df_cols = projects_df_cols.dropDuplicates() projects_userid_df = projects_df_cols.select("createdBy") +userId_projects_df_before = [] userId_projects_df_after = [] userId_arr = [] uniqueuserId_arr = [] userId_projects_df_before = 
projects_userid_df.toJSON().map(lambda j: json.loads(j)).collect() for uid in userId_projects_df_before: userId_arr.append(uid["createdBy"]) -uniqueuserId_arr = list(removeduplicate(userId_arr)) -get_keycloak_obj = get_keyclock_accesstoken() +uniqueuserId_arr = list(removeduplicate(userId_arr)) user_info_arr = [] entitiesArr = [] + for usr in uniqueuserId_arr: - readUserObj = {} - readUserObj = readUser(usr, get_keycloak_obj["access_token"]) - if readUserObj: - readResult = False - readResult = "result" in readUserObj - if readResult == True: - readResponse = False - readResponse = "response" in readUserObj["result"] - if readResponse == True: - userEntitiesArr = [] - userObj = {} - try: - if len(readUserObj["result"]["response"]["userLocations"]) > 0: - for usrLoc in readUserObj["result"]["response"]["userLocations"]: - userObj[usrLoc["type"] + '_name'] = usrLoc["name"] - userObj[usrLoc["type"] + '_id'] = usrLoc["id"] - userObj[usrLoc["type"] + '_externalId'] = usrLoc["code"] - except KeyError: - pass - userEntitiesArr = list(userObj.keys()) - entitiesArr.extend(userEntitiesArr) - userObj["id"] = readUserObj["result"]["response"]["id"] - userObj["user_id"] = readUserObj["result"]["response"]["userName"] - userObj["user_full_name"] = readUserObj["result"]["response"]["firstName"] - userObj["channel"] = readUserObj["result"]["response"]["rootOrgId"] - userRoles = None - try: - userRoles = readUserObj["result"]["response"]["userSubType"] - except KeyError: - userRoles = '' - try: - if userRoles: - userObj["designation"] = userRoles - except KeyError: - pass - user_info_arr.append(userObj) + userObj = {} + userObj = datastore.hgetall("user:"+usr) + if userObj : + stateName = None + blockName = None + districtName = None + clusterName = None + rootOrgId = None + userSubType = None + userSchool = None + userSchoolUDISE = None + userSchoolName = None + try: + userSchool = userObj["school"] + except KeyError : + userSchool = '' + try: + userSchoolUDISE = 
userObj["schooludisecode"] + except KeyError : + userSchoolUDISE = '' + try: + userSchoolName = userObj["schoolname"] + except KeyError : + userSchoolName = '' + try: + userSubType = userObj["usersubtype"] + except KeyError : + userSubType = '' + try: + stateName = userObj["state"] + except KeyError : + stateName = '' + try: + blockName = userObj["block"] + except KeyError : + blockName = '' + try: + districtName = userObj["district"] + except KeyError : + districtName = '' + try: + clusterName = userObj["cluster"] + except KeyError : + clusterName = '' + try: + rootOrgId = userObj["rootorgid"] + except KeyError : + rootOrgId = '' + + userEntitiesArr = [] + userInfoObj = {} + userInfoObj['school_name'] = userSchoolName + userInfoObj['school_id'] = userSchool + userInfoObj['school_externalId'] = userSchoolUDISE + userInfoObj['district_name'] = districtName + userInfoObj['block_name'] = blockName + userInfoObj['cluster_name'] = clusterName + userInfoObj['state_name'] = stateName + + userEntitiesArr = list(userInfoObj.keys()) + entitiesArr.extend(userEntitiesArr) + userInfoObj["id"] = usr + userInfoObj["channel"] = rootOrgId + userRoles = None + try : + userRoles = userSubType + except KeyError : + userRoles = '' + try : + if userRoles : + userInfoObj["designation"] = userRoles + except KeyError : + userInfoObj["designation"] = '' + user_info_arr.append(userInfoObj) user_df = ks.DataFrame(user_info_arr) user_df = user_df.to_spark() final_projects_df = projects_df_cols.join( - user_df, projects_df_cols["createdBy"] == user_df["id"], "inner" + user_df, + projects_df_cols["createdBy"] == user_df["id"], + "inner" ).drop(user_df["id"]) final_projects_df = final_projects_df.dropDuplicates() final_projects_df.coalesce(1).write.format("json").mode("overwrite").save( - config.get("FILE_PATHS", "projects_output_dir") + "/" + config.get("OUTPUT_DIR", "projects_folder") + "/" ) -for filename in os.listdir(config.get("FILE_PATHS", "projects_output_dir") + "/"): +for filename in 
os.listdir(config.get("OUTPUT_DIR", "projects_folder")+"/"): if filename.endswith(".json"): - os.rename( - config.get("FILE_PATHS", "projects_output_dir") + "/" + filename, - config.get("FILE_PATHS", "projects_output_dir") + "/sl_projects.json" + os.rename( + config.get("OUTPUT_DIR", "projects_folder") + "/" + filename, + config.get("OUTPUT_DIR", "projects_folder") + "/sl_projects.json" ) blob_service_client = BlockBlobService( - account_name=config.get("AZURE", "account_name"), + account_name=config.get("AZURE", "account_name"), sas_token=config.get("AZURE", "sas_token") ) container_name = config.get("AZURE", "container_name") -local_path = config.get("FILE_PATHS", "projects_output_dir") -blob_path = config.get("AZURE", "blob_path") +local_path = config.get("OUTPUT_DIR", "projects_folder") +blob_path = config.get("AZURE", "projects_blob_path") for files in os.listdir(local_path): if "sl_projects.json" in files: blob_service_client.create_blob_from_path( container_name, - os.path.join(blob_path, files), + os.path.join(blob_path,files), local_path + "/" + files ) -os.remove(config.get("FILE_PATHS", "projects_output_dir") + "/sl_projects.json") +os.remove(config.get("OUTPUT_DIR", "projects_folder") + "/sl_projects.json") dimensionsArr = [] dimensionsArr = list(set(entitiesArr)) submissionReportColumnNamesArr = [ - 'user_id', 'user_full_name', 'project_title', 'project_goal', 'project_created_date', - 'project_last_sync', 'area_of_improvement', 'status_of_project', 'tasks', - 'tasks_date', 'tasks_status', 'sub_task', 'sub_task_status', 'sub_task_date', - "task_start_date", "task_end_date", "sub_task_start_date", "sub_task_end_date", - "designation", "deleted_flag", "task_evidence", "task_evidence_status", "project_id", - "task_id", "sub_task_id", "project_created_type", "task_assigned_to", 'channel', - 'parent_channel', 'program_id', 'program_name', 'project_updated_date', 'createdBy', - 'project_title_editable', 'project_duration' + 'project_title', 'project_goal', 
'project_created_date', 'project_last_sync', + 'area_of_improvement', 'status_of_project', 'tasks', 'tasks_date', 'tasks_status', + 'sub_task', 'sub_task_status', 'sub_task_date', "task_start_date", "task_end_date", + "sub_task_start_date", "sub_task_end_date", "designation", "project_deleted_flag", + "task_evidence", "task_evidence_status", "project_id", "task_id", "sub_task_id", + "project_created_type", "task_assigned_to", 'channel', 'parent_channel', 'program_id', + 'program_name', 'project_updated_date', 'createdBy', 'project_title_editable', + 'project_duration', 'program_externalId', 'private_program', 'task_deleted_flag', + 'sub_task_deleted_flag', 'project_terms_and_condition' ] dimensionsArr.extend(submissionReportColumnNamesArr) -datasources = ["sl-project"] -payload = json.loads(config.get("SPECS", "sl_general_unnati_spec")) +datasources = ["sl_projects"] +payload = {} +payload = json.loads(config.get("DRUID", "general_unnati_spec")) payload["spec"]["dataSchema"]["dimensionsSpec"]["dimensions"] = dimensionsArr ingestion_specs = [json.dumps(payload)] -for i, j in zip(datasources, ingestion_specs): - - druid_end_point = config.get("DRUID", "url") + config.get("ENDPOINTS", "coordinator_v1_ds") + i - - druid_batch_end_point = config.get("DRUID", "url") + config.get("ENDPOINTS", "indexer_v1_task") - - headers = {'Content-Type': 'application/json'} - +for i, j in zip(datasources,ingestion_specs): + druid_end_point = config.get("DRUID", "druid_end_point") + i + druid_batch_end_point = config.get("DRUID", "druid_batch_end_point") + headers = {'Content-Type' : 'application/json'} get_timestamp = requests.get(druid_end_point, headers=headers) - if get_timestamp.status_code == 200: - successLogger.debug("Successfully fetched time stamp of the datasource " + i) + successLogger.debug("Successfully fetched time stamp of the datasource " + i ) timestamp = get_timestamp.json() - # calculating interval from druid get api + #calculating interval from druid get api 
minTime = timestamp["segments"]["minTime"] maxTime = timestamp["segments"]["maxTime"] min1 = datetime.datetime.strptime(minTime, "%Y-%m-%dT%H:%M:%S.%fZ") @@ -455,16 +549,19 @@ def removeduplicate(it): minmonth = "{:02d}".format(min1.month) maxmonth = "{:02d}".format(max1.month) min2 = str(min1.year) + "-" + minmonth + "-" + str(min1.day) - max2 = str(max1.year) + "-" + maxmonth + "-" + str(max1.day) + max2 = str(max1.year) + "-" + maxmonth + "-" + str(max1.day) interval = min2 + "_" + max2 time.sleep(50) disable_datasource = requests.delete(druid_end_point, headers=headers) + if disable_datasource.status_code == 200: successLogger.debug("successfully disabled the datasource " + i) time.sleep(300) - - delete_segments = requests.delete(druid_end_point + "/intervals/" + interval, headers=headers) + + delete_segments = requests.delete( + druid_end_point + "/intervals/" + interval, headers=headers + ) if delete_segments.status_code == 200: successLogger.debug("successfully deleted the segments " + i) time.sleep(300) @@ -472,16 +569,22 @@ def removeduplicate(it): enable_datasource = requests.get(druid_end_point, headers=headers) if enable_datasource.status_code == 204: successLogger.debug("successfully enabled the datasource " + i) - + time.sleep(300) - start_supervisor = requests.post(druid_batch_end_point, data=j, headers=headers) + start_supervisor = requests.post( + druid_batch_end_point, data=j, headers=headers + ) successLogger.debug("ingest data") if start_supervisor.status_code == 200: - successLogger.debug("started the batch ingestion task sucessfully for the datasource " + i) + successLogger.debug( + "started the batch ingestion task sucessfully for the datasource " + i + ) time.sleep(50) else: - errorLogger.error("failed to start batch ingestion task" + str(start_supervisor.status_code)) + errorLogger.error( + "failed to start batch ingestion task" + str(start_supervisor.status_code) + ) else: errorLogger.error("failed to enable the datasource " + i) else: @@ 
-490,10 +593,17 @@ def removeduplicate(it): errorLogger.error("failed to disable the datasource " + i) elif get_timestamp.status_code == 204: - start_supervisor = requests.post(druid_batch_end_point, data=j, headers=headers) + start_supervisor = requests.post( + druid_batch_end_point, data=j, headers=headers + ) if start_supervisor.status_code == 200: - successLogger.debug("started the batch ingestion task sucessfully for the datasource " + i) + successLogger.debug( + "started the batch ingestion task sucessfully for the datasource " + i + ) time.sleep(50) else: errorLogger.error(start_supervisor.text) - errorLogger.error("failed to start batch ingestion task" + str(start_supervisor.status_code)) + errorLogger.error( + "failed to start batch ingestion task" + str(start_supervisor.status_code) + ) + diff --git a/requirements.txt b/requirements.txt index c1e7995..98d834b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,93 @@ -Add the pip modules in here \ No newline at end of file +aiocontextvars==0.2.2 +aiohttp==3.6.2 +aiohttp-cors==0.7.0 +async-timeout==3.0.1 +attrs==19.3.0 +azure-common==1.1.26 +azure-core==1.9.0 +azure-storage-blob==2.1.0 +azure-storage-common==2.1.0 +boto3==1.12.33 +botocore==1.15.33 +cachetools==4.1.1 +cassandra-driver==3.22.0 +certifi==2019.11.28 +cffi==1.14.0 +chardet==3.0.4 +click==7.1.1 +colorclass==2.2.0 +colorlog==4.1.0 +compress-json==1.0.4 +contextvars==2.4 +croniter==0.3.31 +cryptography==2.9.2 +cycler==0.10.0 +docutils==0.15.2 +elasticsearch==7.7.0 +faust==1.10.4 +fernet==1.0.1 +geographiclib==1.50 +geomet==0.1.2 +geopy==2.0.0 +glob2==0.7 +google-api-core==1.22.1 +google-api-python-client==1.10.0 +google-auth==1.20.1 +google-auth-httplib2==0.0.4 +googleapis-common-protos==1.52.0 +httplib2==0.18.1 +idna==2.9 +idna-ssl==1.1.0 +immutables==0.12 +isodate==0.6.0 +jmespath==0.9.5 +kafka-python==1.4.7 +kiwisolver==1.2.0 +koalas==0.33.0 +matplotlib==3.2.1 +mode==4.3.2 +msrest==0.6.19 +multidict==4.7.5 +mypy-extensions==0.4.3 
+numpy==1.18.5 +oauth2client==4.1.3 +oauthlib==3.1.0 +opentracing==1.3.0 +pandas==1.0.4 +protobuf==3.13.0 +psycopg2-binary==2.8.4 +py4j==0.10.7 +pyaes==1.6.1 +pyarrow==0.17.1 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycparser==2.20 +pycrypto==2.6.1 +PyDrive==1.3.1 +pydruid==0.5.9 +PyJWT==1.7.1 +pymongo==3.10.1 +pyparsing==2.4.7 +pyspark==2.4.5 +python-dateutil==2.8.1 +pytz==2020.1 +PyYAML==5.3.1 +requests==2.23.0 +requests-oauthlib==1.3.0 +robinhood-aiokafka==1.1.6 +rsa==4.6 +s3transfer==0.3.3 +schedule==0.6.0 +six==1.14.0 +slackclient==1.0.7 +terminaltables==3.1.0 +typing-extensions==3.7.4.2 +uritemplate==3.0.1 +urllib3==1.25.8 +venusian==1.2.0 +websocket-client==0.57.0 +xlrd==1.2.0 +XlsxWriter==1.3.3 +xlutils==2.0.0 +xlwt==1.3.0 +yarl==1.4.2 diff --git a/run.sh b/run.sh new file mode 100644 index 0000000..04d72d3 --- /dev/null +++ b/run.sh @@ -0,0 +1,33 @@ +#!/bin/bash +source /etc/profile +export PYSPARK_PYTHON=python3 +export TZ=Asia/Kolkata date + +echo "RUNNING JOB" + +echo "" +echo "$(date)" +echo "=====================================" +echo "Daily Projects Batch Job Ingestion == Started" +. /opt/sparkjobs/spark_venv/bin/activate && /opt/sparkjobs/spark_venv/lib/python3.6/site-packages/pyspark/bin/spark-submit --driver-memory 5g /opt/sparkjobs/source/projects/projects_data_extraction.py +echo "Daily Projects Batch Job Ingestion == Completed" +echo "*************************************" + +echo "" +echo "$(date)" +echo "=====================================" +echo "Daily Observation Status Batch Job Ingestion == Started" +. 
/opt/sparkjobs/spark_venv/bin/activate && /opt/sparkjobs/spark_venv/lib/python3.6/site-packages/pyspark/bin/spark-submit --driver-memory 5g /opt/sparkjobs/source/observations/status/sl_py_obs_submission_status.py +echo "Daily Observation Status Batch Job Ingestion == Completed" +echo "*************************************" + +echo "" +echo "$(date)" +echo "=====================================" +echo "Daily Assessment Report Batch Job Ingestion == Started" +. /opt/sparkjobs/spark_venv/bin/activate && /opt/sparkjobs/spark_venv/lib/python3.6/site-packages/pyspark/bin/spark-submit --driver-memory 5g /opt/sparkjobs/source/assessment/assessment_streaming_report.py +echo "Daily Assessment Report Batch Job Ingestion == Completed" +echo "*************************************" + +echo "COMPLETED" + diff --git a/survey/config.sample b/survey/config.sample deleted file mode 100644 index a2aaab3..0000000 --- a/survey/config.sample +++ /dev/null @@ -1,70 +0,0 @@ -# -------------------- -[KEYCLOAK] -# -------------------- - -url = http:///auth/realms/sunbird/protocol/openid-connect/token -grant_type = refresh_token -client_id = android -refresh_token = - -# -------------------- -[SUNBIRD] -# -------------------- - -base_url_ip = http:///api - -# -------------------- -[ENDPOINT] -# -------------------- - -read_user = user/v3/read - -# -------------------- -[STORAGE] -# -------------------- - -base_url = https:/// - -# -------------------- -[COMMON] -# -------------------- - -parent_channel = -content_type = application/json -authorization = -diksha_survey_app_name = - -# -------------------- -[MONGO] -# -------------------- - -url = mongodb://:27017 -db = - -solutionsCollec = solutions -entityTypeCollec = entityTypes -questionsCollec = questions -criteriaCollec = criteria -entitiesCollec = entities -programsCollec = programs -surveysCollec = surveys -surveySubmissionsCollec = surveySubmissions - -# -------------------- -[KAFKA] -# -------------------- - -url = :9092 
-raw_data_topic = -druid_topic = -evidence_druid_topic = -dev_topic = - -# -------------------- -[LOG_FILE] -# -------------------- - -survey_streaming_success_log_filename = -survey_streaming_error_log_filename = -survey_evidence_streaming_success_log_filename = -survey_evidence_streaming_error_log_filename = diff --git a/survey/py_survey_evidence_streaming.py b/survey/py_survey_evidence_streaming.py index 7b5c7d4..2c595f2 100755 --- a/survey/py_survey_evidence_streaming.py +++ b/survey/py_survey_evidence_streaming.py @@ -4,20 +4,26 @@ # Description : # # ----------------------------------------------------------------- + from pymongo import MongoClient from bson.objectid import ObjectId -import csv, os -import json +import os, json import datetime -from kafka import KafkaProducer +from datetime import date,time +import requests +from kafka import KafkaConsumer, KafkaProducer +import dateutil +from dateutil import parser as date_parser from configparser import ConfigParser, ExtendedInterpolation import faust +import logging import logging.handlers +import time from logging.handlers import TimedRotatingFileHandler -config_path = os.path.dirname(os.path.abspath(__file__)) +config_path = os.path.split(os.path.dirname(os.path.abspath(__file__))) config = ConfigParser(interpolation=ExtendedInterpolation()) -config.read(config_path + "/config.ini") +config.read(config_path[0] + "/config.ini") formatter = logging.Formatter('%(asctime)s - %(levelname)s') @@ -26,10 +32,10 @@ # Add the log message handler to the logger successHandler = logging.handlers.RotatingFileHandler( - config.get('LOG_FILE', 'survey_evidence_streaming_success_log_filename') + config.get('LOGS', 'survey_evidence_streaming_success_log_filename') ) successBackuphandler = TimedRotatingFileHandler( - config.get('LOG_FILE', 'survey_evidence_streaming_success_log_filename'), + config.get('LOGS', 'survey_evidence_streaming_success_log_filename'), when="w0", backupCount=1 ) @@ -40,10 +46,10 @@ errorLogger = 
logging.getLogger('error log') errorLogger.setLevel(logging.ERROR) errorHandler = logging.handlers.RotatingFileHandler( - config.get('LOG_FILE', 'survey_evidence_streaming_error_log_filename') + config.get('LOGS', 'survey_evidence_streaming_error_log_filename') ) errorBackuphandler = TimedRotatingFileHandler( - config.get('LOG_FILE', 'survey_evidence_streaming_error_log_filename'), + config.get('LOGS', 'survey_evidence_streaming_error_log_filename'), when="w0", backupCount=1 ) @@ -51,85 +57,80 @@ errorLogger.addHandler(errorHandler) errorLogger.addHandler(errorBackuphandler) -# db production -clientqa = MongoClient(config.get('MONGO', 'url')) -dbqa = clientqa[config.get('MONGO', 'db')] - -surveySubmissionsCollec = dbqa[config.get('MONGO', 'surveySubmissionsCollec')] -solutionsDevCollec = dbqa[config.get('MONGO', 'solutionsCollec')] -surveysCollec = dbqa[config.get('MONGO', 'surveysCollec')] -entityTypeDevCollec = dbqa[config.get('MONGO', 'entityTypeCollec')] -questionsDevCollec = dbqa[config.get('MONGO', 'questionsCollec')] -criteriaDevCollec = dbqa[config.get('MONGO', 'criteriaCollec')] -entitiesDevCollec = dbqa[config.get('MONGO', 'entitiesCollec')] - try: app = faust.App( 'sl_py_survey_evidence_prod', - broker='kafka://' + config.get("KAFKA", "url"), + broker='kafka://'+config.get("KAFKA", "kafka_url"), value_serializer='raw', web_port=7005 ) - kafka_url = (config.get("KAFKA", "url")) + kafka_url = config.get("KAFKA", "kafka_url") producer = KafkaProducer(bootstrap_servers=[kafka_url]) + + #db production + client = MongoClient(config.get('MONGO', 'mongo_url')) + db = client[config.get('MONGO', 'database_name')] + surveySubmissionsCollec = db[config.get('MONGO', 'survey_submissions_collection')] + questionsCollec = db[config.get('MONGO', 'questions_collec')] + except Exception as e: errorLogger.error(e, exc_info=True) try: - def convert(lst): + def convert(lst): return ','.join(lst) except Exception as e: errorLogger.error(e, exc_info=True) try: def 
evidence_extraction(msg_id): - for obSub in surveySubmissionsCollec.find({'_id': ObjectId(msg_id)}): + for obSub in surveySubmissionsCollec.find({'_id':ObjectId(msg_id)}): successLogger.debug("Survey Evidence Submission Id : " + str(msg_id)) try: - completedDate = str(datetime.datetime.date(obSub['completedDate'])) + 'T' + \ - str(datetime.datetime.time(obSub['completedDate'])) + 'Z' + completedDate = str( + datetime.datetime.date(obSub['completedDate']) + ) + 'T' + str( + datetime.datetime.time(obSub['completedDate']) + ) + 'Z' except KeyError: pass evidence_sub_count = 0 try: - answersArr = [v for v in obSub['answers'].values()] + answersArr = [ v for v in obSub['answers'].values()] except KeyError: pass for ans in answersArr: try: if len(ans['fileName']): - evidence_sub_count = evidence_sub_count + len(ans['fileName']) + evidence_sub_count = evidence_sub_count + len(ans['fileName']) except KeyError: if len(ans['instanceFileName']): for instance in ans['instanceFileName']: - evidence_sub_count = evidence_sub_count + len(instance) + evidence_sub_count = evidence_sub_count + len(instance) for answer in answersArr: surveySubQuestionsObj = {} surveySubQuestionsObj['completedDate'] = completedDate surveySubQuestionsObj['total_evidences'] = evidence_sub_count - surveySubQuestionsObj['userName'] = obSub['evidencesStatus'][0]['submissions'][0]['submittedByName'] - surveySubQuestionsObj['userName'] = surveySubQuestionsObj['userName'].replace("null", "") surveySubQuestionsObj['surveySubmissionId'] = str(obSub['_id']) surveySubQuestionsObj['createdBy'] = obSub['createdBy'] surveySubQuestionsObj['solutionExternalId'] = obSub['solutionExternalId'] surveySubQuestionsObj['solutionId'] = str(obSub['solutionId']) surveySubQuestionsObj['surveyId'] = str(obSub['surveyId']) - fileName = [] fileSourcePath = [] try: surveySubQuestionsObj['remarks'] = answer['remarks'] - surveySubQuestionsObj['questionName'] = answer['payload']['question'][0] except KeyError: - pass + 
surveySubQuestionsObj['remarks'] = '' surveySubQuestionsObj['questionId'] = str(answer['qid']) - for ques in questionsDevCollec.find({'_id': ObjectId(surveySubQuestionsObj['questionId'])}): + for ques in questionsCollec.find({'_id':ObjectId(surveySubQuestionsObj['questionId'])}): surveySubQuestionsObj['questionExternalId'] = ques['externalId'] + surveySubQuestionsObj['questionName'] = ques['question'][0] surveySubQuestionsObj['questionResponseType'] = answer['responseType'] try: surveySubQuestionsObj['appName'] = obSub["appInformation"]["appName"].lower() - except KeyError: + except KeyError : surveySubQuestionsObj['appName'] = config.get("COMMON", "diksha_survey_app_name") evidence = [] evidenceCount = 0 @@ -140,7 +141,7 @@ def evidence_extraction(msg_id): evidenceCount = len(evidence) except KeyError: if answer['instanceFileName']: - for inst in answer['instanceFileName']: + for inst in answer['instanceFileName'] : evidence.extend(inst) surveySubQuestionsObj['evidence_count'] = len(evidence) evidenceCount = len(evidence) @@ -151,7 +152,7 @@ def evidence_extraction(msg_id): surveySubQuestionsObj['fileSourcePath'] = convert(fileSourcePath) if evidenceCount > 0: producer.send( - (config.get("KAFKA", "evidence_druid_topic")), + (config.get("KAFKA", "kafka_evidence_survey_druid_topic")), json.dumps(surveySubQuestionsObj).encode('utf-8') ) producer.flush() @@ -160,9 +161,9 @@ def evidence_extraction(msg_id): errorLogger.error(e, exc_info=True) try: - # loop the consumer messages and produce it to another topic - @app.agent(config.get("KAFKA", "dev_topic")) - async def survey_Faust(consumer): + #loop the consumer messages and produce it to another topic + @app.agent(config.get("KAFKA", "kafka_raw_survey_topic")) + async def survey_Faust(consumer) : async for msg in consumer: msg_val = msg.decode('utf-8') msg_data = json.loads(msg_val) @@ -170,7 +171,8 @@ async def survey_Faust(consumer): evidence_extraction(msg_data['_id']) successLogger.debug("********* END OF SURVEY 
EVIDENCE SUBMISSION ***********") except Exception as e: - errorLogger.error(e, exc_info=True) + errorLogger.error(e,exc_info=True) + if __name__ == '__main__': - app.main() + app.main() diff --git a/survey/py_survey_streaming.py b/survey/py_survey_streaming.py index e3c02dc..a3ae2d0 100755 --- a/survey/py_survey_streaming.py +++ b/survey/py_survey_streaming.py @@ -1,25 +1,34 @@ # ----------------------------------------------------------------- # Name : py_survey_streaming.py # Author : -# Description : Program to read data from one kafka topic and produce it -# to another kafka topic +# Description : Program to read data from one kafka topic and +# produce it to another kafka topic # ----------------------------------------------------------------- + +from pymongo import MongoClient +from bson.objectid import ObjectId +import sys, os, json, time import datetime -import json +import requests +from kafka import KafkaConsumer, KafkaProducer +from configparser import ConfigParser,ExtendedInterpolation +from cassandra.cluster import Cluster +from cassandra.query import SimpleStatement,ConsistencyLevel +import kafka +from kafka.admin import KafkaAdminClient, NewTopic +from slackclient import SlackClient +import faust +import psycopg2 +from geopy.distance import geodesic +import logging import logging.handlers -import os -from configparser import ConfigParser, ExtendedInterpolation +import time from logging.handlers import TimedRotatingFileHandler +import redis -import faust -import requests -from bson.objectid import ObjectId -from kafka import KafkaProducer -from pymongo import MongoClient - -config_path = os.path.dirname(os.path.abspath(__file__)) +config_path = os.path.split(os.path.dirname(os.path.abspath(__file__))) config = ConfigParser(interpolation=ExtendedInterpolation()) -config.read(config_path + "/config.ini") +config.read(config_path[0] + "/config.ini") formatter = logging.Formatter('%(asctime)s - %(levelname)s') @@ -28,10 +37,10 @@ # Add the log message 
handler to the logger successHandler = logging.handlers.RotatingFileHandler( - config.get('LOG_FILE', 'survey_streaming_success_log_filename') + config.get('LOGS', 'survey_streaming_success_log_filename') ) successBackuphandler = TimedRotatingFileHandler( - config.get('LOG_FILE', 'survey_streaming_success_log_filename'), + config.get('LOGS', 'survey_streaming_success_log_filename'), when="w0", backupCount=1 ) @@ -42,10 +51,10 @@ errorLogger = logging.getLogger('error log') errorLogger.setLevel(logging.ERROR) errorHandler = logging.handlers.RotatingFileHandler( - config.get('LOG_FILE', 'survey_streaming_error_log_filename') + config.get('LOGS', 'survey_streaming_error_log_filename') ) errorBackuphandler = TimedRotatingFileHandler( - config.get('LOG_FILE', 'survey_streaming_error_log_filename'), + config.get('LOGS', 'survey_streaming_error_log_filename'), when="w0", backupCount=1 ) @@ -53,340 +62,340 @@ errorLogger.addHandler(errorHandler) errorLogger.addHandler(errorBackuphandler) -# db production -clientqa = MongoClient(config.get('MONGO', 'url')) -dbqa = clientqa[config.get('MONGO', 'db')] - -surveySubmissionsQACollec = dbqa[config.get('MONGO', 'surveySubmissionsCollec')] -solutionsQACollec = dbqa[config.get('MONGO', 'solutionsCollec')] -surveyQACollec = dbqa[config.get('MONGO', 'surveysCollec')] -entityTypeQACollec = dbqa[config.get('MONGO', 'entityTypeCollec')] -questionsQACollec = dbqa[config.get('MONGO', 'questionsCollec')] -criteriaQACollec = dbqa[config.get('MONGO', 'criteriaCollec')] -entitiesQACollec = dbqa[config.get('MONGO', 'entitiesCollec')] -programsQACollec = dbqa[config.get('MONGO', 'programsCollec')] - try: - kafka_url = (config.get("KAFKA", "url")) - + kafka_url = (config.get("KAFKA", "kafka_url")) app = faust.App( 'sl_survey_prod_faust', - broker='kafka://' + kafka_url, + broker='kafka://'+kafka_url, value_serializer='raw', web_port=7004, broker_max_poll_records=500 ) - rawTopicName = app.topic(config.get("KAFKA", "raw_data_topic")) - producer = 
KafkaProducer(bootstrap_servers=[config.get("KAFKA", "url")]) -except Exception as e: - errorLogger.error(e, exc_info=True) + rawTopicName = app.topic(config.get("KAFKA", "kafka_raw_survey_topic")) + producer = KafkaProducer(bootstrap_servers=[config.get("KAFKA", "kafka_url")]) -try: - def get_keycloak_access_token(): - url = config.get("KEYCLOAK", "url") - headers = {'Content-Type': 'application/x-www-form-urlencoded'} - body = { - "grant_type": config.get("KEYCLOAK", "grant_type"), - "client_id": config.get("KEYCLOAK", "client_id"), - "refresh_token": config.get("KEYCLOAK", "refresh_token") - } - response = requests.post( - url, data=body, headers=headers - ) - if response.status_code == 200: - successLogger.debug("getKeycloak api") - return response.json() - else: - errorLogger.error("Failure in getKeycloak API") - errorLogger.error(response) - errorLogger.error(response.text) + #db production + client = MongoClient(config.get('MONGO', 'mongo_url')) + db = client[config.get('MONGO', 'database_name')] + surveySubmissionsCollec = db[config.get('MONGO', 'survey_submissions_collection')] + solutionsCollec = db[config.get('MONGO', 'solutions_collec')] + surveyCollec = db[config.get('MONGO', 'survey_collection')] + questionsCollec = db[config.get('MONGO', 'questions_collec')] + criteriaCollec = db[config.get('MONGO', 'criteria_collec')] + programsCollec = db[config.get('MONGO', 'programs_collec')] -except Exception as e: - errorLogger.error(e, exc_info=True) + # redis cache connection + redis_connection = redis.ConnectionPool( + host=config.get("REDIS", "host"), + decode_responses=True, + port=config.get("REDIS", "port"), + db=config.get("REDIS", "db_name") + ) + datastore = redis.StrictRedis(connection_pool=redis_connection) -try: - def read_user(user_id, access_token): - queryStringReadUser = "?fields=completeness%2CmissingFields%2ClastLoginTime%2Ctopics%2Corganisations%2Croles%2Clocations%2Cdeclarations" - urlReadUser = config.get("SUNBIRD", "base_url_ip") + "/" + 
\ - config.get("ENDPOINT", "read_user") + "/" + \ - str(user_id) + queryStringReadUser - headersReadUser = { - 'Content-Type': config.get("COMMON", "content_type"), - 'Authorization': "Bearer " + config.get("COMMON", "authorization"), - 'X-authenticated-user-token': access_token, - 'X-Channel-id': config.get("COMMON", "parent_channel") - } - responseReadUser = requests.get(urlReadUser, headers=headersReadUser) - if responseReadUser.status_code == 200: - successLogger.debug("read user api") - return responseReadUser.json() - else: - errorLogger.error("Failure in read user api") - errorLogger.error(responseReadUser) - errorLogger.error(responseReadUser.text) except Exception as e: errorLogger.error(e, exc_info=True) + try: def obj_creation(msg_id): - data_keycloak = get_keycloak_access_token() - tokenKeyCheck = "access_token" in data_keycloak - if tokenKeyCheck: - accessToken = data_keycloak['access_token'] - successLogger.debug("Survey Submission Id : " + str(msg_id)) - cursorMongo = surveySubmissionsQACollec.find({'_id': ObjectId(msg_id)}, no_cursor_timeout=True) - for obSub in cursorMongo: - completedDate = str(datetime.datetime.date(obSub['completedDate'])) + 'T' + str( - datetime.datetime.time(obSub['completedDate'])) + 'Z' - createdAt = str(datetime.datetime.date(obSub['createdAt'])) + 'T' + str( - datetime.datetime.time(obSub['createdAt'])) + 'Z' - updatedAt = str(datetime.datetime.date(obSub['updatedAt'])) + 'T' + str( - datetime.datetime.time(obSub['updatedAt'])) + 'Z' - evidencesArr = [v for v in obSub['evidences'].values()] - evidence_sub_count = 0 - - # fetch user name from postgres with the help of keycloak id - queryJsonOutput = {} - queryJsonOutput = read_user(obSub["createdBy"], accessToken) - if queryJsonOutput["result"]["response"]["userName"]: - if 'answers' in obSub.keys(): - answersArr = [v for v in obSub['answers'].values()] - for ans in answersArr: - try: - if len(ans['fileName']): - evidence_sub_count = evidence_sub_count + 
len(ans['fileName']) - except KeyError: - pass - for ans in answersArr: - def sequence_number(external_id, answer): - for sol in solutionsQACollec.find({'externalId': obSub['solutionExternalId']}): - section = [k for k in sol['sections'].keys()] - # parsing through questionSequencebyecm to get the sequence number - try: - for num in range(len( - sol['questionSequenceByEcm'][answer['evidenceMethod']][section[0]])): - if sol['questionSequenceByEcm'][answer['evidenceMethod']][section[0]][num] == external_id: - return num + 1 - except KeyError: - pass + successLogger.debug("Survey Submission Id : " + str(msg_id)) + cursorMongo = surveySubmissionsCollec.find( + {'_id':ObjectId(msg_id)}, no_cursor_timeout=True + ) + for obSub in cursorMongo : + surveySubQuestionsArr = [] + completedDate = str( + datetime.datetime.date(obSub['completedDate']) + ) + 'T' + str( + datetime.datetime.time(obSub['completedDate']) + ) + 'Z' + createdAt = str( + datetime.datetime.date(obSub['createdAt']) + ) + 'T' + str( + datetime.datetime.time(obSub['createdAt']) + ) + 'Z' + updatedAt = str( + datetime.datetime.date(obSub['updatedAt']) + ) + 'T' + str( + datetime.datetime.time(obSub['updatedAt']) + ) + 'Z' + evidencesArr = [v for v in obSub['evidences'].values()] + evidence_sub_count = 0 - def creatingObj(answer, quesexternalId, ans_val, instNumber, responseLabel): - surveySubQuestionsObj = { - 'userName': obSub['evidencesStatus'][0]['submissions'][0]['submittedByName'] - } - surveySubQuestionsObj['userName'] = surveySubQuestionsObj['userName'].replace("null", - "") + # fetch user name from postgres with the help of keycloak id + userObj = {} + userObj = datastore.hgetall("user:" + obSub["createdBy"]) + if userObj : + rootOrgId = None + try: + rootOrgId = userObj["rootorgid"] + except KeyError : + rootOrgId = '' + if 'answers' in obSub.keys() : + answersArr = [v for v in obSub['answers'].values()] + for ans in answersArr: + try: + if len(ans['fileName']): + evidence_sub_count = 
evidence_sub_count + len(ans['fileName']) + except KeyError: + pass + for ans in answersArr: + def sequenceNumber(externalId,answer): + for solu in solutionsCollec.find({'_id':ObjectId(obSub['solutionId'])}): + section = [k for k in solu['sections'].keys()] + # parsing through questionSequencebyecm to get the sequence number try: - surveySubQuestionsObj['appName'] = obSub["appInformation"]["appName"].lower() + for num in range( + len(solu['questionSequenceByEcm'][answer['evidenceMethod']][section[0]]) + ): + if solu['questionSequenceByEcm'][answer['evidenceMethod']][section[0]][num] == externalId: + return num + 1 except KeyError: - surveySubQuestionsObj['appName'] = config.get("COMMON", "diksha_survey_app_name") - surveySubQuestionsObj['surveySubmissionId'] = str(obSub['_id']) + pass + + def creatingObj(answer,quesexternalId,ans_val,instNumber,responseLabel): + surveySubQuestionsObj = {} + try: + surveySubQuestionsObj['appName'] = obSub["appInformation"]["appName"].lower() + except KeyError : + surveySubQuestionsObj['appName'] = config.get("COMMON", "diksha_survey_app_name") - surveySubQuestionsObj['createdBy'] = obSub['createdBy'] + surveySubQuestionsObj['surveySubmissionId'] = str(obSub['_id']) - try: - if obSub['isAPrivateProgram']: - surveySubQuestionsObj['isAPrivateProgram'] = obSub['isAPrivateProgram'] - else: - surveySubQuestionsObj['isAPrivateProgram'] = False - except KeyError: + surveySubQuestionsObj['createdBy'] = obSub['createdBy'] + + try: + if obSub['isAPrivateProgram']: + surveySubQuestionsObj['isAPrivateProgram'] = obSub['isAPrivateProgram'] + else: surveySubQuestionsObj['isAPrivateProgram'] = False - pass + except KeyError: + surveySubQuestionsObj['isAPrivateProgram'] = False + pass - try: - surveySubQuestionsObj['programExternalId'] = obSub['programExternalId'] - except KeyError: - surveySubQuestionsObj['programExternalId'] = None - try: - surveySubQuestionsObj['programId'] = str(obSub['programId']) - except KeyError: - 
surveySubQuestionsObj['programId'] = None - try: - for program in programsQACollec.find({'externalId': obSub['programExternalId']}): - surveySubQuestionsObj['programName'] = program['name'] - except KeyError: - surveySubQuestionsObj['programName'] = None + try: + surveySubQuestionsObj['programExternalId'] = obSub['programExternalId'] + except KeyError : + surveySubQuestionsObj['programExternalId'] = None + try: + surveySubQuestionsObj['programId'] = str(obSub['programId']) + except KeyError : + surveySubQuestionsObj['programId'] = None + try: + for program in programsCollec.find({'externalId':obSub['programExternalId']}): + surveySubQuestionsObj['programName'] = program['name'] + except KeyError : + surveySubQuestionsObj['programName'] = None - surveySubQuestionsObj['solutionExternalId'] = obSub['solutionExternalId'] - surveySubQuestionsObj['surveyId'] = str(obSub['surveyId']) - for solu in solutionsQACollec.find({'externalId': obSub['solutionExternalId']}): - surveySubQuestionsObj['solutionId'] = str(solu["_id"]) - surveySubQuestionsObj['solutionName'] = solu['name'] - section = [k for k in solu['sections'].keys()] - surveySubQuestionsObj['section'] = section[0] - surveySubQuestionsObj['questionSequenceByEcm'] = sequence_number( - quesexternalId, answer - ) - try: - if solu['scoringSystem'] == 'pointsBasedScoring': + surveySubQuestionsObj['solutionExternalId'] = obSub['solutionExternalId'] + surveySubQuestionsObj['surveyId'] = str(obSub['surveyId']) + for solu in solutionsCollec.find({'_id':ObjectId(obSub['solutionId'])}): + surveySubQuestionsObj['solutionId'] = str(solu["_id"]) + surveySubQuestionsObj['solutionName'] = solu['name'] + section = [k for k in solu['sections'].keys()] + surveySubQuestionsObj['section'] = section[0] + surveySubQuestionsObj['questionSequenceByEcm']= sequenceNumber(quesexternalId, answer) + try: + if solu['scoringSystem'] == 'pointsBasedScoring': + try: surveySubQuestionsObj['totalScore'] = obSub['pointsBasedMaxScore'] + except KeyError 
: + surveySubQuestionsObj['totalScore'] = '' + try: surveySubQuestionsObj['scoreAchieved'] = obSub['pointsBasedScoreAchieved'] - surveySubQuestionsObj['totalpercentage'] = obSub[ - 'pointsBasedPercentageScore'] + except KeyError : + surveySubQuestionsObj['scoreAchieved'] = '' + try: + surveySubQuestionsObj['totalpercentage'] = obSub['pointsBasedPercentageScore'] + except KeyError : + surveySubQuestionsObj['totalpercentage'] = '' + try: surveySubQuestionsObj['maxScore'] = answer['maxScore'] + except KeyError : + surveySubQuestionsObj['maxScore'] = '' + try: surveySubQuestionsObj['minScore'] = answer['scoreAchieved'] + except KeyError : + surveySubQuestionsObj['minScore'] = '' + try: surveySubQuestionsObj['percentageScore'] = answer['percentageScore'] - surveySubQuestionsObj['pointsBasedScoreInParent'] = answer[ - 'pointsBasedScoreInParent'] - except KeyError: - pass + except KeyError : + surveySubQuestionsObj['percentageScore'] = '' + try: + surveySubQuestionsObj['pointsBasedScoreInParent'] = answer['pointsBasedScoreInParent'] + except KeyError : + surveySubQuestionsObj['pointsBasedScoreInParent'] = '' + except KeyError: + surveySubQuestionsObj['totalScore'] = '' + surveySubQuestionsObj['scoreAchieved'] = '' + surveySubQuestionsObj['totalpercentage'] = '' + surveySubQuestionsObj['maxScore'] = '' + surveySubQuestionsObj['minScore'] = '' + surveySubQuestionsObj['percentageScore'] = '' + surveySubQuestionsObj['pointsBasedScoreInParent'] = '' - for ob in surveyQACollec.find({'_id': obSub['surveyId']}): - surveySubQuestionsObj['surveyName'] = ob['name'] - surveySubQuestionsObj['questionId'] = str(answer['qid']) - surveySubQuestionsObj['questionAnswer'] = ans_val - surveySubQuestionsObj['questionResponseType'] = answer['responseType'] - if answer['responseType'] == 'number': - if answer['payload']['labels']: - surveySubQuestionsObj['questionResponseLabel_number'] = responseLabel - else: - surveySubQuestionsObj['questionResponseLabel_number'] = '' + for ob in 
surveyCollec.find({'_id':obSub['surveyId']}): + surveySubQuestionsObj['surveyName'] = ob['name'] + surveySubQuestionsObj['questionId'] = str(answer['qid']) + surveySubQuestionsObj['questionAnswer'] = ans_val + surveySubQuestionsObj['questionResponseType'] = answer['responseType'] + if answer['responseType'] == 'number': if answer['payload']['labels']: - surveySubQuestionsObj['questionResponseLabel'] = responseLabel + surveySubQuestionsObj['questionResponseLabel_number'] = responseLabel else: - surveySubQuestionsObj['questionResponseLabel'] = '' - surveySubQuestionsObj['questionExternalId'] = quesexternalId - surveySubQuestionsObj['questionName'] = answer['payload']['question'][0] - surveySubQuestionsObj['questionECM'] = answer['evidenceMethod'] - surveySubQuestionsObj['criteriaId'] = str(answer['criteriaId']) - for crit in criteriaQACollec.find({'_id': ObjectId(answer['criteriaId'])}): - surveySubQuestionsObj['criteriaExternalId'] = crit['externalId'] - surveySubQuestionsObj['criteriaName'] = crit['name'] - surveySubQuestionsObj['completedDate'] = completedDate - surveySubQuestionsObj['createdAt'] = createdAt - surveySubQuestionsObj['updatedAt'] = updatedAt - surveySubQuestionsObj['remarks'] = answer['remarks'] - if len(answer['fileName']): - multipleFiles = None - fileCnt = 1 - for filedetail in answer['fileName']: - if fileCnt == 1: - multipleFiles = config.get('STORAGE', 'base_url') + filedetail['sourcePath'] - fileCnt = fileCnt + 1 - else: - multipleFiles = multipleFiles + ' , ' + config.get('STORAGE', 'base_url') + \ - filedetail['sourcePath'] - surveySubQuestionsObj['evidences'] = multipleFiles - surveySubQuestionsObj['evidence_count'] = len(answer['fileName']) - surveySubQuestionsObj['total_evidences'] = evidence_sub_count - # to fetch the parent question of matrix - if ans['responseType'] == 'matrix': - surveySubQuestionsObj['instanceParentQuestion'] = ans['payload']['question'][0] - surveySubQuestionsObj['instanceParentId'] = ans['qid'] - 
surveySubQuestionsObj['instanceParentResponsetype'] = ans['responseType'] - surveySubQuestionsObj['instanceParentCriteriaId'] = ans['criteriaId'] - for crit in criteriaQACollec.find({'_id': ObjectId(ans['criteriaId'])}): - surveySubQuestionsObj['instanceParentCriteriaExternalId'] = crit['externalId'] - surveySubQuestionsObj['instanceParentCriteriaName'] = crit['name'] - surveySubQuestionsObj['instanceId'] = instNumber - for ques in questionsQACollec.find({'_id': ObjectId(ans['qid'])}): - surveySubQuestionsObj['instanceParentExternalId'] = ques['externalId'] - surveySubQuestionsObj['instanceParentEcmSequence'] = sequence_number( - observationSubQuestionsObj['instanceParentExternalId'], answer - ) - else: - surveySubQuestionsObj['instanceParentQuestion'] = '' - surveySubQuestionsObj['instanceParentId'] = '' - surveySubQuestionsObj['instanceParentResponsetype'] = '' - surveySubQuestionsObj['instanceId'] = instNumber - surveySubQuestionsObj['instanceParentExternalId'] = '' - surveySubQuestionsObj['instanceParentEcmSequence'] = '' - surveySubQuestionsObj['user_id'] = queryJsonOutput["result"]["response"]["userName"] - surveySubQuestionsObj['channel'] = queryJsonOutput["result"]["response"]["rootOrgId"] - surveySubQuestionsObj['parent_channel'] = config.get('COMMON', 'parent_channel') - return surveySubQuestionsObj - - # fetching the question details from questions collection - def fetching_question_details(ansFn, instNumber): - for ques in questionsQACollec.find({'_id': ObjectId(ansFn['qid'])}): - # surveySubQuestionsArr.append('t') - if len(ques['options']) == 0: - try: - if len(ansFn['payload']['labels']) > 0: - finalObj = {} - finalObj = creatingObj( - ansFn, ques['externalId'], ansFn['value'], - instNumber, ansFn['payload']['labels'][0] - ) - producer.send( - (config.get("KAFKA", "druid_topic")), - json.dumps(finalObj).encode('utf-8') - ) - producer.flush() - successLogger.debug("Send Obj to Kafka") - except KeyError: - pass + 
surveySubQuestionsObj['questionResponseLabel_number'] = '' + if answer['payload']['labels']: + surveySubQuestionsObj['questionResponseLabel'] = responseLabel + else: + surveySubQuestionsObj['questionResponseLabel'] = '' + surveySubQuestionsObj['questionExternalId'] = quesexternalId + surveySubQuestionsObj['questionName'] = answer['payload']['question'][0] + surveySubQuestionsObj['questionECM'] = answer['evidenceMethod'] + surveySubQuestionsObj['criteriaId'] = str(answer['criteriaId']) + for crit in criteriaCollec.find({'_id':ObjectId(answer['criteriaId'])}): + surveySubQuestionsObj['criteriaExternalId'] = crit['externalId'] + surveySubQuestionsObj['criteriaName'] = crit['name'] + surveySubQuestionsObj['completedDate'] = completedDate + surveySubQuestionsObj['createdAt'] = createdAt + surveySubQuestionsObj['updatedAt'] = updatedAt + surveySubQuestionsObj['remarks'] = answer['remarks'] + if len(answer['fileName']): + multipleFiles = None + fileCnt = 1 + for filedetail in answer['fileName']: + if fileCnt == 1: + multipleFiles = 'https://storage.cloud.google.com/sl-prod-storage/' + filedetail['sourcePath'] + fileCnt = fileCnt + 1 else: - labelIndex = 0 - for quesOpt in ques['options']: - try: - if type(ansFn['value']) == str or type(ansFn['value']) == int: - if quesOpt['value'] == ansFn['value']: - finalObj = {} - finalObj = creatingObj( - ansFn, ques['externalId'], - ansFn['value'], instNumber, - ansFn['payload']['labels'][0] - ) - producer.send( - (config.get("KAFKA", "druid_topic")), - json.dumps(finalObj).encode('utf-8') - ) - producer.flush() - successLogger.debug("Send Obj to Kafka") - elif type(ansFn['value']) == list: - for ansArr in ansFn['value']: - if quesOpt['value'] == ansArr: - finalObj = {} - finalObj = creatingObj( - ansFn, ques['externalId'], ansArr, - instNumber, quesOpt['label'] - ) - producer.send( - (config.get("KAFKA", "druid_topic")), - json.dumps(finalObj).encode('utf-8') - ) - producer.flush() - successLogger.debug("Send Obj to Kafka") - except 
KeyError: - pass - # to check the value is null ie is not answered + multipleFiles = multipleFiles + ' , ' + 'https://storage.cloud.google.com/sl-prod-storage/' + filedetail['sourcePath'] + surveySubQuestionsObj['evidences'] = multipleFiles + surveySubQuestionsObj['evidence_count'] = len(answer['fileName']) + surveySubQuestionsObj['total_evidences'] = evidence_sub_count + # to fetch the parent question of matrix + if ans['responseType']=='matrix': + surveySubQuestionsObj['instanceParentQuestion'] = ans['payload']['question'][0] + surveySubQuestionsObj['instanceParentId'] = ans['qid'] + surveySubQuestionsObj['instanceParentResponsetype'] =ans['responseType'] + surveySubQuestionsObj['instanceParentCriteriaId'] =ans['criteriaId'] + for crit in criteriaCollec.find({'_id':ObjectId(ans['criteriaId'])}): + surveySubQuestionsObj['instanceParentCriteriaExternalId'] = crit['externalId'] + surveySubQuestionsObj['instanceParentCriteriaName'] = crit['name'] + surveySubQuestionsObj['instanceId'] = instNumber + for ques in questionsCollec.find({'_id':ObjectId(ans['qid'])}): + surveySubQuestionsObj['instanceParentExternalId'] = ques['externalId'] + surveySubQuestionsObj['instanceParentEcmSequence']= sequenceNumber( + observationSubQuestionsObj['instanceParentExternalId'], answer + ) + else: + surveySubQuestionsObj['instanceParentQuestion'] = '' + surveySubQuestionsObj['instanceParentId'] = '' + surveySubQuestionsObj['instanceParentResponsetype'] ='' + surveySubQuestionsObj['instanceId'] = instNumber + surveySubQuestionsObj['instanceParentExternalId'] = '' + surveySubQuestionsObj['instanceParentEcmSequence'] = '' + surveySubQuestionsObj['channel'] = rootOrgId + surveySubQuestionsObj['parent_channel'] = "SHIKSHALOKAM" + return surveySubQuestionsObj + + # fetching the question details from questions collection + def fetchingQuestiondetails(ansFn,instNumber): + for ques in questionsCollec.find({'_id':ObjectId(ansFn['qid'])}): + if len(ques['options']) == 0: try: - if 
type(ansFn['value']) == str and ansFn['value'] == '': + if len(ansFn['payload']['labels']) > 0: finalObj = {} - finalObj = creatingObj( - ansFn, ques['externalId'], ansFn['value'], - instNumber, None + finalObj = creatingObj( + ansFn,ques['externalId'], + ansFn['value'], + instNumber, + ansFn['payload']['labels'][0] ) producer.send( - (config.get("KAFKA", "druid_topic")), + (config.get("KAFKA", "kafka_survey_druid_topic")), json.dumps(finalObj).encode('utf-8') ) producer.flush() successLogger.debug("Send Obj to Kafka") - except KeyError: - pass + except KeyError : + pass + else: + labelIndex = 0 + for quesOpt in ques['options']: + try: + if type(ansFn['value']) == str or type(ansFn['value']) == int: + if quesOpt['value'] == ansFn['value'] : + finalObj = {} + finalObj = creatingObj( + ansFn,ques['externalId'], + ansFn['value'], + instNumber, + ansFn['payload']['labels'][0] + ) + producer.send( + (config.get("KAFKA", "kafka_survey_druid_topic")), + json.dumps(finalObj).encode('utf-8') + ) + producer.flush() + successLogger.debug("Send Obj to Kafka") + elif type(ansFn['value']) == list: + for ansArr in ansFn['value']: + if quesOpt['value'] == ansArr: + finalObj = {} + finalObj = creatingObj( + ansFn,ques['externalId'], + ansArr, + instNumber, + quesOpt['label'] + ) + producer.send( + (config.get("KAFKA", "kafka_survey_druid_topic")), + json.dumps(finalObj).encode('utf-8') + ) + producer.flush() + successLogger.debug("Send Obj to Kafka") + except KeyError: + pass + + #to check the value is null ie is not answered + try: + if type(ansFn['value']) == str and ansFn['value'] == '': + finalObj = {} + finalObj = creatingObj( + ansFn,ques['externalId'], ansFn['value'], instNumber, None + ) + print(finalObj) + producer.send( + (config.get("KAFKA", "kafka_survey_druid_topic")), + json.dumps(finalObj).encode('utf-8') + ) + producer.flush() + successLogger.debug("Send Obj to Kafka") + except KeyError: + pass - if ans['responseType'] == 'text' or ans['responseType'] == 'radio' or 
\ - ans['responseType'] == 'multiselect' or ans['responseType'] == 'slider' or \ - ans['responseType'] == 'number' or ans['responseType'] == 'date': - inst_cnt = '' - fetching_question_details(ans, inst_cnt) - elif ans['responseType'] == 'matrix' and len(ans['value']) > 0: - inst_cnt = 0 - for instances in ans['value']: - inst_cnt = inst_cnt + 1 - for instance in instances.values(): - fetching_question_details(instance, inst_cnt) + if ( + ans['responseType'] == 'text' or ans['responseType'] == 'radio' or + ans['responseType'] == 'multiselect' or ans['responseType'] == 'slider' or + ans['responseType'] == 'number' or ans['responseType'] == 'date' + ): + inst_cnt = '' + fetchingQuestiondetails(ans, inst_cnt) + elif ans['responseType'] == 'matrix' and len(ans['value']) > 0: + inst_cnt =0 + for instances in ans['value']: + inst_cnt = inst_cnt + 1 + for instance in instances.values(): + fetchingQuestiondetails(instance,inst_cnt) - cursorMongo.close() + cursorMongo.close() except Exception as e: errorLogger.error(e, exc_info=True) -try: +try : @app.agent(rawTopicName) - async def surveyFaust(consumer): - async for msg in consumer: + async def surveyFaust(consumer) : + async for msg in consumer : msg_val = msg.decode('utf-8') msg_data = json.loads(msg_val) successLogger.debug("========== START OF SURVEY SUBMISSION ========") @@ -395,5 +404,6 @@ async def surveyFaust(consumer): except Exception as e: errorLogger.error(e, exc_info=True) + if __name__ == '__main__': app.main()