
Commit

compare unified job deployments to DFG job deployments
- compare per RHOS version XY.Z
- create a spreadsheet (one tab per unified job)
- for each tab, list DFG jobs according to the similarity measure (see the sketch below)
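The similarity measure is TF-IDF cosine similarity over the jobs' run.sh artifacts, as implemented in similarity_comparison.py below. A minimal sketch of the computation, with illustrative strings standing in for real artifacts:

from sklearn.feature_extraction.text import TfidfVectorizer

docs = ["infrared tripleo-undercloud --version 17",
        "infrared tripleo-undercloud --version 16"]
tfidf = TfidfVectorizer().fit_transform(docs)
# rows are L2-normalized, so tfidf * tfidf.T holds the pairwise
# cosine similarities; the off-diagonal entry compares the two docs
print((tfidf * tfidf.T).toarray()[0, 1])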
dsariel committed Dec 7, 2022
1 parent 1bedac0 commit 469c705
Showing 5 changed files with 290 additions and 1 deletion.
2 changes: 1 addition & 1 deletion README.md
@@ -6,6 +6,6 @@
### <span style="color:blue">[EoD-stuff](EoD-stuff/README.md): Scripts for making life easier for the Engineer on Duty :)</span>
### <span style="color:blue">[provision](provision/README.md): Different implementations of Ansible roles and playbooks to automate things.</span>
-Engineer on Duty
+### <span style="color:blue">[similarity-comparison](similarity-comparison/README.md): Scripts for comparing infrared-based Jenkins jobs</span>

![](https://github.com/RedHatCRE/toolbox/workflows/tests/badge.svg)
3 changes: 3 additions & 0 deletions similarity-comparison/.gitignore
@@ -0,0 +1,3 @@
jjs.db
jjs.xlsx
venv/**
9 changes: 9 additions & 0 deletions similarity-comparison/README.md
@@ -0,0 +1,9 @@
HOWTO
-----
virtualenv venv
. ./venv/bin/activate
pip install -r requirements.txt
python similarity_comparison.py
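
The script reads Jenkins credentials from ~/.config/jenkins_jobs/jenkins_jobs.ini (the path is hard-coded at the bottom of similarity_comparison.py). A minimal example, with placeholder values:

[jenkins]
url=https://jenkins.example.com
user=your-user
password=your-api-token

Fetched artifacts are cached in jjs.db (inspect with e.g. sqlite3 jjs.db 'SELECT jobName FROM jjs LIMIT 5;'); the comparison spreadsheet is written to jjs.xlsx.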



3 changes: 3 additions & 0 deletions similarity-comparison/requirements.txt
@@ -0,0 +1,3 @@
requests
xlsxwriter
scikit-learn
274 changes: 274 additions & 0 deletions similarity-comparison/similarity_comparison.py
@@ -0,0 +1,274 @@
# Copyright 2021 David Sariel
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import configparser
import json
import re
import requests
import sqlite3
import xlsxwriter

from os.path import expanduser
from sklearn.feature_extraction.text import TfidfVectorizer


httpRequest = {
    'requestJobsAndBuildInfo':
        "/api/json/?tree=jobs[name,lastBuild[result,number,timestamp]]",
    'requestJobs':
        "/api/json?tree=jobs[name]",
    'requestStableBuildArtifact':
        "/job/{jobName}/lastStableBuild/artifact/{artifactPath}",
    'requestArtifact':
        "/job/{jobName}/lastSuccessfulBuild/artifact/{artifactPath}"
}
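# Example composed URL (hypothetical host):
#   https://jenkins.example.com/api/json?tree=jobs[name]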


# JJSC - Jenkins Jobs Similarity Computation
class JJSC(object):
    def __init__(self, credentialsPath, artifactPath):
        configParser = configparser.RawConfigParser()
        print(configParser.read(credentialsPath))
        sectionName = "jenkins"
        dictionary = dict(configParser.items(sectionName))

        self.url = dictionary['url']
        self.artifactPath = artifactPath
        self.credentials = (dictionary['user'], dictionary['password'])

        # create (if !exists) a db to store <jobName, artifact>
        self.dbcon = sqlite3.connect('jjs.db')
        print("Connected to SQLite jjs.db")
        cursor = self.dbcon.cursor()
        cursor.execute('''CREATE TABLE IF NOT EXISTS jjs
                          ( jobName text,
                            artifactContent text,
                            artifactContentNormalized text )''')
        self.dbcon.commit()
        cursor.close()
        print("jjs table exists in jjs.db")

        self.workbook = xlsxwriter.Workbook('jjs.xlsx')

    def __del__(self):
        if self.dbcon:
            self.dbcon.close()
            print("The SQLite connection is closed")
        self.workbook.close()

    def _insertDataIntoTable(self, jobName, artifactContent):
        try:
            cursor = self.dbcon.cursor()
            sqlite_insert_with_param = """INSERT INTO jjs
                                          (jobName, artifactContent)
                                          VALUES (?, ?);"""
            data_tuple = (jobName, artifactContent)
            cursor.execute(sqlite_insert_with_param, data_tuple)
            self.dbcon.commit()
            cursor.close()
            return 0

        except sqlite3.Error as error:
            print("Failed to insert into sqlite table", error)
            return -1

    def populateDB(self):
        # get all Jobs
        request = requests.get(self.url + httpRequest['requestJobs'],
                               verify=False,
                               auth=self.credentials)
        jobsInJSON = json.loads(request.text)
        print(json.dumps(jobsInJSON, indent=4, sort_keys=True))

        skipList = ["util"]

        # get and store an artifact (if found)
        okCounter = 0
        insertCounter = 0
        for element in jobsInJSON['jobs']:
            print(element['name'])
            jobName = element['name']
            if jobName in skipList:
                continue
            requestStr = self.url + httpRequest['requestArtifact'].format(
                jobName=jobName,
                artifactPath=self.artifactPath)
            request = requests.get(requestStr, verify=False,
                                   auth=self.credentials)
            print(requestStr)
            if request.ok:
                okCounter = okCounter + 1
                if self._insertDataIntoTable(jobName, request.text) >= 0:
                    insertCounter = insertCounter + 1

        print("From populateDB")
        print("okCounter: " + str(okCounter))
        print("insertCounter: " + str(insertCounter))
        print("number of jobs: " + str(len(jobsInJSON['jobs'])))
        assert (okCounter == insertCounter)

    def _normalizeArtifact(self, artifact):
        # keep only the infrared deployment invocations (plus the next few
        # lines of each); everything else in the artifact is noise, and
        # tempest invocations are dropped implicitly, since only the
        # undercloud/overcloud plugin runs match the regex
        plugin_names = "(tripleo-undercloud|tripleo-overcloud)"
        regex = r".*infrared " + plugin_names + " .*(([\r\n]*).*){4}"
        matches = re.finditer(regex, artifact, re.MULTILINE)
        normalizedArtifact = ""
        for matchNum, match in enumerate(matches, start=1):
            print(
                "Match {matchNum} was found at {start}-{end}: {match}".format(
                    matchNum=matchNum,
                    start=match.start(),
                    end=match.end(),
                    match=match.group()))
            normalizedArtifact = normalizedArtifact + "\n" + match.group()

        return (normalizedArtifact)

    def _extractVersionFromJobName(self, jobName):
        # matches XY.Z, XY, or XY_Z in job names, e.g. a hypothetical
        # "DFG-...-16_2-director-..." yields "16.2"
        REGEXP = r'\s*([\d(.|_)]+)(_compact|-compact|_director|-director)\s*'

        version = re.search(REGEXP, jobName).group(1)
        version = version.replace("_", ".")  # for jobs with XY_Z

        return version

    def _extractIPVersionFromJobName(self, jobName):
        # matches the IP version in job names, e.g. "...ipv6..." yields "6";
        # returns "NA" when the job name carries no ipv marker
        REGEXP = r".*ipv([\d]+).*"

        try:
            version = re.search(REGEXP, jobName).group(1)
        except AttributeError:
            version = "NA"

        return version

    # return true if the artifact contains any of the filter-out criteria
    def _isFilteredOut(self, artifact):
        filterOutCriteria = ["infrared tripleo-inventory",
                             "infrared workspace import",
                             "sshpass -p stack ssh -o UserKnownHostsFile=/dev/null",
                             "infrared tripleo-upgrade"]

        artifactString = str(artifact)

        intersection = [value for value in filterOutCriteria
                        if value in artifactString]

        return (len(intersection) > 0)

    def analyseJJSTable(self):
        cursor = self.dbcon.cursor()

        # fetch unified jobs
        sql_command = \
            'SELECT DISTINCT * FROM jjs WHERE jobName LIKE ' + \
            '\'%unified%\' AND jobName LIKE \'%director%\' ORDER BY jobName'
        cursor.execute(sql_command)
        unifiedJobs = cursor.fetchall()
        print("Total unified jobs: ", len(unifiedJobs))

        # fetch other director jobs (including unified ones) to compare
        # against the unified jobs
        sql_command = \
            'SELECT DISTINCT * FROM jjs WHERE jobName LIKE ' + \
            '\'%director%\' AND jobName NOT LIKE \'%compact%\''
        cursor.execute(sql_command)
        directorJobs = cursor.fetchall()
        print("Total director jobs: ", len(directorJobs))

        unifiedJobsCounter = 0
        cell_format = self.workbook.add_format(
            {'bold': True, 'font_color': 'red'})
        for rowUnified in unifiedJobs:
            jobNameUnified = str(rowUnified[0])
            print(len(unifiedJobs))
            try:
                unifiedJobsCounter += 1
                # worksheet names are capped at 31 characters; truncate the
                # job name and disambiguate with a counter
                worksheet = self.workbook.add_worksheet(
                    jobNameUnified[1:28] + "--" + str(unifiedJobsCounter))
                worksheet.set_column(0, 0, len(jobNameUnified))
                worksheet.write(0, 0, jobNameUnified, cell_format)
                row = 1
            except xlsxwriter.exceptions.DuplicateWorksheetName:
                continue
            for rowDirector in directorJobs:
                jobNameDirector = str(rowDirector[0])
                releaseUnified = self._extractVersionFromJobName(
                    jobNameUnified)
                releaseDirector = self._extractVersionFromJobName(
                    jobNameDirector)
                ipVersionUnified = self._extractIPVersionFromJobName(
                    jobNameUnified)
                ipVersionDirector = self._extractIPVersionFromJobName(
                    jobNameDirector)
                # if releaseUnified not in ["16.1", "16.2"]:
                #     continue

                # only compare jobs of the same release and IP version
                if jobNameUnified != jobNameDirector and \
                        releaseUnified == releaseDirector and \
                        ipVersionUnified == ipVersionDirector:
                    artifactUnified = str(rowUnified[1])
                    artifactDirector = str(rowDirector[1])
                    if self._isFilteredOut(artifactDirector):
                        continue
                    normalizedUnified = self._normalizeArtifact(
                        artifactUnified)
                    normalizedDirector = self._normalizeArtifact(
                        artifactDirector)
                    try:
                        tfidf = TfidfVectorizer().fit_transform(
                            [normalizedUnified, normalizedDirector])
                        # no need to normalize, since Vectorizer will return
                        # normalized tf-idf
                        pairwise_similarity = tfidf * tfidf.T
                    except Exception:
                        print("Can not compare " + rowUnified[0] + " and " +
                              rowDirector[0] + "\n")
                        # pairwise_similarity is unset; skip this pair
                        continue
                    # the smallest stored entry of the 2x2 similarity matrix
                    # is the pair's cosine similarity
                    threshold = pairwise_similarity.data.min()

                    # cosine similarity is non-negative, so this keeps every
                    # pair; raise the bound to prune weak matches
                    if threshold >= 0.0:
                        wordsUnified = set(normalizedUnified.split())
                        wordsDirector = set(normalizedDirector.split())
                        unifiedUniques = set(
                            sorted(wordsUnified.difference(wordsDirector)))
                        directorUniques = set(
                            sorted(wordsDirector.difference(wordsUnified)))
                        uniques = unifiedUniques.union(directorUniques)
                        print(jobNameUnified + "," + str(unifiedUniques))
                        print(jobNameDirector + "," + str(directorUniques))
                        fstr = 'Total uniques: {}, Pairwise Similarity: {}\n'
                        print(fstr.format(len(uniques), threshold))
                        try:
                            worksheet.set_column(row, 0, len(jobNameDirector))
                            worksheet.write(row, 0, jobNameDirector)

                            threshold = round(threshold, 3)
                            worksheet.set_column(row, 1, len(str(threshold)))
                            worksheet.write(row, 1, str(threshold))

                            row = row + 1
                        except Exception as e:
                            print(e)
                            continue
        cursor.close()


credentialsPath = expanduser("~") + '/.config/jenkins_jobs/jenkins_jobs.ini'
artifactPath = '.sh/run.sh'
jjsc = JJSC(credentialsPath, artifactPath)
jjsc.populateDB()
jjsc.analyseJJSTable()
del jjsc
