Update scalg.py

Yuanli-Liu · Aug 22, 2020 · cd816d1 · cd816d1
1 parent dd62adf
commit cd816d1
Showing 1 changed file with 33 additions and 18 deletions.
diff --git a/scalg.py b/scalg.py
@@ -1,18 +1,38 @@
+'''
+developed by: markmelnic
+original repo: https://github.com/markmelnic/Scoring-Algorithm
 
-def score(source_data : list, weights : list, *args) -> list:
+Analyse data using a range-based percentual proximity algorithm
+and calculate the linear maximum likelihood estimation.
+The basic principle is that every value supplied is scaled into
+the range from 0 to 1 within its column, and the per-column scores
+of each row are then added up to get that row's total score.
+
+==========
+Example for data of vehicles
+price|mileage|registration_year
+20k  |60k    |2012
+22k  |50k    |2011
+23k  |90k    |2015
+16k  |210k   |2010
+
+We want the vehicle with the lowest price,
+the lowest mileage, but the newest registration year.
+Thus the weights (0 = lower is better, 1 = higher is better) are:
+[0, 0, 1]
+
+>>> procentual_proximity([[20, 60, 2012],[23, 90, 2015],[22, 50, 2011]], [0, 0, 1])
+[[20, 60, 2012, 2.0], [23, 90, 2015, 1.0], [22, 50, 2011, 1.3333333333333335]]
+'''
+
+
+def procentual_proximity(source_data : list, weights : list) -> list:
 
     '''
-    int list - weights
+    weights - int list
     possible values - 0 / 1
     0 if lower values have higher weight in the data set
     1 if higher values have higher weight in the data set
-    ==========
-    Optional arguments:
-    str - "score_lists"
-    get a list with all the scores for each piece of data
-
-    str - "scores"
-    get only the final scores for each data set
     '''
 
     # getting data
@@ -22,16 +42,18 @@ def score(source_data : list, weights : list, *args) -> list:
             try:
                 data_lists[i].append(float(item[i]))
             except IndexError:
+                # generate corresponding number of lists
                 data_lists.append([])
                 data_lists[i].append(float(item[i]))
 
     score_lists = []
-    # calculating price score
+    # calculating each score
     for dlist, weight in zip(data_lists, weights):
         mind = min(dlist)
         maxd = max(dlist)
 
         score = []
+        # for weight 0 score is 1 - actual score
         if weight == 0:
             for item in dlist:
                 try:
@@ -46,15 +68,12 @@ def score(source_data : list, weights : list, *args) -> list:
                 except ZeroDivisionError:
                     score.append(0)
 
+        # weight not 0 or 1
         else:
             raise ValueError("Invalid weight of %f provided" % (weight))
 
         score_lists.append(score)
 
-    # return score lists
-    if "score_lists" in args:
-        return score_lists
-
     # initialize final scores
     final_scores = [0 for i in range(len(score_lists[0]))]
 
@@ -63,10 +82,6 @@ def score(source_data : list, weights : list, *args) -> list:
         for j, ele in enumerate(slist):
             final_scores[j] = final_scores[j] + ele
 
-    # return only scores
-    if "scores" in args:
-        return final_scores
-
     # append scores to source data
     for i, ele in enumerate(final_scores):
         source_data[i].append(ele)