Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream-remote/master' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
gfournier committed Aug 20, 2020
2 parents 9496bbc + 1b82e8d commit c2ab29d
Show file tree
Hide file tree
Showing 30 changed files with 1,498 additions and 305 deletions.
8 changes: 6 additions & 2 deletions .github/workflows/pythonapp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ jobs:
strategy:
matrix:
python-version: [3.6, 3.7, 3.8]
scikitlearn-version: [0.22.2]
pandas-version: [0.25.3]

steps:
- uses: actions/checkout@v2
Expand All @@ -53,6 +55,8 @@ jobs:
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install -r requirements-test.txt
pip install pandas==${{ matrix.pandas-version }}
pip install scikit-learn==${{ matrix.scikitlearn-version }}
- name: Install NLTK
run: |
python -m nltk.downloader punkt
Expand All @@ -66,8 +70,8 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
scikitlearn-version: [0.21.3, 0.22.1]
pandas-version: [0.25.3]
scikitlearn-version: [0.21.3, 0.22.2, 0.23.1]
pandas-version: [0.25.3, 1.0.5]

steps:
- uses: actions/checkout@v2
Expand Down
12 changes: 12 additions & 0 deletions .readthedocs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Read the Docs build configuration for the documentation.
# NOTE(review): this top-level `version` is presumably the configuration-file
# schema version, not a Python version — confirm against the Read the Docs docs.
version: 2

# Python environment used for the documentation build.
python:
version: 3.7
# Requirement files installed before building, in this order.
install:
- requirements: requirements.txt
- requirements: requirements-doc.txt

# Sphinx is configured through docs/conf.py.
sphinx:
configuration: docs/conf.py

# NOTE(review): `all` presumably requests every extra output format — confirm.
formats: all
2 changes: 1 addition & 1 deletion aikit/ml_machine/ml_machine.py
Original file line number Diff line number Diff line change
Expand Up @@ -1287,7 +1287,7 @@ def guess_scoring(self, auto_ml_config):
self.scoring = ["accuracy", "log_loss_patched", "avg_roc_auc", "f1_macro"]

elif auto_ml_config.type_of_problem == en.TypeOfProblem.CLUSTERING:
self.scoring = ["silhouette", "calinski_harabaz", "davies_bouldin"]
self.scoring = ["silhouette", "calinski_harabasz", "davies_bouldin"]

else:
self.scoring = ["r2", "neg_mean_squared_error", "neg_mean_absolute_error"]
Expand Down
2 changes: 1 addition & 1 deletion aikit/ml_machine/ml_machine_guider.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def get_metric_default_transformation(metric_name):
# Metric where 'perfection' is 0 => focus on differences with 0
return lambda x: -np.log10(-x)

elif metric_name in {"calinski_harabaz"}:
elif metric_name in {"calinski_harabasz"}:
return lambda x: np.log10(1 + x)

else:
Expand Down
198 changes: 0 additions & 198 deletions aikit/ml_machine/model_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,201 +597,3 @@ def _rec_convert_graph_to_code(
composition_already_done=composition_already_done,
)


# In[] : Old functions


def convert_graph_to_code_OLD(G, all_models_params):
    """Convert a graphical model into a json-like representation (legacy version).

    Parameters
    ----------
    G : nx.DiGraph
        graph representing the model. For every node, ``node[1][1]`` is stored
        as the first item of that node's entry.

    all_models_params : dict
        hyper-parameters of each model, key = node, value = dictionary of
        hyper-parameters for this node. A node missing from this dictionary
        gets an empty dictionary of hyper-parameters.

    Returns
    -------
    json-like python object representing the model
    """
    # One (node[1][1], hyper-parameters) pair per node of the graph.
    params_by_node = {
        graph_node: (graph_node[1][1], all_models_params.get(graph_node, {}))
        for graph_node in G.nodes
    }

    # The structure of the graph is validated before any conversion is attempted.
    assert_model_graph_structure(G)

    return _rec_convert_graph_to_code_OLD(G, params_by_node)


def _rec_convert_graph_to_code_OLD(G, all_params):
    """Recursive worker converting a graph into a json-like representation.

    Parameters
    ----------
    G : nx.DiGraph
        (sub)graph to convert

    all_params : dict
        key = node, value = what has to be emitted for this node.
        Updated in place: the entry of a composition node is replaced by the
        converted code of the part of the graph located below that node.

    Returns
    -------
    * ``{}`` if the graph has no node
    * ``(SpecialModels.GraphPipeline, {"models": ..., "edges": ...})`` if the
      graph has several nodes and nothing remains below a composition node
    * a 2-uple built from the single node otherwise

    Raises
    ------
    ValueError
        if the predecessors of the composition node, the node itself and its
        successors do not form a partition of the nodes of the graph
    """
    if len(G.nodes) == 0:
        return {}

    # Stop on the first composition step. If none is found the loop runs to
    # completion and 'pivot' is simply the last node that was visited.
    for pivot in gh.iter_graph(G):
        if StepCategories.is_composition_step(pivot[0]):
            found_composition = True
            break
    else:
        found_composition = False

    if found_composition:
        # Split the graph between what is above and what is below the composition node
        upstream = gh.get_all_predecessors(G, pivot)
        downstream = gh.get_all_successors(G, pivot)

        if not gh.is_it_a_partition(list(G.nodes), [upstream, [pivot], downstream]):
            raise ValueError("Incorrect graph, wrong split around node %s" % str(pivot))

        # Nothing below the composition node: the result can be emitted right away
        emit_now = len(downstream) == 0
    else:
        emit_now = True

    if emit_now:
        if len(G.nodes) <= 1:
            # A single node: just the model name with what was stored for it
            return pivot[1][1], all_params[list(G.nodes)[0]]

        # Several nodes: describe them as a GraphPipeline
        raw_edges = gh.edges_from_graph(G)

        # each node of the graph is mapped to a name within the GraphPipeline
        name_of = _create_name_mapping(list(G.nodes))

        pipeline_models = {}
        for graph_node in G.nodes:
            pipeline_models[name_of[graph_node]] = all_params[graph_node]

        pipeline_edges = [tuple(name_of[step] for step in raw_edge) for raw_edge in raw_edges]

        return (SpecialModels.GraphPipeline, {"models": pipeline_models, "edges": pipeline_edges})

    above = G.subgraph(upstream + [pivot])
    below = G.subgraph(downstream)

    components = gh.get_connected_graphs(below)
    if len(components) == 1:
        # what is below is one connected graph: the composition applies to one model
        collapsed = _rec_convert_graph_to_code_OLD(below, all_params)
    else:
        # otherwise the composition applies to a list of models
        collapsed = [_rec_convert_graph_to_code_OLD(component, all_params) for component in components]

    all_params[pivot] = collapsed

    return _rec_convert_graph_to_code_OLD(above, all_params)


def convert_graph_to_code_OLD2(Graph, all_models_params, also_returns_mapping=False):
    """Convert a graph representing a model into its json code (legacy version).

    Parameters
    ----------
    Graph : nx.DiGraph
        the graph of the model, each node has the form ( step, (step, klass) )

    all_models_params : dict
        hyper-parameters of each model, keys = node of Graph, values = corresponding hyper-parameters.
        Every node of Graph must be present as a key.

    also_returns_mapping : boolean, default = False
        if True, returns a dictionary with 'name_mapping' and 'json_code' as its keys,
        so that the names used in the GraphPipeline can be accessed;
        otherwise returns only the json code

    Returns
    -------
    a json-like object representing the model, or a dictionary holding both the
    name mapping and that object when 'also_returns_mapping' is True

    Raises
    ------
    KeyError
        if a node of Graph is missing from 'all_models_params'
    """
    models_dico = {node: (_klass_from_node(node), all_models_params[node]) for node in Graph.nodes}

    # A list is passed, as at the other call sites of '_create_name_mapping'
    model_name_mapping = _create_name_mapping(list(Graph.nodes))

    # The worker matching this entry point is the 'OLD2' one:
    # '_rec_convert_graph_to_code_OLD' only accepts (G, all_params) and would
    # reject these keyword arguments.
    rec_result = _rec_convert_graph_to_code_OLD2(
        Graph=Graph, all_models_params=all_models_params, models_dico=models_dico, model_name_mapping=model_name_mapping
    )

    if not also_returns_mapping:
        return rec_result

    return {"name_mapping": model_name_mapping, "json_code": rec_result}


def _rec_convert_graph_to_code_OLD2(Graph, all_models_params, models_dico, model_name_mapping=None):
    """Recursive function used to convert a Graph into a json code (legacy version).

    See convert_graph_to_code_OLD2

    Parameters
    ----------
    Graph : nx.DiGraph
        (sub)graph to convert

    all_models_params : dict
        hyper-parameters of each model, keys = node, values = corresponding hyper-parameters

    models_dico : dict
        keys = node, values = code to emit for this node.
        Updated in place: the entry of a composition node is replaced by the
        code of the composition applied to what is below it.

    model_name_mapping : dict or None, default = None
        keys = node, values = name of the node within the GraphPipeline.
        If None, it is created from the nodes of Graph when a GraphPipeline is emitted.

    Returns
    -------
    a json-like object: the entry of 'models_dico' for a single-node graph,
    a ``(SpecialModels.GraphPipeline, {...})`` 2-uple, or a
    ``(klass, sub_model(s), hyper-parameters)`` 3-uple for a composition node
    without predecessors

    Raises
    ------
    ValueError
        if the predecessors of the composition node, the node itself and its
        successors do not form a partition of the nodes of Graph
    """

    ### ** only one node in Graph : I'll return what was saved in models_dico ** ###
    if len(Graph.nodes) == 1:
        node = list(Graph.nodes)[0]
        return models_dico[node]

    node = _find_first_composition_node(Graph)

    if node is not None:
        predecessors = gh.get_all_predecessors(Graph, node)
        successors = gh.get_all_successors(Graph, node)

        if not gh.is_it_a_partition(list(Graph.nodes), [predecessors, [node], successors]):
            raise ValueError("Incorrect graph, wrong split around node %s" % str(node))
    else:
        predecessors = []
        successors = []

    if node is None or len(successors) == 0:
        ### ** It means I'll return a GraphPipeline ** ###
        edges = gh.edges_from_graph(Graph)

        if model_name_mapping is None:
            model_name_mapping = _create_name_mapping(list(Graph.nodes))
        # each node in graph will be mapped to a name within the GraphPipeline

        models = {model_name_mapping[n]: models_dico[n] for n in Graph.nodes}

        edges = [tuple((model_name_mapping[e] for e in edge)) for edge in edges]

        return (SpecialModels.GraphPipeline, {"models": models, "edges": edges})

    Graph_bellow = Graph.subgraph(successors)

    connected_Gbellow = gh.get_connected_graphs(Graph_bellow)

    if len(predecessors) == 0 and len(connected_Gbellow) > 1:
        # Composition node at the top, applied to a list of models
        return (
            _klass_from_node(node),
            [
                _rec_convert_graph_to_code_OLD2(Gb, all_models_params, models_dico, model_name_mapping)
                for Gb in connected_Gbellow
            ],
            all_models_params[node],
        )

    elif len(predecessors) == 0 and len(connected_Gbellow) == 1:
        # Composition node at the top, applied to one model
        return (
            _klass_from_node(node),
            _rec_convert_graph_to_code_OLD2(Graph_bellow, all_models_params, models_dico, model_name_mapping),
            all_models_params[node],
        )

    else:
        # Something is above the composition node: collapse the node and what is
        # below it into a single entry, then convert what is above.
        G_bellow_and_node = Graph.subgraph([node] + successors)
        G_above = Graph.subgraph(predecessors + [node])

        models_dico[node] = _rec_convert_graph_to_code_OLD2(
            G_bellow_and_node, all_models_params, models_dico, model_name_mapping
        )

        # Recurse into this legacy implementation, like every other recursive
        # call above, rather than into the current '_rec_convert_graph_to_code'.
        return _rec_convert_graph_to_code_OLD2(G_above, all_models_params, models_dico, model_name_mapping)
22 changes: 10 additions & 12 deletions aikit/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,21 +56,19 @@ def __init__(
verbose=0,
random_state=None,
copy_x=True,
n_jobs=None,
algorithm="auto",
):
super(KMeansWrapper, self).__init__(
n_clusters,
init,
n_init,
max_iter,
tol,
precompute_distances,
verbose,
random_state,
copy_x,
n_jobs,
algorithm,
n_clusters=n_clusters,
init=init,
n_init=n_init,
max_iter=max_iter,
tol=tol,
precompute_distances=precompute_distances,
verbose=verbose,
random_state=random_state,
copy_x=copy_x,
algorithm=algorithm,
)

def fit(self, X, y=None, sample_weight=None):
Expand Down
Loading

0 comments on commit c2ab29d

Please sign in to comment.