Skip to content

Commit

Permalink
WIP step 1
Browse files Browse the repository at this point in the history
NEWKWDataGridOptimizer
- duplication de KWDataGridOptimizer temporaire, pour tester un refactoring majeur
- appele dans KWDataGridOptimizer::OptimizeDataGrid selon un booleen bNewProto

CCCoclusteringOptimizer
- suppression de la classe (pour l'instant commentee)
- remplacee en redesignant l'interaction entre KWAttributeStats et KWDataGridOptimizer pour la gestion des HandleOptimizationStep
  - class KWAttributeSubsetStats : public KWDataPreparationStats
    - methode virtuelle HandleOptimizationStep
  - KWDataGridOptimizer
    - Set|GetAttributeSubsetStats: pour parametrer le "builder" qui demande l'optimisation, et lui rediriger les HandleOptimizationStep
  - CCCoclusteringBuilder utilise desormais directement un KWDataGridOptimizer

Tests elementaires
  • Loading branch information
marcboulle committed Oct 11, 2023
1 parent 60538e6 commit c9f920c
Show file tree
Hide file tree
Showing 11 changed files with 3,044 additions and 10 deletions.
6 changes: 6 additions & 0 deletions src/Learning/KWDataPreparation/KWAttributeSubsetStats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,12 @@ KWDataGrid* KWAttributeSubsetStats::CreateDataGrid(const KWTupleTable* tupleTabl
return dataGrid;
}

// Hook called with the current optimized grid, the initial granularized grid and a last-saving flag
// Default implementation: does nothing
void KWAttributeSubsetStats::HandleOptimizationStep(const KWDataGrid* optimizedDataGrid,
						    const KWDataGrid* initialGranularizedDataGrid,
						    boolean bIsLastSaving) const
{
	// Intentionally empty
}

boolean KWAttributeSubsetStats::GetPregranularizedNumericalAttributes()
{
return bPregranularizedNumericalAttributes;
Expand Down
5 changes: 5 additions & 0 deletions src/Learning/KWDataPreparation/KWAttributeSubsetStats.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,11 @@ class KWAttributeSubsetStats : public KWDataPreparationStats
// Memoire: appartient a l'appelant
KWDataGrid* CreateDataGrid(const KWTupleTable* tupleTable);

// Method called during the optimization, at each optimization step
// Empty implementation by default
virtual void HandleOptimizationStep(const KWDataGrid* optimizedDataGrid,
const KWDataGrid* initialGranularizedDataGrid, boolean bIsLastSaving) const;

// Parametrage avance
// Pre-granularisation des attributs numeriques cible (regression) et des attributs numeriques explicatifs en
// analyse non supervisee (co-clustering) Cette pre-granularisation permet :
Expand Down
2 changes: 1 addition & 1 deletion src/Learning/KWDataPreparation/KWDataGridManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1900,7 +1900,7 @@ void KWDataGridManager::BuildUnivariateDataGridFromGranularizedPartition(KWDataG
KWDGAttribute* targetAttribute;
KWDGAttribute* sourceAttribute;

require(0 < nAttributeIndex and nAttributeIndex < sourceDataGrid->GetAttributeNumber());
require(0 <= nAttributeIndex and nAttributeIndex < sourceDataGrid->GetAttributeNumber());

// Initialisation de la grille cible a une variable
InitialiseDataGrid(sourceDataGrid, univariateTargetDataGrid, 1);
Expand Down
37 changes: 37 additions & 0 deletions src/Learning/KWDataPreparation/KWDataGridOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ KWDataGridOptimizer::KWDataGridOptimizer()
// CH IV Begin
initialVarPartDataGrid = NULL;
// CH IV end
attributeSubsetStatsHandler = NULL;
}

// Destructor with an empty body
KWDataGridOptimizer::~KWDataGridOptimizer()
{
}
Expand All @@ -29,6 +30,7 @@ void KWDataGridOptimizer::Reset()
// CH IV Begin
initialVarPartDataGrid = NULL;
// CH IV end
attributeSubsetStatsHandler = NULL;
}

void KWDataGridOptimizer::SetDataGridCosts(const KWDataGridCosts* kwdgcCosts)
Expand Down Expand Up @@ -103,6 +105,27 @@ double KWDataGridOptimizer::OptimizeDataGrid(const KWDataGrid* initialDataGrid,
int nLastExploredGranularity;
ALString sTmp;

// CH IV Refactoring : DDDDD
// Test du remplacement de la methode actuelle, par son proto
boolean bNewPROTO = true;
if (bNewPROTO)
{
NEWKWDataGridOptimizer newDataGridOptimizer;

// Recopie du parametrage courant
newDataGridOptimizer.Reset();
newDataGridOptimizer.GetParameters()->CopyFrom(&optimizationParameters);
newDataGridOptimizer.SetClassStats(GetClassStats());
newDataGridOptimizer.SetDataGridCosts(GetDataGridCosts());
newDataGridOptimizer.bCleanNonInformativeVariables = bCleanNonInformativeVariables;
newDataGridOptimizer.initialVarPartDataGrid = initialVarPartDataGrid;
newDataGridOptimizer.SetAttributeSubsetStats(attributeSubsetStatsHandler);

// Optimisation
dBestCost = newDataGridOptimizer.OptimizeDataGrid(initialDataGrid, optimizedDataGrid);
return dBestCost;
}

dGranularityBestCost = DBL_MAX;
dBestMergedCost = dGranularityBestCost;
dTotalTime = 0;
Expand Down Expand Up @@ -689,6 +712,20 @@ void KWDataGridOptimizer::HandleOptimizationStep(const KWDataGrid* optimizedData
const KWDataGrid* initialGranularizedDataGrid,
boolean bIsLastSaving) const
{
// Integration de la granularite
if (attributeSubsetStatsHandler != NULL)
attributeSubsetStatsHandler->HandleOptimizationStep(optimizedDataGrid, initialGranularizedDataGrid,
bIsLastSaving);
}

void KWDataGridOptimizer::SetAttributeSubsetStats(const KWAttributeSubsetStats* attributeSubsetStats)
{
attributeSubsetStatsHandler = attributeSubsetStats;
}

// Returns the attribute subset stats currently registered as handler of the optimization steps
const KWAttributeSubsetStats* KWDataGridOptimizer::GetAttributeSubsetStats()
{
	return this->attributeSubsetStatsHandler;
}

void KWDataGridOptimizer::PostOptimizeGranularity(const KWDataGrid* initialDataGrid, KWDataGrid* optimizedDataGrid,
Expand Down
9 changes: 9 additions & 0 deletions src/Learning/KWDataPreparation/KWDataGridOptimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class CCCoclusteringOptimizer;
#include "SortedList.h"
#include "Profiler.h"
#include "Timer.h"
#include "NEWKWDataGridOptimizer.h"

//////////////////////////////////////////////////////////////////////////////////
// Classe KWDataGridOptimizer
Expand Down Expand Up @@ -93,6 +94,11 @@ class KWDataGridOptimizer : public Object
virtual void HandleOptimizationStep(const KWDataGrid* optimizedDataGrid,
const KWDataGrid* initialGranularizedDataGrid, boolean bIsLastSaving) const;

// Setup of the context handling the anytime part of the optimization
// Allows redirecting the HandleOptimizationStep method to that of the given attributeSubsetStats
void SetAttributeSubsetStats(const KWAttributeSubsetStats* attributeSubsetStats);
const KWAttributeSubsetStats* GetAttributeSubsetStats();

//////////////////////////////////////////////////////////////////
// Gestion d'un profiler dedie a l'optimisation des grilles
// Ce profiler doit etre demarre depuis le point d'entree de l'optimisation,
Expand Down Expand Up @@ -168,6 +174,9 @@ class KWDataGridOptimizer : public Object
// Epsilon d'optimisation
double dEpsilon;

// Context handling the anytime part of the optimization (NULL when no handler is set)
const KWAttributeSubsetStats* attributeSubsetStatsHandler;

// Profiler
static Profiler profiler;
};
Expand Down
Loading

0 comments on commit c9f920c

Please sign in to comment.