Skip to content

Commit

Permalink
Merge pull request #88 from KhiopsML/69-refactor-coclustering-iv-opti…
Browse files Browse the repository at this point in the history
…mization-algorithms

69 refactor coclustering iv optimization algorithms
  • Loading branch information
marcboulle authored Oct 6, 2023
2 parents 85e7c2d + 4154648 commit 008d8b4
Show file tree
Hide file tree
Showing 494 changed files with 342,332 additions and 319,784 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ repos:
hooks:
- id: pretty-format-json
args: [--autofix, --no-ensure-ascii, --no-sort-keys]
exclude: test/LearningTest/
- id: trailing-whitespace
types_or: [c, c++, java, python, markdown]
- repo: https://github.com/python-jsonschema/check-jsonschema
Expand Down
12 changes: 0 additions & 12 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -225,18 +225,6 @@ function(set_unix_khiops_options target)
#
target_compile_options(${target} PRIVATE $<$<CONFIG:RELEASE,RELWITHDEBINFO>:-fno-rtti> -fno-exceptions)

# Special options for clang, we remove the following too verbose warnings:
#
# - unsequenced modification
# - overloaded virtual functions
# - inconsistent missing override
#
# These warnings should be add back, but we have a lot of work in Khiops
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
target_compile_options(${target} PRIVATE -Wno-unsequenced -Wno-overloaded-virtual
-Wno-inconsistent-missing-override)
endif()

endfunction(set_unix_khiops_options)

# Sets the compiling options for MSVC
Expand Down
2 changes: 2 additions & 0 deletions src/Learning/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ add_subdirectory(MODL_Coclustering)
add_subdirectory(SNBPredictor)
add_subdirectory(samples/sample3)

add_subdirectory(KWTest)

# build norm.jar
if(BUILD_JARS)
include(UseJava)
Expand Down
4 changes: 2 additions & 2 deletions src/Learning/DTForest/DTDecisionTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class DTDecisionTree : public KWLearningReport
WithReplacementAdaBoost, // tirage avec remise, choix pondere (algo AdaBoost)
};

boolean ComputeStats();
boolean ComputeStats() override;

/// Duplication de l'arbre
DTDecisionTree* Clone();
Expand Down Expand Up @@ -167,7 +167,7 @@ class DTDecisionTree : public KWLearningReport
void WriteNodes(ostream&, const ObjectDictionary*);

// Ecriture d'un rapport (accessible uniquement si statistiques calculees)
void WriteReport(ostream& ost);
void WriteReport(ostream& ost) override;

void WriteDTArrayLineReport(ostream& ost, const ALString& sTitle, ObjectArray* oaLearningReports,
DTDecisionTree* tree);
Expand Down
2 changes: 1 addition & 1 deletion src/Learning/DTForest/DTDecisionTreeDatabaseObject.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class DTDecisionTreeDatabaseObject : public Object
void SetTargetCorrectlyPredicted(boolean);

static void WriteHeaderLine(ostream& ost);
void Write(ostream& ost) const;
void Write(ostream& ost) const override;

// probas correspondant au noeud auquel appartient l'instance
const ContinuousVector* GetTrainNodeProbs() const;
Expand Down
12 changes: 6 additions & 6 deletions src/Learning/DTForest/DTGrouperMODLInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class DTGrouperMODLTwoClasses : public KWDiscretizerMODL // public DTDiscretizer
IntVector*& ivGroups) const;

// Nom de l'algorithme
const ALString GetName() const;
const ALString GetName() const override;

/////////////////////////////////////////////////////////////////
//// Implementation
Expand All @@ -65,8 +65,8 @@ class DTGrouperMODLTwoClasses : public KWDiscretizerMODL // public DTDiscretizer
// Initialisation des variables de travail
// Reimplementation vide de ces methodes virtuelles pour les inhiber
// (le parametrage est fait par l'appelant)
void InitializeWorkingData(const KWFrequencyTable* kwftSource) const;
void CleanWorkingData() const;
void InitializeWorkingData(const KWFrequencyTable* kwftSource) const override;
void CleanWorkingData() const override;

// Discretisation d'une table granularisee avec recherche d'une table optimale avec groupe poubelle
void DiscretizeFrequencyTable(KWFrequencyTable* kwftSource, KWFrequencyTable*& kwftTarget) const;
Expand All @@ -85,8 +85,8 @@ class DTGrouperMODLTwoClasses : public KWDiscretizerMODL // public DTDiscretizer

// Ajout et retrait d'un intervalle de la liste de travail triee par nombre de modalites de l'intervalle
// Necessite que la liste soit initialisee
virtual void AddIntervalToWorkingFrequencyList(KWMODLLine* interval) const;
virtual void RemoveIntervalFromWorkingFrequencyList(KWMODLLine* interval) const;
void AddIntervalToWorkingFrequencyList(KWMODLLine* interval) const override;
void RemoveIntervalFromWorkingFrequencyList(KWMODLLine* interval) const override;

// Rangement des intervalles dans une liste triee ordonnee par effectif decroissant du nombre de modalites
// En entree : frequencyList est NULL
Expand All @@ -96,7 +96,7 @@ class DTGrouperMODLTwoClasses : public KWDiscretizerMODL // public DTDiscretizer

// Calcul du nombre de modalites d'une ligne de contingence decrite par ses index de debut et de fin par rapport
// a une table kwftSource En entree, la table kwftSource est initialisee En sortie, le nombre de modalites
virtual int ComputeModalityNumber(const KWFrequencyTable* kwftSource, int nFirstIndex, int LastIndex) const;
int ComputeModalityNumber(const KWFrequencyTable* kwftSource, int nFirstIndex, int LastIndex) const override;

// Calcul du nombre de modalites du groupe poubelle d'une partition
// En entree, la table kwftTargetWithGarbage dont les KWFrequencyVector contiennent le nombre de modalites par
Expand Down
2 changes: 1 addition & 1 deletion src/Learning/KWDRRuleLibrary/KWDRMath.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ Symbol KWDRFormatContinuous::ComputeSymbolResult(const KWObject* kwoObject) cons
nLength--;
if (cValue < 0)
nLength++;
sprintf(sBuffer, "%0*.*f", nLength, nPrecision, (double)cValue);
snprintf(sBuffer, sizeof(sBuffer), "%0*.*f", nLength, nPrecision, (double)cValue);
return (Symbol)sBuffer;
}
}
Expand Down
1 change: 1 addition & 0 deletions src/Learning/KWData/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ add_library(
KWValueBlock.cpp
KWValueDictionary.cpp
KWValueSparseVector.cpp
Profiler.cpp
${BISON_KWCParser_OUTPUTS}
${FLEX_KWCScanner_OUTPUTS}
${FLEX_JsonScanner_OUTPUTS})
Expand Down
4 changes: 3 additions & 1 deletion src/Learning/KWData/JSONFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,8 @@ class JSONFile : public Object
static KWIntVectorSorter ivsWindows1252ControlCharUtf8CodeSorter;

// Instance statique de JSONFile, permettant de forcer l'initialisation des structures d'encodage une fois
// pour toute lors de l'appel du constructuer de cette instance
// pour toute lors de l'appel du constructeur de cette instance
// Ne pas declarer d'autres instances statiques de JSONFile, par exemple via d'autres classes, sinon cela pose
// des problemes de memoire non liberee non diagnostiques par les outils de gestion de la memoire
static JSONFile jsonFileGlobalInitializer;
};
2 changes: 1 addition & 1 deletion src/Learning/KWData/KWCYac.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3215,7 +3215,7 @@ void yyerrorWithLineCorrection(char const* fmt, int nDeltaLineNumber)
nLineNumber = yylineno + nDeltaLineNumber;
if (nLineNumber <= 0)
nLineNumber = 1;
sprintf(sErrorLine, "Line %d", nLineNumber);
snprintf(sErrorLine, sizeof(sErrorLine), "Line %d", nLineNumber);
sLabel = fmt;
Global::AddError("Read dictionary file", sErrorLine, sLabel);
}
Expand Down
2 changes: 1 addition & 1 deletion src/Learning/KWData/KWCYac.yac
Original file line number Diff line number Diff line change
Expand Up @@ -1405,7 +1405,7 @@ void yyerrorWithLineCorrection(char const *fmt, int nDeltaLineNumber)
nLineNumber = yylineno+nDeltaLineNumber;
if (nLineNumber <= 0)
nLineNumber = 1;
sprintf(sErrorLine, "Line %d", nLineNumber);
// The second argument of snprintf is the capacity of the destination buffer, not the buffer itself
snprintf(sErrorLine, sizeof(sErrorLine), "Line %d", nLineNumber);
sLabel = fmt;
Global::AddError("Read dictionary file",
sErrorLine,
Expand Down
14 changes: 7 additions & 7 deletions src/Learning/KWData/KWContinuous.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1567,7 +1567,7 @@ const char* const KWContinuous::StandardContinuousToString(Continuous cValue)
if (cValue == GetMissingValue())
sBuffer[0] = '\0';
else
sprintf(sBuffer, "%.10g", cValue);
snprintf(sBuffer, BUFFER_LENGTH, "%.10g", cValue);
return sBuffer;
}

Expand Down Expand Up @@ -1860,7 +1860,7 @@ void KWContinuous::CompareStringToContinuous(Continuous cValue, boolean bShow)
return;

// Conversion
sprintf(sValue, "%.15g", cValue);
snprintf(sValue, sizeof(sValue), "%.15g", cValue);
dRefValue = StandardStringToContinuous(sValue);
dNewValue = StringToContinuous(sValue);

Expand All @@ -1884,7 +1884,7 @@ const char* const KWContinuous::MaxPrecisionDoubleToString(double dValue)
{
char* sBuffer = StandardGetBuffer();

sprintf(sBuffer, "%.15g", dValue);
snprintf(sBuffer, BUFFER_LENGTH, "%.15g", dValue);

return sBuffer;
}
Expand Down Expand Up @@ -1939,7 +1939,7 @@ void KWContinuous::TestPerformanceStringToContinuous(int nMaxLowerBaseValue, dou
{
// Nombre positif avec exposant positif
cValue = (cUpperBaseValue + nLowerBaseValue) * dPositivePower10[nExponent];
sprintf(sValue, "%.15g", cValue);
snprintf(sValue, sizeof(sValue), "%.15g", cValue);
if (bRefConversion)
StandardStringToContinuous(sValue);
if (bNewConversion)
Expand All @@ -1949,7 +1949,7 @@ void KWContinuous::TestPerformanceStringToContinuous(int nMaxLowerBaseValue, dou

// Nombre positif avec exposant negatif
cValue = (cUpperBaseValue + nLowerBaseValue) * dNegativePower10[nExponent];
sprintf(sValue, "%.15g", cValue);
snprintf(sValue, sizeof(sValue), "%.15g", cValue);
if (bRefConversion)
StandardStringToContinuous(sValue);
if (bNewConversion)
Expand All @@ -1959,7 +1959,7 @@ void KWContinuous::TestPerformanceStringToContinuous(int nMaxLowerBaseValue, dou

// Nombre negatif avec exposant positif
cValue = -(cUpperBaseValue + nLowerBaseValue) * dPositivePower10[nExponent];
sprintf(sValue, "%.15g", cValue);
snprintf(sValue, sizeof(sValue), "%.15g", cValue);
if (bRefConversion)
StandardStringToContinuous(sValue);
if (bNewConversion)
Expand All @@ -1969,7 +1969,7 @@ void KWContinuous::TestPerformanceStringToContinuous(int nMaxLowerBaseValue, dou

// Nombre negatif avec exposant negatif
cValue = -(cUpperBaseValue + nLowerBaseValue) * dNegativePower10[nExponent];
sprintf(sValue, "%.15g", cValue);
snprintf(sValue, sizeof(sValue), "%.15g", cValue);
if (bRefConversion)
StandardStringToContinuous(sValue);
if (bNewConversion)
Expand Down
2 changes: 1 addition & 1 deletion src/Learning/KWData/KWDatabase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1341,7 +1341,7 @@ void KWDatabase::TestCreateObjects(int nNumber)
// Creation d'objets
DeleteAll();
for (i = 0; i < nNumber; i++)
GetObjects()->Add(KWObject::CreateObject(kwcCreationClass, i + 1));
GetObjects()->Add(KWObject::CreateObject(kwcCreationClass, (longint)i + 1));
}

void KWDatabase::TestRead()
Expand Down
26 changes: 14 additions & 12 deletions src/Learning/KWData/KWDate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ const char* const Date::ToString() const
if (not Check())
sDate[0] = '\0';
else
sprintf(sDate, "%04d-%02d-%02d", GetYear(), GetMonth(), GetDay());
snprintf(sDate, BUFFER_LENGTH, "%04d-%02d-%02d", GetYear(), GetMonth(), GetDay());
return sDate;
}

Expand Down Expand Up @@ -320,9 +320,11 @@ const char* const Date::TimeZoneToString(boolean bExtended) const
else
cSign = '-';
if (bExtended)
sprintf(sTimeZone, "%c%02d:%02d", cSign, GetTimeZoneHour(), GetTimeZoneMinute());
snprintf(sTimeZone, BUFFER_LENGTH, "%c%02d:%02d", cSign, GetTimeZoneHour(),
GetTimeZoneMinute());
else
sprintf(sTimeZone, "%c%02d%02d", cSign, GetTimeZoneHour(), GetTimeZoneMinute());
snprintf(sTimeZone, BUFFER_LENGTH, "%c%02d%02d", cSign, GetTimeZoneHour(),
GetTimeZoneMinute());
}
}
return sTimeZone;
Expand Down Expand Up @@ -618,11 +620,11 @@ const char* const KWDateFormat::DateToString(Date dtValue) const
nOffset = 0;
assert(nOffset == nYearOffset or nOffset == nMonthOffset or nOffset == nDayOffset);
if (nYearOffset == 0)
nOffset += sprintf(sBuffer, "%04d", dtValue.GetYear());
nOffset += snprintf(sBuffer, BUFFER_LENGTH, "%04d", dtValue.GetYear());
else if (nMonthOffset == 0)
nOffset += sprintf(sBuffer, "%02d", dtValue.GetMonth());
nOffset += snprintf(sBuffer, BUFFER_LENGTH, "%02d", dtValue.GetMonth());
else
nOffset += sprintf(sBuffer, "%02d", dtValue.GetDay());
nOffset += snprintf(sBuffer, BUFFER_LENGTH, "%02d", dtValue.GetDay());

// Ecriture du premier separateur optionnel
if (nSeparatorOffset1 == nOffset)
Expand All @@ -634,11 +636,11 @@ const char* const KWDateFormat::DateToString(Date dtValue) const
// Ecriture du deuxieme champ
assert(nOffset == nYearOffset or nOffset == nMonthOffset or nOffset == nDayOffset);
if (nYearOffset == nOffset)
nOffset += sprintf(sBuffer + nOffset, "%04d", dtValue.GetYear());
nOffset += snprintf(sBuffer + nOffset, BUFFER_LENGTH - nOffset, "%04d", dtValue.GetYear());
else if (nMonthOffset == nOffset)
nOffset += sprintf(sBuffer + nOffset, "%02d", dtValue.GetMonth());
nOffset += snprintf(sBuffer + nOffset, BUFFER_LENGTH - nOffset, "%02d", dtValue.GetMonth());
else
nOffset += sprintf(sBuffer + nOffset, "%02d", dtValue.GetDay());
nOffset += snprintf(sBuffer + nOffset, BUFFER_LENGTH - nOffset, "%02d", dtValue.GetDay());

// Ecriture du deuxieme separateur optionnel
if (nSeparatorOffset2 == nOffset)
Expand All @@ -650,11 +652,11 @@ const char* const KWDateFormat::DateToString(Date dtValue) const
// Ecriture du troisieme champ
assert(nOffset == nYearOffset or nOffset == nMonthOffset or nOffset == nDayOffset);
if (nYearOffset == nOffset)
nOffset += sprintf(sBuffer + nOffset, "%04d", dtValue.GetYear());
nOffset += snprintf(sBuffer + nOffset, BUFFER_LENGTH - nOffset, "%04d", dtValue.GetYear());
else if (nMonthOffset == nOffset)
nOffset += sprintf(sBuffer + nOffset, "%02d", dtValue.GetMonth());
nOffset += snprintf(sBuffer + nOffset, BUFFER_LENGTH - nOffset, "%02d", dtValue.GetMonth());
else
nOffset += sprintf(sBuffer + nOffset, "%02d", dtValue.GetDay());
nOffset += snprintf(sBuffer + nOffset, BUFFER_LENGTH - nOffset, "%02d", dtValue.GetDay());
}
return sBuffer;
}
Expand Down
23 changes: 13 additions & 10 deletions src/Learning/KWData/KWTime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,13 @@ const char* const Time::ToString() const

// Cas ou il n'y a pas de fraction de secondes
if (nSecondFrac == 0)
sprintf(sTime, "%02d:%02d:%02d", GetHour(), GetMinute(), (int)timeValue.timeFields.nSecond);
snprintf(sTime, BUFFER_LENGTH, "%02d:%02d:%02d", GetHour(), GetMinute(),
(int)timeValue.timeFields.nSecond);
// Cas avec fraction de secondes
else
{
nLength = sprintf(sTime, "%02d:%02d:%02d.%04d", GetHour(), GetMinute(),
(int)timeValue.timeFields.nSecond, nSecondFrac);
nLength = snprintf(sTime, BUFFER_LENGTH, "%02d:%02d:%02d.%04d", GetHour(), GetMinute(),
(int)timeValue.timeFields.nSecond, nSecondFrac);

// Suppression des zeros en fin pour ne garder que la partie utile des decimales de secondes
for (i = 0; i < 4; i++)
Expand Down Expand Up @@ -660,9 +661,9 @@ const char* const KWTimeFormat::TimeToString(Time tmValue) const
nOffset = 0;
assert(nOffset == nHourOffset);
if (bMandatoryFirstDigit)
nOffset += sprintf(sBuffer, "%02d", tmValue.GetHour());
nOffset += snprintf(sBuffer, BUFFER_LENGTH, "%02d", tmValue.GetHour());
else
nOffset += sprintf(sBuffer, "%d", tmValue.GetHour());
nOffset += snprintf(sBuffer, BUFFER_LENGTH, "%d", tmValue.GetHour());

// Ecriture du premier separateur optionnel
if (nSeparatorOffset1 != -1)
Expand All @@ -673,9 +674,9 @@ const char* const KWTimeFormat::TimeToString(Time tmValue) const

// Ecriture du champ minute
if (bMandatoryFirstDigit)
nOffset += sprintf(sBuffer + nOffset, "%02d", tmValue.GetMinute());
nOffset += snprintf(sBuffer + nOffset, BUFFER_LENGTH - nOffset, "%02d", tmValue.GetMinute());
else
nOffset += sprintf(sBuffer + nOffset, "%d", tmValue.GetMinute());
nOffset += snprintf(sBuffer + nOffset, BUFFER_LENGTH - nOffset, "%d", tmValue.GetMinute());

// Ecriture du deuxieme separateur optionnel
if (nSeparatorOffset2 != -1)
Expand All @@ -693,9 +694,11 @@ const char* const KWTimeFormat::TimeToString(Time tmValue) const
if (nSecondOffset != -1)
{
if (bMandatoryFirstDigit)
nOffset += sprintf(sBuffer + nOffset, "%02d", (int)floor(tmValue.GetSecond()));
nOffset += snprintf(sBuffer + nOffset, BUFFER_LENGTH - nOffset, "%02d",
(int)floor(tmValue.GetSecond()));
else
nOffset += sprintf(sBuffer + nOffset, "%d", (int)floor(tmValue.GetSecond()));
nOffset += snprintf(sBuffer + nOffset, BUFFER_LENGTH - nOffset, "%d",
(int)floor(tmValue.GetSecond()));
}

// Ecriture de la partie decimale des secondes
Expand All @@ -709,7 +712,7 @@ const char* const KWTimeFormat::TimeToString(Time tmValue) const
// Ecriture des fractions de secondes
if (nSecondFrac > 0)
{
nOffset += sprintf(sBuffer + nOffset, ".%04d", nSecondFrac);
nOffset += snprintf(sBuffer + nOffset, BUFFER_LENGTH - nOffset, ".%04d", nSecondFrac);

// Suppression des zeros en fin pour ne garder que la partie utile des decimales de
// secondes
Expand Down
2 changes: 1 addition & 1 deletion src/Learning/KWData/KWTimestamp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ const char* const Timestamp::ToString() const
if (not Check())
sTimestamp[0] = '\0';
else
sprintf(sTimestamp, "%s %s", GetDate().ToString(), GetTime().ToString());
snprintf(sTimestamp, BUFFER_LENGTH, "%s %s", GetDate().ToString(), GetTime().ToString());
return sTimestamp;
}

Expand Down
4 changes: 2 additions & 2 deletions src/Learning/KWData/KWTimestampTZ.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ const char* const TimestampTZ::ToString() const
if (not Check())
sTimestampTZ[0] = '\0';
else
sprintf(sTimestampTZ, "%s %s%s", GetInternalDate().ToString(), GetInternalTime().ToString(),
GetInternalDate().TimeZoneToString(true));
snprintf(sTimestampTZ, BUFFER_LENGTH, "%s %s%s", GetInternalDate().ToString(),
GetInternalTime().ToString(), GetInternalDate().TimeZoneToString(true));
return sTimestampTZ;
}

Expand Down
2 changes: 1 addition & 1 deletion src/Learning/KWData/KWTimestampTZ.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class KWTimestampTZFormat;
// Prefixe pour les variables: tstz
// Pas de constructeur pour pouvoir etre utilise dans l'union de KWValue
//
// Les TimestampTZ peuvent comporter ou non information de type time zone (time zone aware ou unaware.
// Les TimestampTZ peuvent comporter ou non une information de type time zone (time zone aware ou unaware)
class TimestampTZ : public SystemObject
{
public:
Expand Down
Loading

0 comments on commit 008d8b4

Please sign in to comment.