Skip to content

Commit

Permalink
Add Model.ExtractorSettings, bump DaxModel version 1.5.0 (#121)
Browse files Browse the repository at this point in the history
* Re-run DMV extractor only if model has any DL partitions

* Add ExtractorSettings

* Refactor StatExtractor access modifiers to ensure consistent ExtractorSettings

* Bump CurrentDaxModelVersion 1.5.0

* Mark `StatExtractor.UpdateStatisticsModel` obsolete

* Rename ExtractorSettings to ExtractorProperties

* Fix Dax.Model.Extractor namespace in test utils Program.cs
  • Loading branch information
albertospelta authored Mar 26, 2024
1 parent 3049bd8 commit 204761e
Show file tree
Hide file tree
Showing 7 changed files with 113 additions and 47 deletions.
55 changes: 55 additions & 0 deletions src/Dax.Metadata/ExtractorProperties.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
namespace Dax.Metadata;

using System.ComponentModel;

/// <summary>
/// Settings used by the extractor. All properties are plain read/write values with no
/// initializers, so a new instance starts with <see cref="StatisticsEnabled"/> set to
/// <see langword="false"/> and both modes set to their zero-valued enum member.
/// </summary>
public sealed class ExtractorProperties
{
/// <summary>
/// Specifies whether to enable statistics collection from the data instead of relying on the DMVs. The result is more accurate statistics, but it can be slower.
/// </summary>
public bool StatisticsEnabled { get; set; }

/// <summary>
/// Specifies which columns receive a detailed scan; see <see cref="DirectLakeExtractionMode"/> for the available modes.
/// </summary>
/// <remarks>
/// This setting only applies when <see cref="StatisticsEnabled"/> is <see langword="true"/>.
/// </remarks>
public DirectLakeExtractionMode DirectLakeMode { get; set; }

/// <summary>
/// Specifies whether DirectQuery tables are included in statistics collection; see <see cref="DirectQueryExtractionMode"/>.
/// </summary>
/// <remarks>
/// This setting only applies when <see cref="StatisticsEnabled"/> is <see langword="true"/>.
/// </remarks>
public DirectQueryExtractionMode DirectQueryMode { get; set; }
}

/// <summary>
/// Selects which columns receive a detailed scan during statistics extraction
/// (the type of <see cref="ExtractorProperties.DirectLakeMode"/>).
/// </summary>
/// <remarks>
/// The numeric order of the members is significant: callers compare values with relational
/// operators (for example <c>mode &gt; ResidentOnly</c>), so new members must not be inserted
/// below <see cref="ResidentOnly"/> and existing values must not be renumbered.
/// </remarks>
public enum DirectLakeExtractionMode
{
/// <summary>
/// Only does a detailed scan of columns that are already in memory
/// </summary>
[Description("Only does a detailed scan of columns that are already in memory")]
ResidentOnly = 0,

/// <summary>
/// Only does a detailed scan of columns referenced by measures or relationships
/// </summary>
[Description("Only does a detailed scan of columns referenced by measures or relationships")]
Referenced = 1,

/// <summary>
/// Does a detailed scan of all columns forcing them to be paged into memory
/// </summary>
[Description("Does a detailed scan of all columns forcing them to be paged into memory")]
Full = 2
}

/// <summary>
/// Selects whether DirectQuery tables take part in statistics collection
/// (the type of <see cref="ExtractorProperties.DirectQueryMode"/>).
/// </summary>
public enum DirectQueryExtractionMode
{
/// <summary>
/// Excludes all DirectQuery tables from statistics collection
/// </summary>
None = 0,

/// <summary>
/// Includes all DirectQuery tables in statistics collection
/// </summary>
Full = 1
}
16 changes: 15 additions & 1 deletion src/Dax.Metadata/Model.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace Dax.Metadata
{
Expand All @@ -25,6 +26,10 @@ public class Model
/// </summary>
public string ExtractorLibVersion { get; set; }
/// <summary>
/// Specifies settings used by the extractor
/// </summary>
public ExtractorProperties ExtractorProperties { get; set; }
/// <summary>
/// Library that manages the model info (e.g. Dax.Model)
/// </summary>
public string DaxModelLib { get; set; }
Expand Down Expand Up @@ -95,14 +100,15 @@ public class Model

/// <summary>
/// Creates a model with a fresh <see cref="ExtractorProperties"/> instance and empty
/// table, relationship and role lists.
/// </summary>
public Model()
{
    this.ExtractorProperties = new ExtractorProperties();
    this.Tables = new List<Table>();
    this.Relationships = new List<Relationship>();
    this.Roles = new List<Role>();
}

// Manually update the version each time the DaxModel is modified - use https://semver.org/ specification
[JsonIgnore]
public static readonly string CurrentDaxModelVersion = new Version(1, 4, 0).ToString(3);
public static readonly string CurrentDaxModelVersion = new Version(1, 5, 0).ToString(3);

public Model(string extractorLib, string extractorLibVersion, string extractorApp = null, string extractorAppVersion = null) : this()
{
Expand Down Expand Up @@ -194,4 +200,12 @@ from ci in t.CalculationGroup.CalculationItems
}
*/
}

/// <summary>
/// Extension helpers for <see cref="Model"/>.
/// </summary>
public static class ModelExtensions
{
    /// <summary>
    /// Tells whether at least one table of the model reports Direct Lake partitions.
    /// </summary>
    /// <param name="model">Model whose tables are inspected.</param>
    /// <returns>
    /// <see langword="true"/> when any table's <c>HasDirectLakePartitions</c> is true;
    /// otherwise <see langword="false"/> (including when the model has no tables).
    /// </returns>
    public static bool HasDirectLakePartitions(this Model model)
        => model.Tables.Any(table => table.HasDirectLakePartitions);
}
}
6 changes: 3 additions & 3 deletions src/Dax.Metadata/Table.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ public bool HasDualPartitions {
public bool HasDirectQueryPartitions {
get {
foreach (var partition in Partitions) {
if (partition.Mode == Partition.PartitionMode.DirectQuery || partition.Mode == Partition.PartitionMode.Dual )
if (partition.Mode == Partition.PartitionMode.DirectQuery || partition.Mode == Partition.PartitionMode.Dual)
return true;
if (partition.Mode == Partition.PartitionMode.Default && (this.Model.DefaultMode == Partition.PartitionMode.DirectQuery || this.Model.DefaultMode == Partition.PartitionMode.Dual ))
if (partition.Mode == Partition.PartitionMode.Default && (this.Model.DefaultMode == Partition.PartitionMode.DirectQuery || this.Model.DefaultMode == Partition.PartitionMode.Dual))
return true;
}
return false;
Expand All @@ -66,7 +66,7 @@ public bool HasDirectQueryPartitions {
public bool HasDirectLakePartitions {
get {
foreach (var partition in Partitions) {
if ( partition.Mode == Partition.PartitionMode.DirectLake)
if (partition.Mode == Partition.PartitionMode.DirectLake)
return true;
if (partition.Mode == Partition.PartitionMode.Default && (this.Model.DefaultMode == Partition.PartitionMode.DirectLake))
return true;
Expand Down
16 changes: 12 additions & 4 deletions src/Dax.Model.Extractor/StatExtractor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public class StatExtractor
protected Dax.Metadata.Model DaxModel { get; private set; }
protected IDbConnection Connection { get; private set; }
protected int CommandTimeout { get; private set; } = 0;
public StatExtractor (Dax.Metadata.Model daxModel, IDbConnection connection )
private StatExtractor(Dax.Metadata.Model daxModel, IDbConnection connection)
{
this.DaxModel = daxModel;
this.Connection = connection;
Expand All @@ -22,8 +22,16 @@ protected IDbCommand CreateCommand(string commandText)
return Connection.CreateCommand(commandText);
}

// UpdateStatisticsModel has been marked as obsolete because its usage may require rerunning the DMVs for models with DirectLake partitions. Since this logic should be handled by the library, we may consider removing it from the public APIs in a future release.
[Obsolete("This method may produce incomplete results if used on a model with DirectLake partitions and DirectLakeExtractionMode parameter set to anything other than ResidentOnly. Use TomExtractor.GetDaxModel instead.")]
public static void UpdateStatisticsModel(Dax.Metadata.Model daxModel, IDbConnection connection, int sampleRows = 0, bool analyzeDirectQuery = false , DirectLakeExtractionMode analyzeDirectLake = DirectLakeExtractionMode.ResidentOnly)
{
// TODO: Remove after refactoring the code to use ExtractorSettings: ExtractorProperties as a parameter
daxModel.ExtractorProperties.StatisticsEnabled = true;
daxModel.ExtractorProperties.DirectQueryMode = analyzeDirectQuery ? DirectQueryExtractionMode.Full : DirectQueryExtractionMode.None;
daxModel.ExtractorProperties.DirectLakeMode = analyzeDirectLake;
//daxModel.ExtractorProperties.ReferentialIntegrityViolationSamples = sampleRows;

StatExtractor extractor = new StatExtractor(daxModel, connection);
extractor.LoadTableStatistics(analyzeDirectQuery, analyzeDirectLake);
extractor.LoadColumnStatistics(analyzeDirectQuery, analyzeDirectLake);
Expand All @@ -33,7 +41,7 @@ public static void UpdateStatisticsModel(Dax.Metadata.Model daxModel, IDbConnect
extractor.DaxModel.ExtractionDate = DateTime.UtcNow;
}

public void LoadRelationshipStatistics(int sampleRows = 0,bool analyzeDirectQuery = false, DirectLakeExtractionMode analyzeDirectLake = DirectLakeExtractionMode.ResidentOnly)
private void LoadRelationshipStatistics(int sampleRows = 0,bool analyzeDirectQuery = false, DirectLakeExtractionMode analyzeDirectLake = DirectLakeExtractionMode.ResidentOnly)
{
// Maximum number of invalid keys used for extraction through SAMPLE, use TOPNSKIP or TOPN otherwise
const int MAX_KEYS_FOR_SAMPLE = 1000;
Expand Down Expand Up @@ -178,7 +186,7 @@ public void LoadRelationshipStatistics(int sampleRows = 0,bool analyzeDirectQuer
#endregion
}

public void LoadTableStatistics( bool analyzeDirectQuery = false , DirectLakeExtractionMode analyzeDirectLake = DirectLakeExtractionMode.ResidentOnly)
private void LoadTableStatistics( bool analyzeDirectQuery = false , DirectLakeExtractionMode analyzeDirectLake = DirectLakeExtractionMode.ResidentOnly)
{
// only get table stats if the table has more than 1 user created column
// (every table has a RowNumber column so we only want tables with more than 1 column)
Expand Down Expand Up @@ -232,7 +240,7 @@ private static string EmbedNameInString(string originalName)
{
return originalName.Replace("\"", "\"\"");
}
public void LoadColumnStatistics(bool analyzeDirectQuery = false, DirectLakeExtractionMode analyzeDirectLake = DirectLakeExtractionMode.ResidentOnly)
private void LoadColumnStatistics(bool analyzeDirectQuery = false, DirectLakeExtractionMode analyzeDirectLake = DirectLakeExtractionMode.ResidentOnly)
{
var allColumns =
(from t in DaxModel.Tables
Expand Down
12 changes: 8 additions & 4 deletions src/Dax.Model.Extractor/TomExtractor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -300,10 +300,12 @@ public static Dax.Metadata.Model GetDaxModel(string connectionString, string app
// Populate statistics by querying the data model
if (readStatisticsFromData)
{
#pragma warning disable CS0618 // Type or member is obsolete
StatExtractor.UpdateStatisticsModel(daxModel, connection, sampleRows, analyzeDirectQuery, analyzeDirectLake);
#pragma warning restore CS0618 // Type or member is obsolete

// if we have forced all columns into memory then re-run the DMVs to update the data with the new values after everything has been transcoded.
if (analyzeDirectLake > DirectLakeExtractionMode.ResidentOnly)
// If the model has any Direct Lake partitions and the scan may have paged columns into memory (any mode other than ResidentOnly), re-run the DMVs to update the data with the new values after everything has been transcoded.
if (analyzeDirectLake > DirectLakeExtractionMode.ResidentOnly && daxModel.HasDirectLakePartitions())
DmvExtractor.PopulateFromDmv(daxModel, connection, serverName, databaseName, applicationName, applicationVersion);
}
}
Expand Down Expand Up @@ -358,10 +360,12 @@ public static Dax.Metadata.Model GetDaxModel(string serverName, string databaseN
// Populate statistics by querying the data model
if (readStatisticsFromData)
{
#pragma warning disable CS0618 // Type or member is obsolete
StatExtractor.UpdateStatisticsModel(daxModel, connection, sampleRows, analyzeDirectQuery, analyzeDirectLake);
#pragma warning restore CS0618 // Type or member is obsolete

// if we have forced all columns into memory then re-run the DMVs to update the data with the new values after everything has been transcoded.
if (analyzeDirectLake > DirectLakeExtractionMode.ResidentOnly)
// If the model has any Direct Lake partitions and the scan may have paged columns into memory (any mode other than ResidentOnly), re-run the DMVs to update the data with the new values after everything has been transcoded.
if (analyzeDirectLake > DirectLakeExtractionMode.ResidentOnly && daxModel.HasDirectLakePartitions())
DmvExtractor.PopulateFromDmv(daxModel, connection, serverName, databaseName, applicationName, applicationVersion);
}
}
Expand Down
11 changes: 0 additions & 11 deletions src/Dax.Model.Extractor/Util.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,6 @@ internal class ExtractorInfo
public string Version { get; set; }
}

public enum DirectLakeExtractionMode
{

[Description("Only does a detailed scan of columns that are already in memory")]
ResidentOnly,
[Description("Only does a detailed scan of columns referenced by measures or relationships")]
Referenced,
[Description("Does a detailed scan of all columns forcing them to be paged into memory")]
Full
}

internal static class Util
{
public static ExtractorInfo GetExtractorInfo(object extractorInstance)
Expand Down
44 changes: 20 additions & 24 deletions utils/TestDaxModel/Program.cs
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Data.OleDb;
using Dax.Metadata;
using Dax.Model.Extractor;
using Dax.Vpax.Tools;
using Newtonsoft.Json;
using System.IO.Packaging;
using System;
using System.IO;
using Dax.Vpax.Tools;
using System.Linq;
using TOM = Microsoft.AnalysisServices.Tabular;
using Dax.Metadata.Extractor;

// TODO
// - Import from DMV 1100 (check for missing attributes?)
Expand Down Expand Up @@ -38,10 +34,10 @@ static void TestTomConnection()

var server = new TOM.Server();
server.Connect($"Provider=MSOLAP;Data Source={serverName};Initial Catalog={databaseName};");
var daxModel = Dax.Metadata.Extractor.TomExtractor.GetDaxModel(server.Databases[databaseName].Model, "TestDaxModel", "0.2");
var daxModel = TomExtractor.GetDaxModel(server.Databases[databaseName].Model, "TestDaxModel", "0.2");
var tomConnection = new Dax.Model.Extractor.Data.TomConnection(server, databaseName);
Dax.Metadata.Extractor.DmvExtractor.PopulateFromDmv(daxModel, tomConnection, serverName, databaseName, "TestDaxModel", "0.2");
Dax.Metadata.Extractor.StatExtractor.UpdateStatisticsModel(daxModel, tomConnection, sampleRows: 100);
DmvExtractor.PopulateFromDmv(daxModel, tomConnection, serverName, databaseName, "TestDaxModel", "0.2");
StatExtractor.UpdateStatisticsModel(daxModel, tomConnection, sampleRows: 100);
}

static void TestExportStream()
Expand All @@ -59,12 +55,12 @@ static void TestExportStream()
//
// Get Dax.Model object from the SSAS engine
//
Dax.Metadata.Model model = Dax.Metadata.Extractor.TomExtractor.GetDaxModel(serverName, databaseName, applicationName, applicationVersion);
Dax.Metadata.Model model = TomExtractor.GetDaxModel(serverName, databaseName, applicationName, applicationVersion);

//
// Get TOM model from the SSAS engine
//
TOM.Database database = includeTomModel ? Dax.Metadata.Extractor.TomExtractor.GetDatabase(serverName, databaseName) : null;
TOM.Database database = includeTomModel ? TomExtractor.GetDatabase(serverName, databaseName) : null;

//
// Create VertiPaq Analyzer views
Expand Down Expand Up @@ -105,12 +101,12 @@ static void TestExport()
//
// Get Dax.Model object from the SSAS engine
//
Dax.Metadata.Model model = Dax.Metadata.Extractor.TomExtractor.GetDaxModel(serverName, databaseName, applicationName, applicationVersion);
Dax.Metadata.Model model = TomExtractor.GetDaxModel(serverName, databaseName, applicationName, applicationVersion);

//
// Get TOM model from the SSAS engine
//
TOM.Database database = includeTomModel ? Dax.Metadata.Extractor.TomExtractor.GetDatabase(serverName, databaseName) : null;
TOM.Database database = includeTomModel ? TomExtractor.GetDatabase(serverName, databaseName) : null;

//
// Create VertiPaq Analyzer views
Expand Down Expand Up @@ -138,8 +134,8 @@ static void TestLocalVpaModel()
var conn = new System.Data.OleDb.OleDbConnection(connStr);

Dax.Metadata.Model m = new Dax.Metadata.Model();
Dax.Metadata.Extractor.DmvExtractor.PopulateFromDmv(m, conn, serverName, databaseName, "Test", "0.1");
Dax.Metadata.Extractor.StatExtractor.UpdateStatisticsModel(m, conn, 10);
DmvExtractor.PopulateFromDmv(m, conn, serverName, databaseName, "Test", "0.1");
StatExtractor.UpdateStatisticsModel(m, conn, 10);
DumpRelationships(m);
}
static void TestPbiShared()
Expand All @@ -165,7 +161,7 @@ static void TestPbiShared()
//Console.WriteLine("Connection open");

Dax.Metadata.Model m = new Dax.Metadata.Model();
Dax.Metadata.Extractor.DmvExtractor.PopulateFromDmv(m, conn, serverName, databaseName, "Test", "0.1");
DmvExtractor.PopulateFromDmv(m, conn, serverName, databaseName, "Test", "0.1");
}
static void TestPbiShared_2022()
{
Expand All @@ -179,7 +175,7 @@ static void TestPbiShared_2022()
conn.Open();

Dax.Metadata.Model m = new Dax.Metadata.Model();
Dax.Metadata.Extractor.DmvExtractor.PopulateFromDmv(m, conn, serverName, databaseName, "Test", "0.1");
DmvExtractor.PopulateFromDmv(m, conn, serverName, databaseName, "Test", "0.1");
}
static void GenericTest()
{
Expand All @@ -206,8 +202,8 @@ static void GenericTest()
const string pathOutput = @"c:\temp\";

Console.WriteLine("Getting model {0}:{1}", serverName, databaseName);
var database = Dax.Metadata.Extractor.TomExtractor.GetDatabase(serverName, databaseName);
var daxModel = Dax.Metadata.Extractor.TomExtractor.GetDaxModel(serverName, databaseName, "TestDaxModel", "0.2", true, 10, analyzeDirectQuery:true, analyzeDirectLake: directLakeExtractionMode);
var database = TomExtractor.GetDatabase(serverName, databaseName);
var daxModel = TomExtractor.GetDaxModel(serverName, databaseName, "TestDaxModel", "0.2", true, 10, analyzeDirectQuery:true, analyzeDirectLake: directLakeExtractionMode);
Console.WriteLine(database.CompatibilityMode);
//DumpReferencedColumns(daxModel);
//DumpReferencedMeasures(daxModel);
Expand Down Expand Up @@ -274,8 +270,8 @@ static void ConnectionStringTest()
const string pathOutput = @"c:\temp\";

Console.WriteLine("Getting model for connectionString {0}", connectionString);
var database = Dax.Metadata.Extractor.TomExtractor.GetDatabase(connectionString);
var daxModel = Dax.Metadata.Extractor.TomExtractor.GetDaxModel(connectionString, "TestDaxModel", "0.2", true, 10, analyzeDirectQuery: true);
var database = TomExtractor.GetDatabase(connectionString);
var daxModel = TomExtractor.GetDaxModel(connectionString, "TestDaxModel", "0.2", true, 10, analyzeDirectQuery: true);
Console.WriteLine(database.CompatibilityMode);
//DumpReferencedColumns(daxModel);
//DumpReferencedMeasures(daxModel);
Expand Down

0 comments on commit 204761e

Please sign in to comment.