Skip to content
This repository has been archived by the owner on May 4, 2023. It is now read-only.

Commit

Permalink
Doc String suggestions in DDBT schema-gen (#35)
Browse files Browse the repository at this point in the history
* doc file type and filesystem

* Add doc suggestions to schema gen

* WIP for suggested docs

* split doc suggestions graph execution with mutex

* Add user prompt and write to file

* update file pointers and user prompt

* formatting

* prompt improvements

* remove obsolete test

* increment version

* update emojis and formatting

* move completion statement

* Restore utils_test.go

* use map to search for column name

* Add new method to unmark graph run status

Co-authored-by: Ibrahim Faruqi <[email protected]>
  • Loading branch information
robertpknight and imfaruqi authored Apr 29, 2021
1 parent 15c1f9d commit e33d08f
Show file tree
Hide file tree
Showing 7 changed files with 233 additions and 6 deletions.
21 changes: 18 additions & 3 deletions cmd/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,20 @@ func addFailOnNotFoundFlag(cmd *cobra.Command) {
cmd.Flags().BoolVarP(&FailOnNotFound, "fail-on-not-found", "f", true, "Fail if given models are not found")
}

func compileAllModels() (*fs.FileSystem, *compiler.GlobalContext) {
_, _ = fmt.Fprintf(os.Stderr, "ℹ️ Building for %s profile\n", config.GlobalCfg.Target.Name)

// readFileSystem scans the project on disk with fs.ReadFileSystem, passing
// os.Stderr as the writer for its status messages. If the scan fails, the
// error is printed and the process exits with status 1.
func readFileSystem() *fs.FileSystem {
	project, err := fs.ReadFileSystem(os.Stderr)
	if err == nil {
		return project
	}

	fmt.Printf("❌ Unable to read filesystem: %s\n", err)
	os.Exit(1)
	return project // not reached: os.Exit terminates the process
}

func compileAllModels() (*fs.FileSystem, *compiler.GlobalContext) {
_, _ = fmt.Fprintf(os.Stderr, "ℹ️ Building for %s profile\n", config.GlobalCfg.Target.Name)

fileSystem := readFileSystem()
// Now parse and compile the whole project
parseSchemas(fileSystem)
parseFiles(fileSystem)
Expand All @@ -82,6 +86,17 @@ func compileAllModels() (*fs.FileSystem, *compiler.GlobalContext) {
return fileSystem, gc
}

// allDocFiles reads the file system and returns the names of all doc files
// found, as a set: the keys are doc names and every value is nil, so callers
// should test for key presence only.
func allDocFiles() map[string]interface{} {
	docs := readFileSystem().Docs

	names := make(map[string]interface{}, len(docs))
	for _, d := range docs {
		names[d.Name] = nil
	}
	return names
}

func parseFiles(fileSystem *fs.FileSystem) {
pb := utils.NewProgressBar("📜 Reading & Parsing Files", fileSystem.NumberFiles())
defer pb.Stop()
Expand Down
119 changes: 119 additions & 0 deletions cmd/schema_gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"ddbt/fs"
"ddbt/properties"
"ddbt/utils"
"sync"

"fmt"
"os"
Expand Down Expand Up @@ -41,13 +42,25 @@ var schemaGenCmd = &cobra.Command{

// Build a graph from the given filter.
fileSystem, _ := compileAllModels()

graph := buildGraph(fileSystem, ModelFilters)

// Generate schema for every file in the graph concurrently.
if err := generateSchemaForGraph(graph); err != nil {
fmt.Printf("❌ %s\n", err)
os.Exit(1)
}

// refresh the graph state for doc suggestions
fmt.Println("\n🌀 Resetting graph run status for doc string suggestions")
graph.UnmarkGraphAsFullyRun()

if err := suggestDocsForGraph(graph); err != nil {
fmt.Printf("❌ %s\n", err)
os.Exit(1)
}
os.Exit(1)

},
}

Expand All @@ -74,6 +87,62 @@ func generateSchemaForGraph(graph *fs.Graph) error {
pb.Increment()
return nil
}, config.NumberThreads(), pb)

}

// DocSuggestions collects, per model name, the column names for which a doc
// suggestion was produced. Every method takes mu, so a single value can be
// shared between goroutines; because it embeds a mutex it must be used through
// a pointer and never copied.
type DocSuggestions struct {
	mu          sync.Mutex
	suggestions map[string][]string
}

// AppendSuggestion stores modelSuggestions under modelName, replacing anything
// previously stored for that model. The underlying map is created on first use
// so that a zero-value DocSuggestions is usable without calling Init.
func (d *DocSuggestions) AppendSuggestion(modelName string, modelSuggestions []string) {
	d.mu.Lock()
	defer d.mu.Unlock()

	if d.suggestions == nil {
		d.suggestions = make(map[string][]string)
	}
	d.suggestions[modelName] = modelSuggestions
}

// Init resets the collection to a fresh, empty map, discarding any suggestions
// recorded so far.
func (d *DocSuggestions) Init() {
	d.mu.Lock()
	defer d.mu.Unlock()

	d.suggestions = make(map[string][]string)
}

// Value returns the collected suggestions keyed by model name. The returned
// map is the live internal map, not a copy, so it should only be read once no
// further AppendSuggestion or Init calls can happen. It is nil if nothing has
// been stored and Init was never called.
func (d *DocSuggestions) Value() (suggestions map[string][]string) {
	d.mu.Lock()
	defer d.mu.Unlock()

	return d.suggestions
}

// suggestDocsForGraph visits every file in graph using config.NumberThreads()
// workers, collects doc suggestions for each model file, and then hands the
// collected suggestions to userPromptDocs so the user can choose whether to
// write them out. It returns the first error from graph execution or from the
// prompt/write step.
func suggestDocsForGraph(graph *fs.Graph) error {
	knownDocs := allDocFiles()
	bar := utils.NewProgressBar("🎁 Suggesting docs", graph.Len())

	var collected DocSuggestions
	collected.Init()

	// visit runs once per graph node; only model files can yield suggestions,
	// but the progress bar advances for every node.
	visit := func(file *fs.File) error {
		if file.Type == fs.ModelFile {
			if name, cols := suggestDocs(file, knownDocs); len(cols) > 0 {
				collected.AppendSuggestion(name, cols)
			}
		}
		bar.Increment()
		return nil
	}

	if err := graph.Execute(visit, config.NumberThreads(), bar); err != nil {
		return err
	}
	// Stop the bar before prompting so it does not interleave with the prompt.
	bar.Stop()

	return userPromptDocs(graph, collected.Value())
}

// generateSchemaForModel generates a schema and writes yml for modelName.
Expand Down Expand Up @@ -116,6 +185,7 @@ func generateSchemaForModel(ctx context.Context, model *fs.File) error {
return err
}
fmt.Println("\n✅ " + model.Name + "schema successfully updated at path: " + ymlPath)

return nil
}

Expand Down Expand Up @@ -198,3 +268,52 @@ func removeOutdatedColumnsFromSchema(schemaModel *properties.Model, bqColumns []
schemaModel.Columns = columnsKept
fmt.Println("➖ Columns removed from Schema (no longer in BQ table):", columnsRemoved)
}

// suggestDocs looks for schema columns of file that have an empty description
// and whose name is a key of allDocFiles. For each such column it sets the
// description, in place on file.Schema, to a `{{ doc("<column>") }}` reference
// and records the column name.
//
// It returns the schema's model name together with the names of the columns
// that were updated (nil when none were).
func suggestDocs(file *fs.File, allDocFiles map[string]interface{}) (string, []string) {
	var updated []string

	for i := range file.Schema.Columns {
		name := file.Schema.Columns[i].Name

		// Columns that are already described are left untouched.
		if file.Schema.Columns[i].Description != "" {
			continue
		}
		if _, ok := allDocFiles[name]; !ok {
			continue
		}

		file.Schema.Columns[i].Description = fmt.Sprintf("{{ doc(\"%s\") }}", name)
		updated = append(updated, name)
	}

	return file.Schema.Name, updated
}

// userPromptDocs lists the models that have doc suggestions, asks on stdin
// whether to apply them, and on confirmation writes a schema file for every
// graph node whose name appears in docSugsMap.
//
// docSugsMap maps a model name to the column names that received a suggested
// doc reference. When it is empty nothing is printed or written. Only the
// answers "y" and "Y" confirm; anything else, including an empty line or a
// failed read, is treated as "no". The first error encountered while writing a
// schema file is returned, wrapped with the path that failed.
func userPromptDocs(graph *fs.Graph, docSugsMap map[string][]string) error {
	if len(docSugsMap) == 0 {
		return nil
	}

	fmt.Println("\n📄 Found existing doc files for columns in the following models: ")
	for model, cols := range docSugsMap {
		// Long lists are summarised by count to keep the prompt readable.
		if len(cols) > 10 {
			fmt.Println("\n🧬 Model:", model, "\n↪️ Suggestions:", len(cols), "fields")
		} else {
			fmt.Println("\n🧬 Model:", model, "\n↪️ Suggestions:", cols)
		}
	}
	fmt.Println("\n❔Would you like to add docs strings to descriptions (y/N)?")

	// The read error is ignored on purpose: on an empty line or closed stdin
	// answer stays empty, which falls through to the default "no".
	var answer string
	_, _ = fmt.Scanln(&answer)
	if answer != "y" && answer != "Y" {
		return nil
	}

	// NOTE(review): suggestions are keyed by the schema's model name while this
	// lookup uses file.Name — presumably these always match; confirm.
	for file := range graph.ListNodes() {
		if _, suggested := docSugsMap[file.Name]; !suggested {
			continue
		}

		ymlPath, schemaFile := generateEmptySchemaFile(file)
		schemaFile.Models = properties.Models{file.Schema}
		if err := schemaFile.WriteToFile(ymlPath); err != nil {
			return fmt.Errorf("writing schema YML to %s: %w", ymlPath, err)
		}
	}

	fmt.Println("✅ Docs added to schema files")
	return nil
}
42 changes: 42 additions & 0 deletions fs/docs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package fs

import (
"io/ioutil"
"path/filepath"
"strings"
"sync"
)

// DocFile is a markdown doc file found on disk.
type DocFile struct {
	Name     string // base file name with a trailing ".md" removed
	Path     string // path the file was discovered at
	Contents string // file contents; empty until Parse has run

	mutex sync.Mutex // held by Parse while reading the file
}

// newDocFile builds a DocFile for path. The name is the last path element
// with a trailing ".md" stripped; the contents start out empty.
func newDocFile(path string) *DocFile {
	base := filepath.Base(path)

	doc := new(DocFile)
	doc.Path = path
	doc.Name = strings.TrimSuffix(base, ".md")
	return doc
}

// GetName returns the doc file's name.
func (d *DocFile) GetName() string {
	name := d.Name
	return name
}

// Parse reads the doc file at d.Path and stores its full text in d.Contents.
// The read is performed while holding the file's mutex. The fs argument is not
// used. On a read error the error is returned and d.Contents is left
// unchanged.
func (d *DocFile) Parse(fs *FileSystem) error {
	d.mutex.Lock()
	defer d.mutex.Unlock()

	// Doc files are kept verbatim; there is no further parsing step.
	raw, err := ioutil.ReadFile(d.Path)
	if err == nil {
		d.Contents = string(raw)
	}
	return err
}
42 changes: 41 additions & 1 deletion fs/filesystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ type FileSystem struct {
schemas map[string]*SchemaFile // schema files
tests map[string]*File // Tests
seeds map[string]*SeedFile // Seed CSV files
Docs map[string]*DocFile
testMutex sync.Mutex
}

Expand All @@ -31,6 +32,7 @@ func ReadFileSystem(msgWriter io.Writer) (*FileSystem, error) {
schemas: make(map[string]*SchemaFile),
tests: make(map[string]*File),
seeds: make(map[string]*SeedFile),
Docs: make(map[string]*DocFile),
}

// FIXME: disabled for a bit
Expand All @@ -54,14 +56,19 @@ func ReadFileSystem(msgWriter io.Writer) (*FileSystem, error) {
return nil, err
}

if err := fs.scanDocDirectory("./docs/"); err != nil {
return nil, err
}

fmt.Fprintf(
msgWriter,
"🔎 Found %d models, %d macros, %d tests, %d schema, %d seed files\n",
"🔎 Found %d models, %d macros, %d tests, %d schema, %d seed files, %d docs\n",
len(fs.files)-len(fs.macroLookup)-len(fs.tests),
len(fs.macroLookup),
len(fs.tests),
len(fs.schemas),
len(fs.seeds),
len(fs.Docs),
)

return fs, nil
Expand All @@ -76,6 +83,7 @@ func InMemoryFileSystem(models map[string]string) (*FileSystem, error) {
schemas: make(map[string]*SchemaFile),
tests: make(map[string]*File),
seeds: make(map[string]*SeedFile),
Docs: make(map[string]*DocFile),
}

for filePath, contents := range models {
Expand Down Expand Up @@ -173,6 +181,12 @@ func (fs *FileSystem) recordSchemaFile(path string) error {
return nil
}

// recordDocFile registers the doc file at path in fs.Docs, keyed by the path
// itself (not by the doc's name). It always returns nil.
func (fs *FileSystem) recordDocFile(path string) error {
	doc := newDocFile(path)
	fs.Docs[path] = doc
	return nil
}

// Maps macros into our lookup options
func (fs *FileSystem) mapMacroLookupOptions(file *File) error {
path := strings.TrimSuffix(filepath.Base(file.Path), ".sql")
Expand Down Expand Up @@ -212,6 +226,32 @@ func (fs *FileSystem) scanSeedDirectory(path string) error {
})
}

// scanDocDirectory walks the tree rooted at path and records every regular
// entry whose extension is ".md" as a doc file. A missing directory is not an
// error: the function returns nil without recording anything. Any other stat
// or walk error is returned as-is.
func (fs *FileSystem) scanDocDirectory(path string) error {
	_, statErr := os.Stat(path)
	switch {
	case statErr == nil:
		// Directory exists; fall through to the walk below.
	case os.IsNotExist(statErr):
		// Projects without a docs directory are valid.
		return nil
	default:
		return statErr
	}

	visit := func(entry string, info os.FileInfo, walkErr error) error {
		// Abort the walk on the first error reported for an entry.
		if walkErr != nil {
			return walkErr
		}
		if info.IsDir() || filepath.Ext(filepath.Clean(entry)) != ".md" {
			return nil
		}
		return fs.recordDocFile(entry)
	}

	return filepath.Walk(path, visit)
}

func (fs *FileSystem) recordSeedFile(path string) error {
name := strings.TrimSuffix(filepath.Base(path), ".csv")

Expand Down
11 changes: 11 additions & 0 deletions fs/graph.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,10 @@ func (g *Graph) Len() int {
return len(g.nodes)
}

// ListNodes returns the graph's nodes keyed by the file each node belongs to.
// The returned map is the graph's own map, not a copy.
func (g *Graph) ListNodes() map[*File]*Node {
return g.nodes
}

func (g *Graph) Execute(f func(file *File) error, numWorkers int, pb *utils.ProgressBar) error {
var wait sync.WaitGroup

Expand Down Expand Up @@ -452,6 +456,13 @@ func (g *Graph) MarkGraphAsFullyRun() {
}
}

// UnmarkGraphAsFullyRun clears the queuedToRun and hasRun flags on every node
// in the graph.
func (g *Graph) UnmarkGraphAsFullyRun() {
	for _, n := range g.nodes {
		n.queuedToRun, n.hasRun = false, false
	}
}

// If a file is in the graph, this removes it's queuedToRun and hasRun flags
func (g *Graph) UnmarkFileAsRun(file *File) {
node, found := g.nodes[file]
Expand Down
2 changes: 1 addition & 1 deletion tests/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,4 +124,4 @@ func parseFile(file *fs.File) error {

file.SyntaxTree = syntaxTree
return nil
}
}
2 changes: 1 addition & 1 deletion utils/version.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
package utils

const DdbtVersion = "0.5.0"
const DdbtVersion = "0.5.1"

0 comments on commit e33d08f

Please sign in to comment.