From 3da10ddd17ac5fd3cdc01e6c9f8125c82936b8fc Mon Sep 17 00:00:00 2001 From: David Gasquez Date: Mon, 11 Dec 2023 11:31:22 +0000 Subject: [PATCH] =?UTF-8?q?chore:=20=F0=9F=9A=9A=20simplify?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .devcontainer/devcontainer.json | 7 +------ Dockerfile | 8 +++++--- Makefile | 34 ++++++++------------------------- README.md | 6 +++--- datadex/dag.py | 4 +++- dbt/dbt_project.yml | 15 +++++---------- dbt/plugins/__init__.py | 0 dbt/plugins/frictionless.py | 15 --------------- dbt/profiles.yml | 4 ++-- pyproject.toml | 14 +++++++++----- 10 files changed, 36 insertions(+), 71 deletions(-) delete mode 100644 dbt/plugins/__init__.py delete mode 100644 dbt/plugins/frictionless.py diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 64d0838..091f75a 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -7,10 +7,10 @@ "customizations": { "vscode": { "settings": { + "quarto.render.previewType": "external", "files.associations": { "*.sql": "jinja-sql" }, - "quarto.render.previewType": "external", "[jinja-sql]": { "editor.defaultFormatter": "innoverio.vscode-dbt-power-user", "editor.formatOnSave": true @@ -48,12 +48,7 @@ ] } }, - "features": { - "ghcr.io/stuartleeks/dev-container-features/shell-history:latest": {}, - "ghcr.io/devcontainers/features/common-utils:latest": {} - }, "postCreateCommand": "pip install -e '.[dev]'", - "remoteUser": "vscode", "portsAttributes": { "3000": { "label": "Dagster" diff --git a/Dockerfile b/Dockerfile index 6b290c0..578891b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,6 +14,8 @@ RUN curl -sL $(curl https://quarto.org/docs/download/_prerelease.json | grep -oP && dpkg -i /tmp/quarto.deb \ && rm /tmp/quarto.deb -# Workspace Folder -ENV WORKSPACE_FOLDER=/workspaces/datadex -WORKDIR $WORKSPACE_FOLDER +# Environment Variables +ENV DAGSTER_HOME "/home/vscode" + +# Working Directory +WORKDIR 
/workspaces/datadex diff --git a/Makefile b/Makefile index de69319..634f7f6 100644 --- a/Makefile +++ b/Makefile @@ -1,43 +1,25 @@ .DEFAULT_GOAL := run -IMAGE_NAME := davidgasquez/datadex:v1.0.0 - run: - dagster asset materialize --select \* -m datadex.dag; + dagster asset materialize --select \* -m datadex.dag dev: dagster dev -m datadex.dag -.PHONY: docs +preview: + quarto preview + docs: cd dbt && dbt docs generate --profiles-dir . mkdir -p dbt/target/docs cp dbt/target/*.json dbt/target/index.html dbt/target/graph.gpickle dbt/target/docs/ -quarto: docs +render: docs quarto render quarto render README.md -M output-file:index cp -r dbt/target/docs/ .quarto/output/docs -preview: - quarto preview - clean: - dbt clean --project-dir dbt; - rm -rf data/* output .quarto target dbt_packages - -rill: - curl -s https://cdn.rilldata.com/install.sh | bash - rill start ~/rill - -build: - docker build -t $(IMAGE_NAME) -t davidgasquez/datadex:latest . - -docker-run: - docker run -it --rm -u vscode -e DAGIT_HOST=0.0.0.0 -p 3000:3000 \ - -v $(PWD):/workspaces/datadex \ - --env-file .env \ - $(IMAGE_NAME) /bin/bash - -push: - docker push $(IMAGE_NAME) + rm -rf data/*.parquet data/*.duckdb + rm -rf .quarto + rm -rf dbt/target dbt/dbt_packages dbt/logs diff --git a/README.md b/README.md index 75a39e9..fa3d046 100644 --- a/README.md +++ b/README.md @@ -20,11 +20,11 @@ This repository is an up to date toy implementation of the overall pattern. You ### 💡 Principles -- **Open**: Code and data are fully open. [Use open standards and share data in accesible formats](https://voltrondata.com/codex/a-new-frontier). -- **Modular**: Each component can be replaced, extended, or removed. Works well in many environments (your laptop, in a cluster, or from the browser), and with multiple tools (thanks to the Arrow ecosystem). +- **Open**: Code and data are fully open source. +- **Modular and Interoperable**: Each component can be replaced, extended, or removed. 
Works well in many environments (your laptop, in a cluster, or from the browser), and with multiple tools (thanks to the Arrow ecosystem). [Use open standards and share data in accessible formats](https://voltrondata.com/codex/a-new-frontier). - **Permissionless**. Don't ask, fork it and improve the models, add a new source or update any script. No API limits, just plain files. - **Data as Code**. Declarative stateless transformations tracked in `git`. Version your data as code! Publish and share your reusable models for others to build on top. -- **Modern**: Supports types, tests, materialized views, and more. +- **Glue**: Be a bridge between tools and approaches. E.g., Use software engineering good practices like types, tests, materialized views, and more. Datadex is not a new tool. **Datadex is a pattern showing an opinionated bridge between existing ones**. diff --git a/datadex/dag.py b/datadex/dag.py index b69fbc5..20f98e4 100644 --- a/datadex/dag.py +++ b/datadex/dag.py @@ -7,6 +7,8 @@ from . 
import assets DBT_PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) + "/../dbt/" +DATA_DIR = os.path.dirname(os.path.abspath(__file__)) + "/../data/" + dbt_resource = dbt_cli_resource.configured( {"project_dir": DBT_PROJECT_DIR, "profiles_dir": DBT_PROJECT_DIR} @@ -18,7 +20,7 @@ resources = { "dbt": dbt_resource, - "io_manager": DuckDBPandasIOManager(database=f"{os.getenv('DATA_DIR')}/local.duckdb"), + "io_manager": DuckDBPandasIOManager(database=DATA_DIR + "local.duckdb"), } defs = Definitions(assets=[*dbt_assets, *python_assets], resources=resources) diff --git a/dbt/dbt_project.yml b/dbt/dbt_project.yml index 0c81067..77e8aba 100644 --- a/dbt/dbt_project.yml +++ b/dbt/dbt_project.yml @@ -1,16 +1,11 @@ -name: "datadex" -version: "1.0.0" +name: datadex +version: 1.0.0 config-version: 2 -profile: "default" +profile: default -target-path: "target" -clean-targets: - - "logs" - - "target" - - "database" - - "dbt_packages" +target-path: target models: datadex: - +materialized: external + +materialized: table diff --git a/dbt/plugins/__init__.py b/dbt/plugins/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/dbt/plugins/frictionless.py b/dbt/plugins/frictionless.py deleted file mode 100644 index a609734..0000000 --- a/dbt/plugins/frictionless.py +++ /dev/null @@ -1,15 +0,0 @@ -from frictionless import Package - -from dbt.adapters.duckdb.plugins import BasePlugin - - -class Plugin(BasePlugin): - def load(self, config): - package = Package(config.meta.get("package")) - resource_name = config.name - - if "_" in resource_name: - resource_name = resource_name.replace("_", "-") - - resource = package.get_resource(resource_name) - return resource.to_pandas() diff --git a/dbt/profiles.yml b/dbt/profiles.yml index cc0082d..8f908e6 100644 --- a/dbt/profiles.yml +++ b/dbt/profiles.yml @@ -2,7 +2,7 @@ default: outputs: dev: type: duckdb - path: "../{{ env_var('DATA_DIR') }}/local.duckdb" + path: "../data/local.duckdb" threads: 8 extensions: - httpfs @@ 
-10,7 +10,7 @@ default: settings: enable_object_cache: true enable_http_metadata_cache: true - external_root: "../{{ env_var('DATA_DIR') }}" + external_root: "../data/" target: dev config: diff --git a/pyproject.toml b/pyproject.toml index a00b766..db64f37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,7 @@ [project] name = "datadex" version = "1.0.0" +authors = [{ name = "David Gasquez" }] dependencies = [ "altair", "dagster-dbt", @@ -24,6 +25,12 @@ requires-python = ">=3.11, <=3.12" readme = "README.md" license = { text = "MIT" } +[project.urls] +Homepage = "https://davidgasquez.github.io/datadex/" +Changelog = "https://github.com/davidgasquez/datadex/commits/main/" +Issues = "https://github.com/davidgasquez/datadex/issues" +CI = "https://github.com/davidgasquez/datadex/actions" + [project.optional-dependencies] dev = ["dagit", "shandy-sqlfmt[jinjafmt]", "ruff"] @@ -31,11 +38,8 @@ dev = ["dagit", "shandy-sqlfmt[jinjafmt]", "ruff"] requires = ["setuptools"] build-backend = "setuptools.build_meta" -[tool.setuptools] -packages = ["datadex"] - [tool.setuptools.package-data] "datadex" = ["../dbt/**"] -[tool.dagster] -module_name = "datadex" +[tool.setuptools] +packages = ["datadex"]