Skip to content

Commit

Permalink
chore: 🚚 simplify
Browse files Browse the repository at this point in the history
  • Loading branch information
davidgasquez committed Dec 11, 2023
1 parent 2ed5e21 commit 3da10dd
Show file tree
Hide file tree
Showing 10 changed files with 36 additions and 71 deletions.
7 changes: 1 addition & 6 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
"customizations": {
"vscode": {
"settings": {
"quarto.render.previewType": "external",
"files.associations": {
"*.sql": "jinja-sql"
},
"quarto.render.previewType": "external",
"[jinja-sql]": {
"editor.defaultFormatter": "innoverio.vscode-dbt-power-user",
"editor.formatOnSave": true
Expand Down Expand Up @@ -48,12 +48,7 @@
]
}
},
"features": {
"ghcr.io/stuartleeks/dev-container-features/shell-history:latest": {},
"ghcr.io/devcontainers/features/common-utils:latest": {}
},
"postCreateCommand": "pip install -e '.[dev]'",
"remoteUser": "vscode",
"portsAttributes": {
"3000": {
"label": "Dagster"
Expand Down
8 changes: 5 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ RUN curl -sL $(curl https://quarto.org/docs/download/_prerelease.json | grep -oP
&& dpkg -i /tmp/quarto.deb \
&& rm /tmp/quarto.deb

# Workspace Folder
ENV WORKSPACE_FOLDER=/workspaces/datadex
WORKDIR $WORKSPACE_FOLDER
# Environment Variables
ENV DAGSTER_HOME "/home/vscode"

# Working Directory
WORKDIR /workspaces/datadex
34 changes: 8 additions & 26 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,43 +1,25 @@
.DEFAULT_GOAL := run

IMAGE_NAME := davidgasquez/datadex:v1.0.0

run:
dagster asset materialize --select \* -m datadex.dag;
dagster asset materialize --select \* -m datadex.dag

dev:
dagster dev -m datadex.dag

.PHONY: docs
preview:
quarto preview

docs:
cd dbt && dbt docs generate --profiles-dir .
mkdir -p dbt/target/docs
cp dbt/target/*.json dbt/target/index.html dbt/target/graph.gpickle dbt/target/docs/

quarto: docs
render: docs
quarto render
quarto render README.md -M output-file:index
cp -r dbt/target/docs/ .quarto/output/docs

preview:
quarto preview

clean:
dbt clean --project-dir dbt;
rm -rf data/* output .quarto target dbt_packages

rill:
curl -s https://cdn.rilldata.com/install.sh | bash
rill start ~/rill

build:
docker build -t $(IMAGE_NAME) -t davidgasquez/datadex:latest .

docker-run:
docker run -it --rm -u vscode -e DAGIT_HOST=0.0.0.0 -p 3000:3000 \
-v $(PWD):/workspaces/datadex \
--env-file .env \
$(IMAGE_NAME) /bin/bash

push:
docker push $(IMAGE_NAME)
rm -rf data/*.parquet data/*.duckdb
rm -rf .quarto
rm -rf dbt/target dbt/dbt_packages dbt/logs
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ This repository is an up to date toy implementation of the overall pattern. You

### 💡 Principles

- **Open**: Code and data are fully open. [Use open standards and share data in accessible formats](https://voltrondata.com/codex/a-new-frontier).
- **Modular**: Each component can be replaced, extended, or removed. Works well in many environments (your laptop, in a cluster, or from the browser), and with multiple tools (thanks to the Arrow ecosystem).
- **Open**: Code and data are fully open source.
- **Modular and Interoperable**: Each component can be replaced, extended, or removed. Works well in many environments (your laptop, in a cluster, or from the browser), and with multiple tools (thanks to the Arrow ecosystem). [Use open standards and share data in accessible formats](https://voltrondata.com/codex/a-new-frontier).
- **Permissionless**. Don't ask, fork it and improve the models, add a new source or update any script. No API limits, just plain files.
- **Data as Code**. Declarative stateless transformations tracked in `git`. Version your data as code! Publish and share your reusable models for others to build on top.
- **Modern**: Supports types, tests, materialized views, and more.
- **Glue**: Be a bridge between tools and approaches. E.g., use good software engineering practices like types, tests, materialized views, and more.

Datadex is not a new tool. **Datadex is a pattern showing an opinionated bridge between existing ones**.

Expand Down
4 changes: 3 additions & 1 deletion datadex/dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from . import assets

DBT_PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) + "/../dbt/"
DATA_DIR = os.path.dirname(os.path.abspath(__file__)) + "/../data/"


dbt_resource = dbt_cli_resource.configured(
{"project_dir": DBT_PROJECT_DIR, "profiles_dir": DBT_PROJECT_DIR}
Expand All @@ -18,7 +20,7 @@

resources = {
"dbt": dbt_resource,
"io_manager": DuckDBPandasIOManager(database=f"{os.getenv('DATA_DIR')}/local.duckdb"),
"io_manager": DuckDBPandasIOManager(database=DATA_DIR + "local.duckdb"),
}

defs = Definitions(assets=[*dbt_assets, *python_assets], resources=resources)
15 changes: 5 additions & 10 deletions dbt/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
name: "datadex"
version: "1.0.0"
name: datadex
version: 1.0.0
config-version: 2

profile: "default"
profile: default

target-path: "target"
clean-targets:
- "logs"
- "target"
- "database"
- "dbt_packages"
target-path: target

models:
datadex:
+materialized: external
+materialized: table
Empty file removed dbt/plugins/__init__.py
Empty file.
15 changes: 0 additions & 15 deletions dbt/plugins/frictionless.py

This file was deleted.

4 changes: 2 additions & 2 deletions dbt/profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@ default:
outputs:
dev:
type: duckdb
path: "../{{ env_var('DATA_DIR') }}/local.duckdb"
path: "../data/local.duckdb"
threads: 8
extensions:
- httpfs
- parquet
settings:
enable_object_cache: true
enable_http_metadata_cache: true
external_root: "../{{ env_var('DATA_DIR') }}"
external_root: "../data/"
target: dev

config:
Expand Down
14 changes: 9 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
[project]
name = "datadex"
version = "1.0.0"
authors = [{ name = "David Gasquez" }]
dependencies = [
"altair",
"dagster-dbt",
Expand All @@ -24,18 +25,21 @@ requires-python = ">=3.11, <=3.12"
readme = "README.md"
license = { text = "MIT" }

[project.urls]
Homepage = "https://davidgasquez.github.io/datadex/"
Changelog = "https://github.com/davidgasquez/datadex/commits/main/"
Issues = "https://github.com/davidgasquez/datadex/issues"
CI = "https://github.com/davidgasquez/datadex/actions"

[project.optional-dependencies]
dev = ["dagit", "shandy-sqlfmt[jinjafmt]", "ruff"]

[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
packages = ["datadex"]

[tool.setuptools.package-data]
"datadex" = ["../dbt/**"]

[tool.dagster]
module_name = "datadex"
[tool.setuptools]
packages = ["datadex"]

0 comments on commit 3da10dd

Please sign in to comment.