diff --git a/.github/workflows/build_all.yml b/.github/workflows/build_all.yml new file mode 100644 index 0000000..7b68830 --- /dev/null +++ b/.github/workflows/build_all.yml @@ -0,0 +1,42 @@ +name: Build all extensions +on: + workflow_dispatch: + inputs: + duckdb_version: + type: string + duckdb_tag: + type: string + deploy: + type: string + +jobs: + collect_extensions: + outputs: + COMMUNITY_EXTENSION_LIST: ${{ steps.generate_list.outputs.EXTENSION_LIST }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Generate extension list + id: generate_list + run: | + ./scripts/get_extension_list.sh + cat extension_list + cat extension_list >> $GITHUB_OUTPUT + + build_all: + needs: + - collect_extensions + strategy: + fail-fast: false + matrix: + extension_name: ${{ fromJson(needs.collect_extensions.outputs.COMMUNITY_EXTENSION_LIST) }} + uses: ./.github/workflows/build.yml + secrets: inherit + with: + extension_name: ${{ matrix.extension_name }} + duckdb_version: ${{ inputs.duckdb_version }} + duckdb_tag: ${{ inputs.duckdb_tag }} + deploy: ${{ inputs.deploy }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..febfc48 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +build/ +downloads-last-week.json diff --git a/README.md b/README.md index 4dc6807..6f22f82 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ This repository collects third-party extensions created for [DuckDB](https://www.duckdb.org). -Visit the [DuckDB community extensions website](https://community-extensions.duckdb.org) to learn more about the available extensions. +View the [list of Community Extensions](https://duckdb.org/community_extensions/list_of_extensions). + +Visit the [DuckDB Community Extensions website](https://duckdb.org/community_extensions) to learn more about the available extensions. 
[Learn about the official extensions and community extensions.](https://duckdb.org/2024/07/05/community-extensions) diff --git a/extensions/bigquery/description.yml b/extensions/bigquery/description.yml index 83a7b04..ebe5bc8 100644 --- a/extensions/bigquery/description.yml +++ b/extensions/bigquery/description.yml @@ -1,7 +1,7 @@ extension: name: bigquery description: Integrates DuckDB with Google BigQuery, allowing direct querying and management of BigQuery datasets - version: 1.0.0 + version: 0.1.0 language: C++ build: cmake license: MIT @@ -13,12 +13,12 @@ extension: repo: github: hafenkran/duckdb-bigquery - ref: 656055570493d26de86c4ab3a81f0c1b718835d2 + ref: e5cd4a9ac32ce3380559bc63869c442c14e1fb75 docs: hello_world: | -- Attach to your BigQuery Project - D ATTACH 'project=my_gcp_project' as bq (TYPE bigquery, READ_ONLY); + D ATTACH 'project=my_gcp_project' AS bq (TYPE bigquery, READ_ONLY); -- Show all tables in all datasets in the attached BigQuery project D SHOW ALL TABLES; diff --git a/extensions/bigquery/docs/function_description.csv b/extensions/bigquery/docs/function_description.csv deleted file mode 100644 index e901007..0000000 --- a/extensions/bigquery/docs/function_description.csv +++ /dev/null @@ -1,5 +0,0 @@ -function,description,comment,example -bigquery_scan,"Query a single table directly from BigQuery using the specified table.",,"SELECT * FROM bigquery_scan('my_gcp_project.quacking_dataset.duck_tbl');" -bigquery_execute,"Execute arbitrary queries directly in BigQuery using native GoogleSQL.",,"ATTACH 'project=my_gcp_project' AS bq (TYPE bigquery); -CALL bigquery_execute('bq', 'CREATE SCHEMA deluxe_dataset OPTIONS(location=""us"", default_table_expiration_days=3.75);')" -bigquery_clear_cache,"Clear the internal caches to refetch the most current project information from BigQuery.","","CALL bigquery_clear_cache();" diff --git a/extensions/bigquery/docs/function_descriptions.csv b/extensions/bigquery/docs/function_descriptions.csv new file mode 
100644 index 0000000..6d8682e --- /dev/null +++ b/extensions/bigquery/docs/function_descriptions.csv @@ -0,0 +1,7 @@ +function,description,comment,example +bigquery_attach,"Attach to a BigQuery project.","","ATTACH 'project=my_gcp_project' as bq (TYPE bigquery);" +bigquery_scan,"Scan a single table directly from BigQuery.",,"SELECT * FROM bigquery_scan('my_gcp_project.quacking_dataset.duck_tbl');" +bigquery_query,"Run a custom GoogleSQL query in BigQuery and read the results.",,"SELECT * FROM bigquery_query('bq', 'SELECT * FROM quacking_dataset.duck_tbl WHERE duck_id = 123');" +bigquery_execute,"Execute an arbitrary GoogleSQL query in BigQuery.",,"CALL bigquery_execute('bq', 'CREATE SCHEMA deluxe_dataset OPTIONS(location=""us"", default_table_expiration_days=3.75);')" +bigquery_jobs,"List jobs in a BigQuery project.","","SELECT * FROM bigquery_jobs('bq');" +bigquery_clear_cache,"Clear the internal caches to refetch the most current project information from BigQuery.","","CALL bigquery_clear_cache();" diff --git a/extensions/chsql/description.yml b/extensions/chsql/description.yml index 9391282..e9c3476 100644 --- a/extensions/chsql/description.yml +++ b/extensions/chsql/description.yml @@ -9,13 +9,13 @@ extension: - lmangani repo: - github: lmangani/duckdb-extension-clickhouse-sql - ref: 89e1c2fd68c18e018f5afb7ef81b3454abb46dfc + github: quackscience/duckdb-extension-clickhouse-sql + ref: a000d4f2a5f9e1c13cddcd75ea2d3351466f1706 docs: hello_world: | -- Use ClickHouse SQL function macros in DuckDB SQL queries - SELECT toString('world') as hello, toInt8OrZero('world') as zero; + SELECT toString('world') AS hello, toInt8OrZero('world') AS zero; ┌─────────┬───────┐ │ hello │ zero │ @@ -45,4 +45,4 @@ docs: extended_description: | - This extension implements a growing number of [ClickHouse SQL Macros](https://community-extensions.duckdb.org/extensions/chsql.html#added-functions) and functions for DuckDB. 
+ This extension implements a growing number of [ClickHouse SQL Macros](https://duckdb.org/community_extensions/extensions/chsql#added-functions) and functions for DuckDB. diff --git a/extensions/duckpgq/description.yml b/extensions/duckpgq/description.yml index 824a4f1..129c21e 100644 --- a/extensions/duckpgq/description.yml +++ b/extensions/duckpgq/description.yml @@ -14,8 +14,8 @@ repo: docs: hello_world: | - CREATE TABLE Person as select * from 'https://gist.githubusercontent.com/Dtenwolde/2b02aebbed3c9638a06fda8ee0088a36/raw/8c4dc551f7344b12eaff2d1438c9da08649d00ec/person-sf0.003.csv'; - CREATE TABLE Person_knows_person as select * from 'https://gist.githubusercontent.com/Dtenwolde/81c32c9002d4059c2c3073dbca155275/raw/8b440e810a48dcaa08c07086e493ec0e2ec6b3cb/person_knows_person-sf0.003.csv'; + CREATE TABLE Person AS SELECT * FROM 'https://gist.githubusercontent.com/Dtenwolde/2b02aebbed3c9638a06fda8ee0088a36/raw/8c4dc551f7344b12eaff2d1438c9da08649d00ec/person-sf0.003.csv'; + CREATE TABLE Person_knows_person AS SELECT * FROM 'https://gist.githubusercontent.com/Dtenwolde/81c32c9002d4059c2c3073dbca155275/raw/8b440e810a48dcaa08c07086e493ec0e2ec6b3cb/person_knows_person-sf0.003.csv'; CREATE PROPERTY GRAPH snb VERTEX TABLES ( @@ -24,7 +24,7 @@ docs: EDGE TABLES ( Person_knows_person SOURCE KEY (Person1Id) REFERENCES Person (id) DESTINATION KEY (Person2Id) REFERENCES Person (id) - LABEL Knows + LABEL knows ); FROM GRAPH_TABLE (snb @@ -52,4 +52,3 @@ docs: *Disclaimer*: As this extension is part of an ongoing research project by the Database Architectures group at CWI, some features may still be under development. We appreciate your understanding and patience as we continue to improve it. 
- diff --git a/extensions/evalexpr_rhai/description.yml b/extensions/evalexpr_rhai/description.yml index ddac16b..3ccbbdd 100644 --- a/extensions/evalexpr_rhai/description.yml +++ b/extensions/evalexpr_rhai/description.yml @@ -51,19 +51,19 @@ docs: \ employees (name text, state text, zip integer);\nINSERT INTO employees values\n\ \ ('Jane', 'FL', 33139),\n ('John', 'NJ', 08520);\n\n-- Pass the row from the\ \ employees table in as \"context.row\"\nSELECT evalexpr_rhai(\n '\n context.row.name\ - \ + \" is in \" + context.row.state\n ',\n {\n row: employees\n }) as result\ + \ + \" is in \" + context.row.state\n ',\n {\n row: employees\n }) AS result\ \ from employees;\n\n┌───────────────────────────────┐\n│ result \ \ │\n│ union(ok json, error varchar) │\n├───────────────────────────────┤\n\ │ \"Jane is in FL\" │\n│ \"John is in NJ\" │\n└───────────────────────────────┘\n\ \n-- To demonstrate how Rhai can be used to implement\n-- a function in DuckDB,\ \ the next example creates\n-- a macro function that calls a Rhai function\n--\ \ to calculate the Collatz sequence length.\n\nCREATE MACRO collatz_series_length(n)\ - \ as\n evalexpr_rhai('\n fn collatz_series(n) {\n let count = 0;\n\ + \ AS\n evalexpr_rhai('\n fn collatz_series(n) {\n let count = 0;\n\ \ while n > 1 {\n count += 1;\n if n % 2 == 0 {\n \ \ n /= 2;\n } else {\n n = n * 3 + 1;\n \ \ }\n }\n return count\n }\n collatz_series(context.n)\n\ \ ', {'n': n});\n\n-- Use the previously defined macro.\nSELECT\n collatz_series_length(range).ok::bigint\ - \ as sequence_length,\n range as starting_value\nFROM\n range(10000, 20000)\n\ + \ AS sequence_length,\n range AS starting_value\nFROM\n range(10000, 20000)\n\ ORDER BY 1 DESC limit 10;\n\n┌─────────────────┬────────────────┐\n│ sequence_length\ \ │ starting_value │\n│ int64 │ int64 │\n├─────────────────┼────────────────┤\n\ │ 278 │ 17647 │\n│ 278 │ 17673 │\n│\ diff --git a/extensions/faiss/description.yml b/extensions/faiss/description.yml index 
9c46c23..ea0e9d5 100644 --- a/extensions/faiss/description.yml +++ b/extensions/faiss/description.yml @@ -1,6 +1,6 @@ extension: name: faiss - description: Provides a subset of the faiss API to duckdb + description: Provides a subset of the faiss API to DuckDB version: 0.9.0 language: C++ build: cmake @@ -21,16 +21,16 @@ docs: hello_world: | -- Generate semi-random input data and queries -- Note that the dimensionality of our data will be 5 - CREATE TABLE input AS SELECT i as id, apply(generate_series(1, 5), j-> CAST(hash(i*1000+j) AS FLOAT)/18446744073709551615) as data FROM generate_series(1, 1000) s(i); - CREATE TABLE queries AS SELECT i as id, apply(generate_series(1, 5), j-> CAST(hash(i*1000+j+8047329823) AS FLOAT)/18446744073709551615) as data FROM generate_series(1, 10) s(i); + CREATE TABLE input AS SELECT i AS id, apply(generate_series(1, 5), j-> CAST(hash(i*1000+j) AS FLOAT)/18446744073709551615) AS data FROM generate_series(1, 1000) s(i); + CREATE TABLE queries AS SELECT i AS id, apply(generate_series(1, 5), j-> CAST(hash(i*1000+j+8047329823) AS FLOAT)/18446744073709551615) AS data FROM generate_series(1, 10) s(i); -- Create the index and insert data into it CALL FAISS_CREATE('name', 5, 'IDMap,HNSW32'); CALL FAISS_ADD((SELECT id, data FROM input), 'name'); -- Get 10 results with uneven id - SELECT id, UNNEST(FAISS_SEARCH_FILTER('name', 10, data, 'id%2==0', 'rowid', 'input')) FROM queries; + SELECT id, UNNEST(FAISS_SEARCH_FILTER('name', 10, data, 'id%2==1', 'rowid', 'input')) FROM queries; -- Get 10 results with even id SELECT id, UNNEST(FAISS_SEARCH_FILTER('name', 10, data, 'id%2==0', 'rowid', 'input')) FROM queries; -- Get 10 results SELECT id, UNNEST(FAISS_SEARCH('name', 10, data)) FROM queries; extended_description: | - The FAISS extension allows duckdb users to store vector data in faiss, and query this data, making reliable vector search more accessible. 
+ The FAISS extension allows DuckDB users to store vector data in faiss, and query this data, making reliable vector search more accessible. diff --git a/extensions/flockmtl/description.yml b/extensions/flockmtl/description.yml new file mode 100644 index 0000000..f13c23e --- /dev/null +++ b/extensions/flockmtl/description.yml @@ -0,0 +1,44 @@ +extension: + name: flockmtl + description: DuckDB LLM Extension + version: 0.1.0 + language: SQL & C++ + build: cmake + license: MIT + maintainers: + - dorbanianas + - SunnyYasser + - queryproc + +repo: + github: dsg-polymtl/duckdb-flockmtl + ref: 1bd8ac0f54f8bf4c7da1c3793b88e73daa127653 + +docs: + hello_world: | + -- After loading, any function call will throw an error if an OPENAI_API_KEY environment variable is not set + + -- Call an OpenAI model with a predefined prompt ('Tell me hello world') and default model ('gpt-4o-mini') + D SELECT llm_complete('hello-world', 'default'); + ┌──────────────────────────────────────────┐ + │ llm_complete(hello_world, default_model) │ + │ varchar │ + ├──────────────────────────────────────────┤ + │ Hello world │ + └──────────────────────────────────────────┘ + + -- Check the prompts and supported models + D GET PROMPTS; + D GET MODELS; + + -- Create a new prompt for summarizing text + D CREATE PROMPT('summarize', 'summarize the text into 1 word: {{text}}'); + + -- Create a variable name for the model to do the summarizing + D CREATE MODEL('summarizer-model', 'gpt-4o', 128000); + + -- Summarize text and pass it as parameter + D SELECT llm_complete('summarize', 'summarizer-model', {'text': 'We support more functions and approaches to combine relational analytics and semantic analysis. Check our repo for documentation and examples.'}); + + extended_description: | + This extension is experimental and potentially unstable. Do not use it in production. 
diff --git a/extensions/fuzzycomplete/description.yml b/extensions/fuzzycomplete/description.yml index 012145b..4708cc6 100644 --- a/extensions/fuzzycomplete/description.yml +++ b/extensions/fuzzycomplete/description.yml @@ -49,7 +49,7 @@ docs: CREATE TABLE automobile_vehicles(serial_number text); - SELECT suggestion from sql_auto_complete(''SELECT * from veh''); + SELECT suggestion FROM sql_auto_complete(''SELECT * FROM veh''); ┌─────────────────────┐ @@ -64,7 +64,7 @@ docs: └─────────────────────┘ - select suggestion from sql_auto_complete(''SELECT * from auto''); + SELECT suggestion FROM sql_auto_complete(''SELECT * FROM auto''); ┌──────────────────────────────────────┐ @@ -89,7 +89,7 @@ docs: └──────────────────────────────────────┘ - SELECT suggestion from sql_auto_complete(''SELECT * from bar''); + SELECT suggestion FROM sql_auto_complete(''SELECT * FROM bar''); ┌──────────────────────────────────────┐ @@ -108,7 +108,7 @@ docs: -- Demonstrate completion across databases/catalogs and schemas. 
- SELECT suggestion from sql_auto_complete(''SELECT * from table''); + SELECT suggestion FROM sql_auto_complete(''SELECT * FROM table''); ┌───────────────────────────────────────────────┐ diff --git a/extensions/gsheets/description.yml b/extensions/gsheets/description.yml new file mode 100644 index 0000000..bf245f4 --- /dev/null +++ b/extensions/gsheets/description.yml @@ -0,0 +1,57 @@ +extension: + name: gsheets + description: Read and write Google Sheets using SQL + version: 0.0.2 + language: C++ + build: cmake + license: MIT + excluded_platforms: "windows_amd64_rtools;wasm_mvp;wasm_eh;wasm_threads" + maintainers: + - archiewood + +repo: + github: evidence-dev/duckdb_gsheets + ref: 01bdf2872b78af1b9234792766392840fc4a91b7 + +docs: + hello_world: | + -- Authenticate with Google Account in the browser (easiest) + CREATE SECRET (TYPE gsheet); + + -- OR create a secret with your Google API access token (boring, see extension docs) + CREATE SECRET ( + TYPE gsheet, + PROVIDER access_token, + TOKEN '' + ); + + -- Read a spreadsheet by full URL + FROM read_gsheet('https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit'); + + -- Read a spreadsheet by full URL, implicitly + FROM 'https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit'; + + -- Read a spreadsheet by spreadsheet id + FROM read_gsheet('11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8'); + + -- Read a spreadsheet with no header row + SELECT * FROM read_gsheet('11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8', headers=false); + + -- Read a sheet other than the first sheet using the sheet name + SELECT * FROM read_gsheet('11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8', sheet='Sheet2'); + + -- Read a sheet other than the first sheet using the sheet id in the URL + SELECT * FROM read_gsheet('https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit?gid=644613997#gid=644613997'); + + -- Write a spreadsheet from a table by 
spreadsheet id + COPY TO '11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8' (FORMAT gsheet); + + -- Write a spreadsheet from a table by full URL + COPY TO 'https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit?usp=sharing' (FORMAT gsheet); + + -- Write a spreadsheet to a specific sheet using the sheet id in the URL + COPY TO 'https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit?gid=1295634987#gid=1295634987' (FORMAT gsheet); + + extended_description: | + The DuckDB GSheets Extension allows reading and writing of data in Google Sheets from DuckDB. + For detailed setup and usage instructions, visit the docs at [duckdb-gsheets.com](https://duckdb-gsheets.com). diff --git a/extensions/gsheets/docs/function_description.csv b/extensions/gsheets/docs/function_description.csv new file mode 100644 index 0000000..184ecac --- /dev/null +++ b/extensions/gsheets/docs/function_description.csv @@ -0,0 +1,3 @@ +function,description,comment,example +read_gsheet,"Read a single sheet directly from a Google Sheet via the sheet URL, or spreadsheet ID.",,"SELECT * FROM read_gsheet('https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit?gid=644613997#gid=644613997');" +"COPY TO","Write data from a table to a Google Sheet via the sheet URL, or spreadsheet ID.",,"COPY TO 'https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit?gid=1295634987#gid=1295634987' (FORMAT gsheet);" \ No newline at end of file diff --git a/extensions/hostfs/description.yml b/extensions/hostfs/description.yml new file mode 100644 index 0000000..6e0615f --- /dev/null +++ b/extensions/hostfs/description.yml @@ -0,0 +1,53 @@ +extension: + name: hostfs + description: Navigate and explore the filesystem using SQL + version: 0.0.1 + language: C++ + build: cmake + license: MIT + maintainers: + - Gropaul + +repo: + github: gropaul/hostFS + ref: 1aa25bee5eb7b95f6d46504ed72336f90468588a + 
+docs: + hello_world: | + -- Navigate to the workspace and list the files + D PRAGMA cd('/Users/paul/workspace'); + D PRAGMA ls; + ┌───────────────────────────────┐ + │ path │ + │ varchar │ + ├───────────────────────────────┤ + │ ./duckdb │ + │ ./playground │ + │ ./hostfs │ + -- Find the files you were working on last + D SELECT path, file_last_modified(path) AS date FROM ls() WHERE 'csv' IN file_extension(path) ORDER BY date LIMIT 1 ; + ┌───────────────────────────┬─────────────────────┐ + │ path │ date │ + │ varchar │ timestamp │ + ├───────────────────────────┼─────────────────────┤ + │ ./sketch_results_join.csv │ 2024-07-13 23:25:48 │ + └───────────────────────────┴─────────────────────┘ + -- List the top 3 file types by total size, with file count, ordered by size. + D SELECT size, count, file_extension AS "type" + FROM ( + SELECT SUM(file_size(path)) AS size_raw, format_bytes(size_raw) AS size, COUNT(*) AS count, file_extension(path) AS file_extension + FROM lsr('/Users/paul/workspace', 10) + GROUP BY file_extension(path) + ) AS subquery + ORDER BY size_raw DESC LIMIT 3; + ┌───────────┬───────┬─────────┐ + │ size │ count │ type │ + │ varchar │ int64 │ varchar │ + ├───────────┼───────┼─────────┤ + │ 246.95 GB │ 29 │ .duckdb │ + │ 90.33 GB │ 3776 │ .tmp │ + │ 26.17 GB │ 28175 │ .csv │ + └───────────┴───────┴─────────┘ + extended_description: > + The HostFS extension allows you to navigate and explore the filesystem using SQL. It provides a set of functions to list files, get file metadata, and more. + For more information, please see the [HostFS documentation](https://github.com/gropaul/hostFS). 
\ No newline at end of file diff --git a/extensions/http_client/description.yml b/extensions/http_client/description.yml new file mode 100644 index 0000000..a28d1a4 --- /dev/null +++ b/extensions/http_client/description.yml @@ -0,0 +1,82 @@ +extension: + name: http_client + description: DuckDB HTTP Client Extension + version: 0.0.2 + language: C++ + build: cmake + license: MIT + maintainers: + - lmangani + - ahuarte47 + +repo: + github: quackscience/duckdb-extension-httpclient + ref: db0ebb7f8c2688ff7a785b83a387bf782d13afd1 + +docs: + hello_world: | + -- GET Request Example w/ JSON Parsing + WITH __input AS ( + SELECT + http_get( + 'https://httpbin.org/delay/0' + ) AS res + ), + __response AS ( + SELECT + (res->>'status')::INT AS status, + (res->>'reason') AS reason, + unnest( from_json(((res->>'body')::JSON)->'headers', '{"Host": "VARCHAR"}') ) AS features + FROM + __input + ) + SELECT + __response.status, + __response.reason, + __response.Host AS host, + FROM + __response + ; + ┌────────┬─────────┬─────────────┐ + │ status │ reason │ host │ + │ int32 │ varchar │ varchar │ + ├────────┼─────────┼─────────────┤ + │ 200 │ OK │ httpbin.org │ + └────────┴─────────┴─────────────┘ + + -- POST Request Example w/ Headers and Parameters + WITH __input AS ( + SELECT + http_post( + 'https://httpbin.org/delay/0', + headers => MAP { + 'accept': 'application/json', + }, + params => MAP { + } + ) AS res + ), + __response AS ( + SELECT + (res->>'status')::INT AS status, + (res->>'reason') AS reason, + unnest( from_json(((res->>'body')::JSON)->'headers', '{"Host": "VARCHAR"}') ) AS features + FROM + __input + ) + SELECT + __response.status, + __response.reason, + __response.Host AS host, + FROM + __response + ; + ┌────────┬─────────┬─────────────┐ + │ status │ reason │ host │ + │ int32 │ varchar │ varchar │ + ├────────┼─────────┼─────────────┤ + │ 200 │ OK │ httpbin.org │ + └────────┴─────────┴─────────────┘ + + extended_description: | + The HTTP Client Extension is experimental, 
use at your own risk! diff --git a/extensions/httpserver/description.yml b/extensions/httpserver/description.yml index 4fcde5e..d98d518 100644 --- a/extensions/httpserver/description.yml +++ b/extensions/httpserver/description.yml @@ -1,7 +1,7 @@ extension: name: httpserver description: DuckDB HTTP API Server Extension - version: 0.1.1 + version: 0.1.3 language: SQL & C++ build: cmake license: MIT @@ -11,24 +11,36 @@ extension: - akvlad repo: - github: lmangani/duckdb-extension-httpserver - ref: 4122f8d4ca0ece71f8138a0c619b689c880b59aa + github: quackscience/duckdb-extension-httpserver + ref: 9f075f6e39e171560c622f10b05f614646470e8b docs: hello_world: | -- Start a DuckDB HTTP API Server with parameters - D SELECT httpserve_start('0.0.0.0',9999); - ┌─────────────────────────────────────┐ - │ httpserve_start('0.0.0.0', 9999) │ - │ varchar │ - ├─────────────────────────────────────┤ - │ HTTP server started on 0.0.0.0:9999 │ - └─────────────────────────────────────┘ + D SELECT httpserve_start('0.0.0.0', 9999, 'user:pass'); + ┌───────────────────────────────────────────────┐ + │ httpserve_start('0.0.0.0', 9999, 'user:pass') │ + │ varchar │ + ├───────────────────────────────────────────────┤ + │ HTTP server started on 0.0.0.0:9999 │ + └───────────────────────────────────────────────┘ - -- Query your DuckDB HTTP API Server using curl or any other client - curl -X POST -d "LOAD chsql; SELECT *, uuid() FROM numbers(10)" http://localhost:9999/ + -- Browse to your DuckDB HTTP API endpoint to Query using the embedded interface + + -- Query your DuckDB HTTP API Server using curl or any other client w/ HTTP Basic Auth + curl -X POST -d "LOAD chsql; SELECT *, uuid() FROM numbers(10)" "http://user:pass@localhost:9999/" + + -- Query your DuckDB HTTP API Server using curl or any other client w/ X-API-Key header + curl -X POST --header "X-API-Key: secretkey" -d "LOAD chsql; SELECT *, uuid() FROM numbers(10)" "http://localhost:9999/" + + -- Query your DuckDB HTTP API Server using 
DuckDB HTTPFS extension w/ Header Authentication + D CREATE SECRET extra_http_headers ( + TYPE HTTP, + EXTRA_HTTP_HEADERS MAP{ + 'X-API-Key': 'secretkey' + } + ); - -- Browse to your DuckDB HTTP API endpoint to query using the embedded interface extended_description: | - This extension is highly experimental and potentially unstable. Do not use it in production. + This extension is experimental and potentially unstable. Do not use it in production. diff --git a/extensions/lindel/description.yml b/extensions/lindel/description.yml index a3d069f..3dda2ff 100644 --- a/extensions/lindel/description.yml +++ b/extensions/lindel/description.yml @@ -57,10 +57,10 @@ docs: \ |\n| `UBIGINT` | 2 | 1: `UBIGINT`
2: `UHUGEINT` |\n| `FLOAT` | 4\ \ | 1: `UINTEGER`
2: `UBIGINT`
3-4: `UHUGEINT` |\n| `DOUBLE` | 2 |\ \ 1: `UBIGINT`
2: `UHUGEINT` |\n" - hello_world: "WITH elements as (\n SELECT * as id FROM range(3)\n)\nSELECT\n a.id\ - \ as a,\n b.id as b,\n hilbert_encode([a.id, b.id]::tinyint[2]) as hilbert,\n\ - \ morton_encode([a.id, b.id]::tinyint[2]) as morton\nFROM\n elements as a cross\ - \ join elements as b;\n┌───────┬───────┬─────────┬────────┐\n│ a │ b │\ + hello_world: "WITH elements AS (\n SELECT * AS id FROM range(3)\n)\nSELECT\n a.id\ + \ AS a,\n b.id AS b,\n hilbert_encode([a.id, b.id]::tinyint[2]) AS hilbert,\n\ + \ morton_encode([a.id, b.id]::tinyint[2]) AS morton\nFROM\n elements AS a CROSS\ + \ JOIN elements AS b;\n┌───────┬───────┬─────────┬────────┐\n│ a │ b │\ \ hilbert │ morton │\n│ int64 │ int64 │ uint16 │ uint16 │\n├───────┼───────┼─────────┼────────┤\n\ │ 0 │ 0 │ 0 │ 0 │\n│ 0 │ 1 │ 3 │ 1 │\n│\ \ 0 │ 2 │ 4 │ 4 │\n│ 1 │ 0 │ 1 │ 2 │\n│\ @@ -68,22 +68,22 @@ docs: \ 2 │ 0 │ 14 │ 8 │\n│ 2 │ 1 │ 13 │ 9 │\n│\ \ 2 │ 2 │ 8 │ 12 │\n└───────┴───────┴─────────┴────────┘\n\n\ -- Encode two 32-bit floats into one uint64\nSELECT hilbert_encode([37.8, .2]::float[2])\ - \ as hilbert;\n┌─────────────────────┐\n│ hilbert │\n│ uint64\ + \ AS hilbert;\n┌─────────────────────┐\n│ hilbert │\n│ uint64\ \ │\n├─────────────────────┤\n│ 2303654869236839926 │\n└─────────────────────┘\n\ \n-- Since doubles use 64 bits of precision the encoding\n-- must result in a\ - \ uint128\n\nSELECT hilbert_encode([37.8, .2]::double[2]) as hilbert;\n┌────────────────────────────────────────┐\n\ + \ uint128\n\nSELECT hilbert_encode([37.8, .2]::double[2]) AS hilbert;\n┌────────────────────────────────────────┐\n\ │ hilbert │\n│ uint128 \ \ │\n├────────────────────────────────────────┤\n│ 42534209309512799991913666633619307890\ \ │\n└────────────────────────────────────────┘\n\n-- 3 dimensional encoding.\n\ - SELECT hilbert_encode([1.0, 5.0, 6.0]::float[3]) as hilbert;\n┌──────────────────────────────┐\n\ + SELECT hilbert_encode([1.0, 5.0, 6.0]::float[3]) AS hilbert;\n┌──────────────────────────────┐\n\ │ 
hilbert │\n│ uint128 │\n├──────────────────────────────┤\n\ │ 8002395622101954260073409974 │\n└──────────────────────────────┘\n\n-- Demonstrate\ \ string encoding\nSELECT hilbert_encode([ord(x) for x in split('abcd', '')]::tinyint[4])\ - \ as hilbert;\n┌───────────┐\n│ hilbert │\n│ uint32 │\n├───────────┤\n│ 178258816\ + \ AS hilbert;\n┌───────────┐\n│ hilbert │\n│ uint32 │\n├───────────┤\n│ 178258816\ \ │\n└───────────┘\n\n-- Start out just by encoding two values.\nSELECT hilbert_encode([1,\ - \ 2]::tinyint[2]) as hilbert;\n┌─────────┐\n│ hilbert │\n│ uint16 │\n├─────────┤\n\ + \ 2]::tinyint[2]) AS hilbert;\n┌─────────┐\n│ hilbert │\n│ uint16 │\n├─────────┤\n\ │ 7 │\n└─────────┘\n\n-- Decode an encoded value\nSELECT hilbert_decode(7::uint16,\ - \ 2, false, true) as values;\n┌─────────────┐\n│ values │\n│ utinyint[2]\ + \ 2, false, true) AS values;\n┌─────────────┐\n│ values │\n│ utinyint[2]\ \ │\n├─────────────┤\n│ [1, 2] │\n└─────────────┘\n\n-- The decoding functions\ \ take four parameters:\n-- 1. **Value to be decoded:** This is always an unsigned\ \ integer type.\n-- 2. 
**Number of elements to decode:** This is a `TINYINT` specifying\ @@ -94,7 +94,7 @@ docs: \ return type of these functions is always an array, with the element type determined\ \ by the number of elements requested and whether \"float\" handling is enabled\ \ by the third parameter.\n\nSELECT hilbert_decode(hilbert_encode([1, -2]::bigint[2]),\ - \ 2, false, false) as values;\n┌───────────┐\n│ values │\n│ bigint[2] │\n├───────────┤\n\ + \ 2, false, false) AS values;\n┌───────────┐\n│ values │\n│ bigint[2] │\n├───────────┤\n\ │ [1, -2] │\n└───────────┘\n" extension: build: cmake diff --git a/extensions/open_prompt/description.yml b/extensions/open_prompt/description.yml new file mode 100644 index 0000000..9e26b82 --- /dev/null +++ b/extensions/open_prompt/description.yml @@ -0,0 +1,57 @@ +extension: + name: open_prompt + description: Interact with LLMs with a simple DuckDB Extension + version: 0.0.3 + language: C++ + build: cmake + license: MIT + maintainers: + - lmangani + - akvlad + +repo: + github: quackscience/duckdb-extension-openprompt + ref: 616bdfc4e7b01c4095a2dda8d4104c179922efd6 + +docs: + hello_world: | + -- Configure the required extension parameters + SET VARIABLE openprompt_api_url = 'http://localhost:11434/v1/chat/completions'; + SET VARIABLE openprompt_api_token = 'optional_api_token_here'; + SET VARIABLE openprompt_model_name = 'qwen2.5:0.5b'; + + -- Prompt any OpenAI Completions API from your query + D SELECT open_prompt('Write a one-line poem about ducks') AS response; + ┌────────────────────────────────────────────────┐ + │ response │ + │ varchar │ + ├────────────────────────────────────────────────┤ + │ Ducks quacking at dawn, swimming in the light. 
│ + └────────────────────────────────────────────────┘ + + -- Prompt requesting JSON Structured Output for ChatGPT, LLama3, etc + SET VARIABLE openprompt_model_name = 'llama3.2:3b'; + SELECT open_prompt('I want ice cream', json_schema := '{ + "type": "object", + "properties": { + "summary": { "type": "string" }, + "sentiment": { "type": "string", "enum": ["pos", "neg", "neutral"] } + }, + "required": ["summary", "sentiment"], + "additionalProperties": false + }'); + + -- Use Custom System Prompt to request JSON Output in smaller models + SET VARIABLE openprompt_model_name = 'qwen2.5:1.5b'; + SELECT open_prompt('I want ice cream.', system_prompt:='Response MUST be JSON with the following schema: { + "type": "object", + "properties": { + "summary": { "type": "string" }, + "sentiment": { "type": "string", "enum": ["pos", "neg", "neutral"] } + }, + "required": ["summary", "sentiment"], + "additionalProperties": false + }'); + + extended_description: | + For examples and instructions check out the `open_prompt` [README](https://github.com/quackscience/duckdb-extension-openprompt) diff --git a/extensions/quack/description.yml b/extensions/quack/description.yml index 0c8d5d0..1715f08 100644 --- a/extensions/quack/description.yml +++ b/extensions/quack/description.yml @@ -23,4 +23,4 @@ docs: hello_world: | SELECT quack('world'); extended_description: | - The quack extension is based on DuckDB's [Extension Template](https://duckdb/extension_template/), and it's a great starting point to get started building more advanced extensions. + The quack extension is based on DuckDB's [Extension Template](https://github.com/duckdb/extension-template/), and it's a great starting point to get started building more advanced extensions. 
diff --git a/extensions/scrooge/description.yml b/extensions/scrooge/description.yml index d9a0053..96a7986 100644 --- a/extensions/scrooge/description.yml +++ b/extensions/scrooge/description.yml @@ -16,14 +16,14 @@ repo: docs: hello_world: | -- Set the RPC Provider - set eth_node_url= 'https://mempool.merkle.io/rpc/eth/pk_mbs_0b647b195065b3294a5254838a33d062'; + SET eth_node_url = 'https://mempool.merkle.io/rpc/eth/pk_mbs_0b647b195065b3294a5254838a33d062'; -- Query Transfer events of USDT from blocks 20034078 - 20034100 while parallelizing on one block per thread FROM read_eth( - 'USDT', - 'Transfer', - 20034078, - 20034100, - blocks_per_thread=1 + 'USDT', + 'Transfer', + 20034078, + 20034100, + blocks_per_thread = 1 ); extended_description: | Scrooge McDuck is a third-party financial extension for DuckDB. diff --git a/extensions/sheetreader/description.yml b/extensions/sheetreader/description.yml index bf1252c..8f38850 100644 --- a/extensions/sheetreader/description.yml +++ b/extensions/sheetreader/description.yml @@ -20,14 +20,14 @@ docs: -- Example usage of available named parameters CREATE TABLE data2 AS FROM sheetreader( - 'data2.xlsx', - sheet_index=1, - threads=16, - skip_rows=0, - has_header=TRUE, - types=[BOOLEAN,VARCHAR], - coerce_to_string=TRUE, - force_types=TRUE + 'data2.xlsx', + sheet_index = 1, + threads = 16, + skip_rows = 0, + has_header = true, + types = [BOOLEAN, VARCHAR], + coerce_to_string = true, + force_types = true ); @@ -55,7 +55,7 @@ docs: SheetReader was published in the [Information Systems Journal](https://www.sciencedirect.com/science/article/abs/pii/S0306437923000194) - ``` + ```bibtex @article{DBLP:journals/is/GavriilidisHZM23, author = {Haralampos Gavriilidis and Felix Henze and diff --git a/extensions/shellfs/description.yml b/extensions/shellfs/description.yml index 4bb3224..573b7ec 100644 --- a/extensions/shellfs/description.yml +++ b/extensions/shellfs/description.yml @@ -63,7 +63,7 @@ docs: \ configuration parameter.\n" 
hello_world: '-- Generate a sequence only return numbers that contain a 2 - SELECT * from read_csv(''seq 1 100 | grep 2 |''); + SELECT * FROM read_csv(''seq 1 100 | grep 2 |''); ┌─────────┐ @@ -90,7 +90,7 @@ docs: -- demonstrate how commands can be chained together - SELECT * from read_csv(''seq 1 35 | awk "\$1 % 7 == 0" | head -n 2 |''); + SELECT * FROM read_csv(''seq 1 35 | awk "\$1 % 7 == 0" | head -n 2 |''); ┌─────────┐ @@ -109,7 +109,7 @@ docs: -- Do some arbitrary curl - SELECT abbreviation, unixtime from + SELECT abbreviation, unixtime FROM read_json(''curl -s http://worldtimeapi.org/api/timezone/Etc/UTC |''); diff --git a/extensions/tarfs/description.yml b/extensions/tarfs/description.yml index c3c8be4..c6595a0 100644 --- a/extensions/tarfs/description.yml +++ b/extensions/tarfs/description.yml @@ -18,4 +18,3 @@ docs: extended_description: | This extension provides a duckdb file-system abstraction to read and glob files within __uncompressed__ tar archives. For more information and information regarding usage, limitations and performance, see the [tarfs README](https://github.com/Maxxen/duckdb_tarfs). - diff --git a/extensions/ulid/description.yml b/extensions/ulid/description.yml index 20785a6..fcda6fd 100644 --- a/extensions/ulid/description.yml +++ b/extensions/ulid/description.yml @@ -14,7 +14,7 @@ repo: docs: hello_world: | - SELECT ulid() as result; + SELECT ulid() AS result; extended_description: | This extension adds a new `ULID` data type to DuckDB. A [ULID](https://github.com/ulid/spec) is similar to a UUID except that it also contains a timestamp component, which makes it more suitable for use cases where the order of creation is important. 
diff --git a/layout/screenshot.md b/layout/screenshot.md new file mode 100644 index 0000000..8a077d8 --- /dev/null +++ b/layout/screenshot.md @@ -0,0 +1,6 @@ +```sql +INSTALL {{ page.extension.name }} + FROM community; + +LOAD {{ page.extension.name }}; +``` diff --git a/scripts/fetch_extensions.sh b/scripts/fetch_extensions.sh index 237d0ed..04444fc 100755 --- a/scripts/fetch_extensions.sh +++ b/scripts/fetch_extensions.sh @@ -1,5 +1,12 @@ +#!/usr/bin/env bash + set -eo pipefail +if [ $# -lt 1 ]; then + echo "Usage: ./scripts/fetch_extensions.sh path_to_duckdb_binary" + exit 1 +fi + rm -rf build for extension_folder in extensions/*; do diff --git a/scripts/generate_md.sh b/scripts/generate_md.sh index 8b3780a..613bfb5 100755 --- a/scripts/generate_md.sh +++ b/scripts/generate_md.sh @@ -1,10 +1,14 @@ #!/usr/bin/env bash -# Example of use -# ./scripts/generated_docs_readme.sh build/release/duckdb - set -eo pipefail +if [ $# -lt 1 ]; then + echo "Usage: ./scripts/generate_md.sh build/release/duckdb" + exit 1 +fi + +curl -s https://community-extensions.duckdb.org/downloads-last-week.json -o build/downloads-last-week.json + platform=$($1 -csv -c "PRAGMA platform" | tail -n1) version_raw=$($1 -csv -c "PRAGMA version" | tail -n1) version=$(echo "$version_raw-,$version_raw" | cut -d '-' -f 2 | cut -d ',' -f 2) @@ -15,12 +19,13 @@ rm -rf $DOCS mkdir -p $DOCS EXTENSIONS_CSV=$DOCS/community_extensions.csv -echo "name, repo, ref, description" > $EXTENSIONS_CSV +echo "name,repo,ref,description" > $EXTENSIONS_CSV for extension_file in build/extension_dir/$version/$platform/*.duckdb_extension; do extension_full=$(basename -- $extension_file) extension="${extension_full%%.*}" echo "Generating docs for $extension" + EXTENSION_SCREENSHOT=$DOCS/../screenshot/$extension.md EXTENSION_README=$DOCS/$extension.md things="extensions functions settings types" rm -f pre.db @@ -33,6 +38,7 @@ do done mkdir -p $DOCS/$extension + mkdir -p $DOCS/../screenshot $1 post.db -c "ATTACH 'pre.db';
CREATE OR REPLACE TABLE fun_no_overload AS SELECT function_name, function_type, split_part(description, chr(10), 1) as description, comment, example FROM (FROM (SELECT function_name, function_type, description, comment, example FROM functions ORDER BY function_name) EXCEPT (SELECT function_name, function_type, description, comment, example FROM pre.functions ORDER BY function_name)) GROUP BY ALL ORDER BY function_name;" $1 post.db -c "ATTACH 'pre.db'; CREATE OR REPLACE TABLE fun_with_overload AS SELECT function_name, function_type, split_part(description, chr(10), 1) as description, comment, example FROM (FROM ( SELECT count(*), function_name, function_type, description, comment, example FROM functions GROUP BY ALL ORDER BY function_name) EXCEPT (SELECT count(*), function_name, function_type, description, comment, example FROM pre.functions GROUP BY ALL ORDER BY function_name)) GROUP BY ALL ORDER BY function_name;" @@ -64,7 +70,7 @@ do rm -f post.db echo "---" > $EXTENSION_README - echo "layout: community_extension" >> $EXTENSION_README + echo "warning: DO NOT CHANGE THIS MANUALLY, THIS IS GENERATED BY https://github.com/duckdb/community-extensions repository, check README there" >> $EXTENSION_README echo "title: $extension" >> $EXTENSION_README echo "excerpt: |" >> $EXTENSION_README echo " DuckDB Community Extensions" >> $EXTENSION_README @@ -76,11 +82,27 @@ do echo "" >> $EXTENSION_README cat extensions/$extension/description.yml >> $EXTENSION_README echo "" >> $EXTENSION_README - echo -n "extension_star_count: " >> $EXTENSION_README - python3 scripts/get_stars.py extensions/$extension/description.yml $1 >> $EXTENSION_README - echo "" >> $EXTENSION_README + + STAR_COUNT=$(python3 scripts/get_stars.py extensions/$extension/description.yml $1) + STAR_COUNT_PRETTY=$(echo $STAR_COUNT | python3 scripts/pretty_print.py) + echo "extension_star_count: $STAR_COUNT" >> $EXTENSION_README + echo "extension_star_count_pretty: $STAR_COUNT_PRETTY" >> $EXTENSION_README + +
DOWNLOAD_COUNT=$(jq --arg name "$extension" '.[$name]' build/downloads-last-week.json) + DOWNLOAD_COUNT_PRETTY=$(echo $DOWNLOAD_COUNT | python3 scripts/pretty_print.py) + echo "extension_download_count: $DOWNLOAD_COUNT" >> $EXTENSION_README + echo "extension_download_count_pretty: $DOWNLOAD_COUNT_PRETTY" >> $EXTENSION_README fi + echo "image: '/images/community_extensions/social_preview/preview_community_extension_"$extension".png'" >> $EXTENSION_README + cat $EXTENSION_README > $EXTENSION_SCREENSHOT + echo "layout: community_extension_sql_screenshot" >> $EXTENSION_SCREENSHOT + echo "---" >> $EXTENSION_SCREENSHOT + cat layout/screenshot.md >> $EXTENSION_SCREENSHOT + + + echo "layout: community_extension_doc" >> $EXTENSION_README echo "---" >> $EXTENSION_README + cat layout/default.md >> $EXTENSION_README if [ -s "$DOCS/$extension/functions.md" ]; then @@ -125,5 +147,5 @@ do done rm -f x.db -$1 $DOCS/x.db -markdown -c "SELECT '['||#1||']({% link extensions/'||#1||'.md %})' as Name, '[GitHub](https://github.com/'||#2||')' as GitHub , #4 as Description FROM read_csv('build/docs/community_extensions.csv');" > $DOCS/extensions_list.md.tmp +$1 $DOCS/x.db -markdown -c "SELECT '['||#1||']({% link community_extensions/extensions/'||#1||'.md %})' as Name, '[GitHub](https://github.com/'||#2||')' as GitHub , #4 as Description FROM read_csv('build/docs/community_extensions.csv');" > $DOCS/extensions_list.md.tmp rm -f x.db diff --git a/scripts/pretty_print.py b/scripts/pretty_print.py new file mode 100644 index 0000000..b1d2e04 --- /dev/null +++ b/scripts/pretty_print.py @@ -0,0 +1,15 @@ +import sys + +line = sys.stdin.readline() + +if line is None or line == '': + print('n/a') +else: + try: + x = int(line) + if x < 1000: + print(x) + else: + print(f'{x / 1000:.1f}k') + except ValueError: + print('n/a')