From 872ec0a1f99ca0b55a8c8c5491da09a7da66ce13 Mon Sep 17 00:00:00 2001 From: Gabriel Kronberger Date: Fri, 24 Feb 2023 01:28:01 +0100 Subject: [PATCH] Nikuradse (#175) * Added two datasets from the Nikuradse 'Flow in Rought Pipes' paper. * update dataset files Created by https://github.com/heal-research/pmlb/actions/runs/4254088850\nfrom 5becab4 on 2023-02-23 * Update metadata.yaml files. --------- Co-authored-by: github-actions[bot] --- .lfs-assets-id | 2 ++ datasets/nikuradse_1/README.md | 6 +++++ datasets/nikuradse_1/SOURCE.txt | 30 +++++++++++++++++++++++++ datasets/nikuradse_1/metadata.yaml | 27 ++++++++++++++++++++++ datasets/nikuradse_1/nikuradse_1.tsv.gz | 3 +++ datasets/nikuradse_1/summary_stats.tsv | 2 ++ datasets/nikuradse_2/README.md | 6 +++++ datasets/nikuradse_2/SOURCE.txt | 20 +++++++++++++++++ datasets/nikuradse_2/metadata.yaml | 24 ++++++++++++++++++++ datasets/nikuradse_2/nikuradse_2.tsv.gz | 3 +++ datasets/nikuradse_2/summary_stats.tsv | 2 ++ pmlb/all_summary_stats.tsv | 2 ++ 12 files changed, 127 insertions(+) create mode 100644 datasets/nikuradse_1/README.md create mode 100644 datasets/nikuradse_1/SOURCE.txt create mode 100644 datasets/nikuradse_1/metadata.yaml create mode 100644 datasets/nikuradse_1/nikuradse_1.tsv.gz create mode 100644 datasets/nikuradse_1/summary_stats.tsv create mode 100644 datasets/nikuradse_2/README.md create mode 100644 datasets/nikuradse_2/SOURCE.txt create mode 100644 datasets/nikuradse_2/metadata.yaml create mode 100644 datasets/nikuradse_2/nikuradse_2.tsv.gz create mode 100644 datasets/nikuradse_2/summary_stats.tsv diff --git a/.lfs-assets-id b/.lfs-assets-id index 2de5d5ba..7d52a573 100644 --- a/.lfs-assets-id +++ b/.lfs-assets-id @@ -22,6 +22,7 @@ 0c342ef5d61bbcf43180a3b71d407b9d994942ce43e8960052201daf88dd095d 0d05767a4c118752a25c4632aeea3b71ffa1bfe122b6a2401f85d20541be19a4 0d39f17afc3a1712bd6c460aa941aed7835b3feb142538adfdd31ddc2451d60d +0d43780ab866e54a2a78d8c86ba231ad0a5d55588450a33ed6fe52bee9638341 
0fa7cb761daaf9c9bac4ead348064cdb824db9ee3c6a7c968d73ae9a459469f6 0fb9badfe474d0dbc5a23e83ed56dfc8cbd8ae6201f959f3ba0dd147fff89f09 104e99890ea7e473e56d29c38452e79f173cbd5078cc30e1702eb45b42600585 @@ -314,6 +315,7 @@ b8d94f002408f850933b1539a7fa8530dcb68f0dccae47cd8658909e6dbe1146 bbe46db284f0e29f95dfd9984ca32565b52d83217aa084c90de5f6edf82bfea1 bd0e747cb0a16d9f68843ccd6fa0b0d382bb21f2c83ccbc222712426ee42274b bd9d5214451c3b72e8a5ba4ae75a565d373d90a804659d0a5f4617fad3ac4cfd +be6942e13096c21f10496a24056b01ff791e24d6172f5b7a09013c3307d38f28 beedc054b8e7d974a98326db8c834843eb188cffba0f07029a8370b193ce020e bf066d8b8431c89d3c8afd58b0bfe56f53e0aaedd8d4ec05c132268115af3f36 bfc6131af9d009576a82d25e0590955980535eb61c67d1553434da993e79af92 diff --git a/datasets/nikuradse_1/README.md b/datasets/nikuradse_1/README.md new file mode 100644 index 00000000..be59acf1 --- /dev/null +++ b/datasets/nikuradse_1/README.md @@ -0,0 +1,6 @@ +# nikuradse_1 + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/nikuradse_1.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/nikuradse_1/SOURCE.txt b/datasets/nikuradse_1/SOURCE.txt new file mode 100644 index 00000000..9b67c76f --- /dev/null +++ b/datasets/nikuradse_1/SOURCE.txt @@ -0,0 +1,30 @@ +The original source is "J. Nikuradse, Laws of Flow in Rough Pipes, +Technical Memorandum 1292, National Advisory Committee for +Aeronautics, 1950, (Translation of "Strömungsgesetze in rauhen +Rohren", VDI-Forschungsheft 361, Beilage zu "Forschung auf dem Gebiete +des Ingenieurwesens" Ausgabe B Band 4, July/August 1933). +Tables 2 to 8. + + +The two input variables are + - relative roughness (column r/k) + - log of Reynolds number (column log Re). + +The target variable is log of 100 times resistance factor +(column log(100 lambda)). Figure 9 in the original Nikuradse paper +shows additional points for Re < 10^3.8 but they are not contained in +the tables. 
+ +A separate version of this dataset has been used in [1]. + +Another separate version of the data has been used in [2]. + + +[1] Bayesian Machine Scientist to Compare Data Collapses for the + Nikuradse Dataset Ignasi Reichardt, Jordi Pallarès, Marta + Sales-Pardo, and Roger Guimerà Phys. Rev. Lett. 124, 084503 - + Published 27 February 2020 + https://doi.org/10.1103/PhysRevLett.124.084503 + +[2] Bobby H. Yang & Daniel D. Joseph (2009) Virtual Nikuradse, Journal + of Turbulence, 10, N11, DOI: 10.1080/14685240902806491 diff --git a/datasets/nikuradse_1/metadata.yaml b/datasets/nikuradse_1/metadata.yaml new file mode 100644 index 00000000..2f78d25b --- /dev/null +++ b/datasets/nikuradse_1/metadata.yaml @@ -0,0 +1,27 @@ +# Reviewed by Gabriel Kronberger +dataset: nikuradse_1 +description: > + Predict the resistance for fluid flow in rough pipes based on the Nikuradse dataset. +source: > + J. Nikuradse: Laws of Flow in Rough Pipes, Technical Memorandum 1292, National Advisory Committee for Aeronautics, 1950, (Translation of "Strömungsgesetze in rauhen Rohren", VDI-Forschungsheft 361, Beilage zu "Forschung auf dem Gebiete des Ingenieurwesens" Ausgabe B Band 4, July/August 1933). Tables 2 to 8 and Figure 9. +publication: > + Ignasi Reichardt, Jordi Pallarès, Marta Sales-Pardo, and Roger Guimerà: Bayesian Machine Scientist to Compare Data Collapses for the Nikuradse Dataset. Phys. Rev. Lett. 124, 084503 - Published 27 February 2020 https://doi.org/10.1103/PhysRevLett.124.084503 +task: regression +keywords: + - Physics + - Flow +target: + type: continuous + description: log of 100x resistance factor (log(100 lambda)) + code: null +features: + - name: r_k + type: continuous + description: relative roughness (r/k), r is the radius of the pipe, k is the average projection of roughness. + code: null + transform: ~ + - name: log_Re + type: continuous + description: log of Reynolds number (log Re). 
+ code: null + transform: ~ diff --git a/datasets/nikuradse_1/nikuradse_1.tsv.gz b/datasets/nikuradse_1/nikuradse_1.tsv.gz new file mode 100644 index 00000000..afe96a0c --- /dev/null +++ b/datasets/nikuradse_1/nikuradse_1.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be6942e13096c21f10496a24056b01ff791e24d6172f5b7a09013c3307d38f28 +size 1604 diff --git a/datasets/nikuradse_1/summary_stats.tsv b/datasets/nikuradse_1/summary_stats.tsv new file mode 100644 index 00000000..1894df1f --- /dev/null +++ b/datasets/nikuradse_1/summary_stats.tsv @@ -0,0 +1,2 @@ +dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task +nikuradse_1 362 2 0 0 2 continuous 186.0 0.004936819240411923 regression diff --git a/datasets/nikuradse_2/README.md b/datasets/nikuradse_2/README.md new file mode 100644 index 00000000..e7fd99a6 --- /dev/null +++ b/datasets/nikuradse_2/README.md @@ -0,0 +1,6 @@ +# nikuradse_2 + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/nikuradse_2.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/nikuradse_2/SOURCE.txt b/datasets/nikuradse_2/SOURCE.txt new file mode 100644 index 00000000..6c786683 --- /dev/null +++ b/datasets/nikuradse_2/SOURCE.txt @@ -0,0 +1,20 @@ +The original source is "J. Nikuradse, Laws of Flow in Rough Pipes, +Technical Memorandum 1292, National Advisory Committee for +Aeronautics, 1950, (Translation of "Strömungsgesetze in rauhen +Rohren", VDI-Forschungsheft 361, Beilage zu "Forschung auf dem Gebiete +des Ingenieurwesens" Ausgabe B Band 4, July/August 1933). +Tables 2 to 8. + +This data is for the Prandtl collapse 1/sqrt(lambda) - 2 log(r/k) +The input variable is log(v*k/nu). The target variable is 1/sqrt(lambda) - 2 log(r/k). + +Figure 11 in the original Nikuradse paper shows the data. The points +in the figure do not match the data in the tables exactly. 
+ +A separate version of this dataset has been used in [1]. + +[1] Bayesian Machine Scientist to Compare Data Collapses for the + Nikuradse Dataset Ignasi Reichardt, Jordi Pallarès, Marta + Sales-Pardo, and Roger Guimerà Phys. Rev. Lett. 124, 084503 - + Published 27 February 2020 + https://doi.org/10.1103/PhysRevLett.124.084503 diff --git a/datasets/nikuradse_2/metadata.yaml b/datasets/nikuradse_2/metadata.yaml new file mode 100644 index 00000000..70552aaa --- /dev/null +++ b/datasets/nikuradse_2/metadata.yaml @@ -0,0 +1,24 @@ +# Reviewed by Gabriel Kronberger +dataset: nikuradse_2 +description: > + Predict the resistance for fluid flow in rough pipes based on the Nikuradse dataset. This dataset uses the Prandtl collapse. +source: > + J. Nikuradse: Laws of Flow in Rough Pipes, Technical Memorandum 1292, National Advisory Committee for Aeronautics, 1950, (Translation of "Strömungsgesetze in rauhen Rohren", VDI-Forschungsheft 361, Beilage zu "Forschung auf dem Gebiete des Ingenieurwesens" Ausgabe B Band 4, July/August 1933). Tables 2 to 8 and Figure 11. +publication: > + Ignasi Reichardt, Jordi Pallarès, Marta Sales-Pardo, and Roger Guimerà: Bayesian Machine Scientist to Compare Data Collapses for the Nikuradse Dataset. Phys. Rev. Lett. 124, 084503 - Published 27 February 2020 https://doi.org/10.1103/PhysRevLett.124.084503 +task: regression +keywords: + - Physics + - Flow +target: + type: continuous + description: > + Prandtl collapse for the resistance factor (1/sqrt(lambda) - 2 log(r/k)), r is the radius of the pipe, k is the average projection of roughness. + code: null +features: + - name: log_v_k_nu + type: continuous + description: > + Input features for the Prandtl collapse: log(v* k/nu), v* is the "friction" velocity, k is the average projection of roughness, nu is the kinematic viscosity [cm^2 s^-1]. 
+ code: null + transform: ~ diff --git a/datasets/nikuradse_2/nikuradse_2.tsv.gz b/datasets/nikuradse_2/nikuradse_2.tsv.gz new file mode 100644 index 00000000..55d1d4aa --- /dev/null +++ b/datasets/nikuradse_2/nikuradse_2.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d43780ab866e54a2a78d8c86ba231ad0a5d55588450a33ed6fe52bee9638341 +size 1469 diff --git a/datasets/nikuradse_2/summary_stats.tsv b/datasets/nikuradse_2/summary_stats.tsv new file mode 100644 index 00000000..81f7bc08 --- /dev/null +++ b/datasets/nikuradse_2/summary_stats.tsv @@ -0,0 +1,2 @@ +dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task +nikuradse_2 362 1 0 0 1 continuous 80.0 0.020400530269280518 regression diff --git a/pmlb/all_summary_stats.tsv b/pmlb/all_summary_stats.tsv index ed9ec48c..9011bd80 100644 --- a/pmlb/all_summary_stats.tsv +++ b/pmlb/all_summary_stats.tsv @@ -354,6 +354,8 @@ movement_libras 360 90 0 0 90 categorical 15.0 0.0 classification mushroom 8124 22 5 16 1 categorical 2.0 0.0012918910021209 classification mux6 128 6 6 0 0 categorical 2.0 0.0 classification new_thyroid 215 5 0 0 5 categorical 3.0 0.2990805840995132 classification +nikuradse_1 362 2 0 0 2 continuous 186.0 0.0049368192404119 regression +nikuradse_2 362 1 0 0 1 continuous 80.0 0.0204005302692805 regression nursery 12958 8 1 7 0 categorical 4.0 0.0900899366400157 classification optdigits 5620 64 3 10 51 categorical 10.0 1.4704868366802722e-05 classification page_blocks 5473 10 0 0 10 categorical 5.0 0.7627104291422278 classification