From 7ab67ebade13cb4f4f37afbdcc3544ee3b0547c2 Mon Sep 17 00:00:00 2001 From: Diomidis Spinellis Date: Tue, 2 Jul 2024 14:25:08 +0300 Subject: [PATCH] Add decimal separator --- bin/decimal.sed | 13 ++ examples/datacite/README.md | 288 +++++++++++++------------- examples/datacite/tabulate-schemes.sh | 4 +- 3 files changed, 160 insertions(+), 145 deletions(-) create mode 100755 bin/decimal.sed diff --git a/bin/decimal.sed b/bin/decimal.sed new file mode 100755 index 0000000..7d3e8f1 --- /dev/null +++ b/bin/decimal.sed @@ -0,0 +1,13 @@ +#!/bin/sed -Ef +# +# Add decimal separator to long digit sequences +# +# Diomidis Spinellis, October 2018 +# + +:a +# Add a , after a digit followed by three digits followed by non-digit or EOL +s/([0-9])([0-9]{3})([^0-9]|$)/\1,\2\3/ + +# Try the replacement again if the replacement succeeded +ta diff --git a/examples/datacite/README.md b/examples/datacite/README.md index 188be8f..77384ee 100644 --- a/examples/datacite/README.md +++ b/examples/datacite/README.md @@ -10,114 +10,114 @@ For schemes only the first ten values with the highest occurrence are listed. | Value | Count | |:------|------:| -|contributor\_affiliations|3635578| -|contributor\_name\_identifiers|4576444| -|creator\_affiliations|22560989| -|creator\_name\_identifiers|12830187| -|work\_contributors|24236641| -|work\_creators|202655948| -|work\_dates|81364966| -|work\_descriptions|41259535| -|work\_funding\_references|2305529| -|work\_geo\_locations|19969035| -|work\_related\_identifiers|483494659| -|work\_rights|26632935| -|work\_subjects|117682396| -|work\_titles|57359160| -|works|52863283| +|contributor\_affiliations|3,635,578| +|contributor\_name\_identifiers|4,576,444| +|creator\_affiliations|22,560,989| +|creator\_name\_identifiers|12,830,187| +|work\_contributors|24,236,641| +|work\_creators|202,655,948| +|work\_dates|81,364,966| +|work\_descriptions|41,259,535| +|work\_funding\_references|2,305,529| +|work\_geo\_locations|19,969,035| +|work\_related\_identifiers|483,494,659| +|work\_rights|26,632,935| +|work\_subjects|117,682,396| +|work\_titles|57,359,160| +|works|52,863,283| ## dc\_contributor\_name\_identifiers.name\_identifier\_scheme | Value | Count | |:------|------:| -|ORCID|1649199| -|ROR|782619| -|VIAF|589092| -|GRID|200583| -|GND|169558| -|ISNI|90200| -|BNF|87558| -|LCCN|87558| -|NKC|87558| -|SUDOC|87558| +|ORCID|1,649,199| +|ROR|782,619| +|VIAF|589,092| +|GRID|200,583| +|GND|169,558| +|ISNI|90,200| +|BNF|87,558| +|LCCN|87,558| +|NKC|87,558| +|SUDOC|87,558| ## dc\_creator\_name\_identifiers.scheme\_uri | Value | Count | |:------|------:| -|https://orcid.org|9385742| -|(none)|2601705| -|https://orcid.org/|200060| -|https://d-nb.info/gnd/|86161| -|(none)|83605| -|http://orcid.org/|52468| -|https://www.jacow.org/|31388| -|http://isni.org/isni/|30290| -|http://lccn.loc.gov/|29186| -|https://aleph.nkp.cz/F/?func=find-c&local\_base=aut&CON\_LNG=ENG&ccl\_term=ica=|29186| +|https://orcid.org|9,385,742| +|(none)|2,601,705| +|https://orcid.org/|200,060| +|https://d-nb.info/gnd/|86,161| +|(none)|83,605| +|http://orcid.org/|52,468| +|https://www.jacow.org/|31,388| +|http://isni.org/isni/|30,290| +|http://lccn.loc.gov/|29,186| +|https://aleph.nkp.cz/F/?func=find-c&local\_base=aut&CON\_LNG=ENG&ccl\_term=ica=|29,186| ## dc\_work\_contributors.contributor\_type | Value | Count | |:------|------:| -|ContactPerson|5952672| -|Other|3854570| -|DataManager|3635356| -|HostingInstitution|2358178| -|Researcher|2260431| -|DataCollector|1777541| -|Funder|683551| -|Distributor|564472| -|Editor|518802| -|(none)|460972| +|ContactPerson|5,952,672| +|Other|3,854,570| +|DataManager|3,635,356| +|HostingInstitution|2,358,178| +|Researcher|2,260,431| +|DataCollector|1,777,541| +|Funder|683,551| +|Distributor|564,472| +|Editor|518,802| +|(none)|460,972| ## dc\_work\_creators.name\_type | Value | Count | |:------|------:| -|Personal|155758469| -|(none)|41788325| -|Organizational|5109154| +|Personal|155,758,469| +|(none)|41,788,325| +|Organizational|5,109,154| ## dc\_work\_dates.date\_type | Value | Count | |:------|------:| -|Issued|33869505| -|Updated|18217771| -|Created|10375387| -|Available|8191381| -|Submitted|4422420| -|Accepted|3138445| -|Collected|2968553| -|Copyrighted|108668| -|Valid|44016| -|Withdrawn|13512| +|Issued|33,869,505| +|Updated|18,217,771| +|Created|10,375,387| +|Available|8,191,381| +|Submitted|4,422,420| +|Accepted|3,138,445| +|Collected|2,968,553| +|Copyrighted|108,668| +|Valid|44,016| +|Withdrawn|13,512| ## dc\_work\_descriptions.description\_type | Value | Count | |:------|------:| -|Abstract|29843289| -|Other|9688729| -|SeriesInformation|1040621| -|Methods|250449| -|TechnicalInfo|244637| -|TableOfContents|119599| -|(none)|72210| +|Abstract|29,843,289| +|Other|9,688,729| +|SeriesInformation|1,040,621| +|Methods|250,449| +|TechnicalInfo|244,637| +|TableOfContents|119,599| +|(none)|72,210| |abstract|1| ## dc\_work\_funding\_references.funder\_identifier\_type | Value | Count | |:------|------:| -|Crossref Funder ID|985694| -|(none)|691805| -|ROR|426936| -|GRID|168655| -|ISNI|29975| -|Other|2377| +|Crossref Funder ID|985,694| +|(none)|691,805| +|ROR|426,936| +|GRID|168,655| +|ISNI|29,975| +|Other|2,377| |"Other">China Geological Survey project|36| |Fondation Martine Aublet|20| |Fondation Martine Aublet (Paris, France)|16| @@ -127,40 +127,40 @@ For schemes only the first ten values with the highest occurrence are listed. | Value | Count | |:------|------:| -|DOI|456179995| -|URL|14859234| -|IGSN|9538696| -|LSID|799059| -|EISSN|745896| -|ISSN|414258| -|ISBN|303769| -|Handle|295596| -|LISSN|130048| -|PMID|71231| +|DOI|456,179,995| +|URL|14,859,234| +|IGSN|9,538,696| +|LSID|799,059| +|EISSN|745,896| +|ISSN|414,258| +|ISBN|303,769| +|Handle|295,596| +|LISSN|130,048| +|PMID|71,231| ## dc\_work\_related\_identifiers.relation\_type | Value | Count | |:------|------:| -|References|431691310| -|IsPartOf|13049632| -|IsIdenticalTo|11390034| -|IsSupplementTo|3944418| -|IsVersionOf|3906323| -|HasMetadata|3680959| -|HasPart|2840517| -|IsCitedBy|2684860| -|HasVersion|2656100| -|IsSourceOf|2118187| +|References|431,691,310| +|IsPartOf|13,049,632| +|IsIdenticalTo|11,390,034| +|IsSupplementTo|3,944,418| +|IsVersionOf|3,906,323| +|HasMetadata|3,680,959| +|HasPart|2,840,517| +|IsCitedBy|2,684,860| +|HasVersion|2,656,100| +|IsSourceOf|2,118,187| ## dc\_work\_related\_identifiers.scheme\_type | Value | Count | |:------|------:| -|(none)|480269100| -|DwC-A|1611524| -|XML|1611524| -|http://datacite.org/schema/kernel-3|1965| +|(none)|480,269,100| +|DwC-A|1,611,524| +|XML|1,611,524| +|http://datacite.org/schema/kernel-3|1,965| |text/html|201| |xsd|201| |Text|64| @@ -172,26 +172,26 @@ For schemes only the first ten values with the highest occurrence are listed. | Value | Count | |:------|------:| -|(none)|15617810| -|cc-by-4.0|5859089| -|cc-by-nc-4.0|1696788| -|cc0-1.0|1566760| -|cc-by-sa-4.0|668028| -|cc-by-3.0|335076| -|cc-by-nc-nd-4.0|202084| -|cc-by-nc-sa-4.0|106664| -|cc-by-1.0|83510| -|openaccess|54315| +|(none)|15,617,810| +|cc-by-4.0|5,859,089| +|cc-by-nc-4.0|1,696,788| +|cc0-1.0|1,566,760| +|cc-by-sa-4.0|668,028| +|cc-by-3.0|335,076| +|cc-by-nc-nd-4.0|202,084| +|cc-by-nc-sa-4.0|106,664| +|cc-by-1.0|83,510| +|openaccess|54,315| ## dc\_work\_rights.rights\_identifier\_scheme | Value | Count | |:------|------:| -|(none)|15705417| -|SPDX|10818162| -|info:eu-repo-Access-Terms vocabulary|54315| -|(none)|52238| -|Creative Commons|2105| +|(none)|15,705,417| +|SPDX|10,818,162| +|info:eu-repo-Access-Terms vocabulary|54,315| +|(none)|52,238| +|Creative Commons|2,105| |creativecommons|399| |b2share.legacy|181| |spdx|74| @@ -202,53 +202,53 @@ For schemes only the first ten values with the highest occurrence are listed. | Value | Count | |:------|------:| -|(none)|45955613| -|DOI|4437411| -|URL|1318449| -|EISSN|619787| -|Handle|192467| -|ISSN|149125| -|LISSN|130011| -|ISBN|58286| -|URN|2008| +|(none)|45,955,613| +|DOI|4,437,411| +|URL|1,318,449| +|EISSN|619,787| +|Handle|192,467| +|ISSN|149,125| +|LISSN|130,011| +|ISBN|58,286| +|URN|2,008| |arXiv|66| ## dc\_work\_subjects.subject\_scheme | Value | Count | |:------|------:| -|(none)|76872398| -|Fields of Science and Technology (FOS)|16338295| -|Parameter|4964564| -|arXiv|3858253| -|FOR|3763252| -|LCSH|1401406| -|Method|954884| -|ddc|741879| -|Project|407320| -|keyword|394498| +|(none)|76,872,398| +|Fields of Science and Technology (FOS)|16,338,295| +|Parameter|4,964,564| +|arXiv|3,858,253| +|FOR|3,763,252| +|LCSH|1,401,406| +|Method|954,884| +|ddc|741,879| +|Project|407,320| +|keyword|394,498| ## dc\_work\_subjects.value\_uri | Value | Count | |:------|------:| -|(none)|116832029| -|http://id.loc.gov/authorities/subjects/sh85009003|176266| -|http://www.oecd.org/science/inno/38235147.pdf|25620| -|(none)|19432| -|http://www.narcis.nl/classfication/D37000|5274| -|https://core.tdar.org/browse/geographic-keyword/2810/southwestern-colorado|5013| -|https://core.tdar.org/browse/geographic-keyword/84604/southwestern-us|5012| -|https://core.tdar.org/browse/temporal-keyword/78/basketmaker-iii|4509| -|https://core.tdar.org/browse/material-type/1/ceramic|3824| -|http://astrothesaurus.org/uat/1469|3763| +|(none)|116,832,029| +|http://id.loc.gov/authorities/subjects/sh85,009,003|176,266| +|http://www.oecd.org/science/inno/38,235,147.pdf|25,620| +|(none)|19,432| +|http://www.narcis.nl/classfication/D37,000|5,274| +|https://core.tdar.org/browse/geographic-keyword/2,810/southwestern-colorado|5,013| +|https://core.tdar.org/browse/geographic-keyword/84,604/southwestern-us|5,012| +|https://core.tdar.org/browse/temporal-keyword/78/basketmaker-iii|4,509| +|https://core.tdar.org/browse/material-type/1/ceramic|3,824| +|http://astrothesaurus.org/uat/1,469|3,763| ## dc\_work\_titles.title\_type | Value | Count | |:------|------:| -|(none)|53091171| -|Subtitle|3366476| -|TranslatedTitle|449793| -|AlternativeTitle|282511| -|Other|169209| +|(none)|53,091,171| +|Subtitle|3,366,476| +|TranslatedTitle|449,793| +|AlternativeTitle|282,511| +|Other|169,209| diff --git a/examples/datacite/tabulate-schemes.sh b/examples/datacite/tabulate-schemes.sh index 9757d1e..1f3a6a1 100755 --- a/examples/datacite/tabulate-schemes.sh +++ b/examples/datacite/tabulate-schemes.sh @@ -20,6 +20,8 @@ EOF grep $scheme\| $SCHEMES | sort -t\| -k3rn | head -10 | - awk -F\| '$2 == "" {$2 = "(none)"} {print "|" $2 "|" $3 "|"}' + awk -F\| '$2 == "" {$2 = "(none)"} {print "|" $2 "|" $3 "|"}' | + sed 's/_/\\_/g' | + ../../bin/decimal.sed done