From b1674c08af6ca203173256604d2738dab7dbc816 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B6nke=20Ludwig?= Date: Sat, 17 Feb 2024 12:48:50 +0100 Subject: [PATCH] Add initial project structure based on current vibe.d master. --- .editorconfig | 19 + .github/workflows/test.yml | 43 + .gitignore | 11 + LICENSE.txt | 7 + LICENSE_DE.txt | 7 + README.md | 6 + crypto/dub.sdl | 8 + crypto/vibe/crypto/cryptorand.d | 615 +++++++++ dub.sdl | 14 + run-ci.sh | 24 + source/vibe/inet/message.d | 438 ++++++ source/vibe/inet/mimetypes.d | 754 ++++++++++ source/vibe/inet/url.d | 1167 ++++++++++++++++ source/vibe/inet/urltransfer.d | 116 ++ source/vibe/inet/webform.d | 642 +++++++++ textfilter/dub.sdl | 6 + textfilter/vibe/textfilter/html.d | 190 +++ textfilter/vibe/textfilter/markdown.d | 1752 ++++++++++++++++++++++++ textfilter/vibe/textfilter/urlencode.d | 357 +++++ 19 files changed, 6176 insertions(+) create mode 100644 .editorconfig create mode 100644 .github/workflows/test.yml create mode 100644 .gitignore create mode 100644 LICENSE.txt create mode 100644 LICENSE_DE.txt create mode 100644 README.md create mode 100644 crypto/dub.sdl create mode 100644 crypto/vibe/crypto/cryptorand.d create mode 100644 dub.sdl create mode 100755 run-ci.sh create mode 100644 source/vibe/inet/message.d create mode 100644 source/vibe/inet/mimetypes.d create mode 100644 source/vibe/inet/url.d create mode 100644 source/vibe/inet/urltransfer.d create mode 100644 source/vibe/inet/webform.d create mode 100644 textfilter/dub.sdl create mode 100644 textfilter/vibe/textfilter/html.d create mode 100644 textfilter/vibe/textfilter/markdown.d create mode 100644 textfilter/vibe/textfilter/urlencode.d diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..428b7ee --- /dev/null +++ b/.editorconfig @@ -0,0 +1,19 @@ +root = true + +[*.{c,h,d,di,dd,json}] +end_of_line = lf +insert_final_newline = true +indent_style = tab +indent_size = 4 +trim_trailing_whitespace = true +charset = utf-8 + 
+[*.{d,di}] +dfmt_brace_style = knr +dfmt_keep_line_breaks = true +dfmt_single_template_constraint_indent = true +dfmt_single_indent = true +dfmt_template_constraint_style = conditional_newline_indent +dfmt_compact_labeled_statements = false +dfmt_space_after_cast = false +dfmt_align_switch_statements = false diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..1a4ba2a --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,43 @@ +name: Run all D Tests + +# Only triggers on pushes/PRs to master +on: + pull_request: + branches: + - master + push: + branches: + - master + - github_actions + +jobs: + test: + name: CI + timeout-minutes: 5 + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macOS-latest] + dc: [dmd-latest, ldc-latest] + arch: [x86_64] + include: + - { os: windows-latest, dc: dmd-2.092.0, arch: x86_64 } + - { os: windows-latest, dc: dmd-2.092.0, arch: x86_mscoff } + - { os: windows-latest, dc: dmd-2.091.1, arch: x86_64 } + - { os: windows-latest, dc: ldc-1.20.1, arch: x86_64 } + + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + + - name: Install D compiler + uses: dlang-community/setup-dlang@v1 + with: + compiler: ${{ matrix.dc }} + + - name: Run tests + env: + DC: ${{matrix.dc}} + ARCH: ${{matrix.arch}} + shell: bash + run: ./run-ci.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..897e50d --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +.dub +.vs +libvibe_core.a +vibe-core-test-* +*.exe +*.lst +*.obj +*.sln +dub.selections.json +vibe_core.lib +tests/tests diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..3720462 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,7 @@ +Copyright (c) 2016-2024 Sönke Ludwig + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including 
without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/LICENSE_DE.txt b/LICENSE_DE.txt new file mode 100644 index 0000000..7369e67 --- /dev/null +++ b/LICENSE_DE.txt @@ -0,0 +1,7 @@ +Copyright (c) 2016-2024 Sönke Ludwig + +Hiermit wird unentgeltlich, jeder Person, die eine Kopie der Software und der zugehörigen Dokumentationen (die "Software") erhält, die Erlaubnis erteilt, uneingeschränkt zu benutzen, inklusive und ohne Ausnahme, dem Recht, sie zu verwenden, kopieren, ändern, fusionieren, verlegen, verbreiten, unterlizenzieren und/oder zu verkaufen, und Personen, die diese Software erhalten, diese Rechte zu geben, unter den folgenden Bedingungen: + +Der obige Urheberrechtsvermerk und dieser Erlaubnisvermerk sind in allen Kopien oder Teilkopien der Software beizulegen. + +DIE SOFTWARE WIRD OHNE JEDE AUSDRÜCKLICHE ODER IMPLIZIERTE GARANTIE BEREITGESTELLT, EINSCHLIESSLICH DER GARANTIE ZUR BENUTZUNG FÜR DEN VORGESEHENEN ODER EINEM BESTIMMTEN ZWECK SOWIE JEGLICHER RECHTSVERLETZUNG, JEDOCH NICHT DARAUF BESCHRÄNKT. 
IN KEINEM FALL SIND DIE AUTOREN ODER COPYRIGHTINHABER FÜR JEGLICHEN SCHADEN ODER SONSTIGE ANSPRÜCHE HAFTBAR ZU MACHEN, OB INFOLGE DER ERFÜLLUNG EINES VERTRAGES, EINES DELIKTES ODER ANDERS IM ZUSAMMENHANG MIT DER SOFTWARE ODER SONSTIGER VERWENDUNG DER SOFTWARE ENTSTANDEN. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..5448ccb --- /dev/null +++ b/README.md @@ -0,0 +1,6 @@ +[![vibe.d](https://vibed.org/images/title-new.png)](https://vibed.org) + +vibe.d serialization and data package +===================================== + +This package contains a generic serialization system, as well as JSON and BSON format support. Additional serialization support for [SDLang](https://sdlang.org/) is available via the [vibe-sdlang](https://github.com/vibe-d/vibe-sdl) package. diff --git a/crypto/dub.sdl b/crypto/dub.sdl new file mode 100644 index 0000000..332ad57 --- /dev/null +++ b/crypto/dub.sdl @@ -0,0 +1,8 @@ +name "crypto" +description "Cryptographic helper routines" +targetType "library" +dependency "vibe-core" version=">=2.8.1 <3.0.0-0" +dependency "mir-linux-kernel" version="~>1.0.0" platform="linux" +sourcePaths "." +importPaths "." +libs "advapi32" platform="windows" diff --git a/crypto/vibe/crypto/cryptorand.d b/crypto/vibe/crypto/cryptorand.d new file mode 100644 index 0000000..1adde11 --- /dev/null +++ b/crypto/vibe/crypto/cryptorand.d @@ -0,0 +1,615 @@ +/** + Implements cryptographically secure random number generators. + + Copyright: © 2013 Sönke Ludwig + License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file. + Authors: Ilya Shipunov +*/ +module vibe.crypto.cryptorand; + +import std.conv : text; +import std.digest.sha; +import vibe.core.stream; + + +/** Creates a cryptographically secure random number generator. 
+
+	Note that the returned RNG will operate in a non-blocking mode, which means
+	that if no sufficient entropy has been generated, new random numbers will be
+	generated from previous state.
+*/
+RandomNumberStream secureRNG()
+@safe {
+	// created lazily on first use and then reused for all further calls
+	static SystemRNG m_rng;
+	if (!m_rng) m_rng = new SystemRNG;
+	return m_rng;
+}
+
+
+/**
+	Base interface for all cryptographically secure RNGs.
+*/
+interface RandomNumberStream : InputStream {
+	/**
+		Fills the buffer with new random numbers.
+
+		Params:
+			dst = The buffer that will be filled with random numbers.
+				It will contain dst.length random ubytes.
+				Supports both heap-based and stack-based arrays.
+			mode = The desired waiting mode for IO operations.
+
+		Throws:
+			CryptoException on error.
+	*/
+	override size_t read(scope ubyte[] dst, IOMode mode) @safe;
+
+	alias read = InputStream.read;
+}
+
+// true only on Linux builds where the mir-linux-kernel syscall numbers can be imported
+version(linux)
+	enum bool LinuxMaybeHasGetrandom = __traits(compiles, {import mir.linux._asm.unistd : NR_getrandom;});
+else
+	enum bool LinuxMaybeHasGetrandom = false;
+
+static if (LinuxMaybeHasGetrandom)
+{
+	// getrandom was introduced in Linux 3.17
+	private enum GET_RANDOM {
+		UNINITIALIZED,
+		NOT_AVAILABLE,
+		AVAILABLE,
+	}
+	// process-wide cache of the kernel check performed by initHasGetRandom
+	private __gshared GET_RANDOM hasGetRandom = GET_RANDOM.UNINITIALIZED;
+	private import core.sys.posix.sys.utsname : utsname;
+	// druntime might not be properly annotated
+	private extern(C) int uname(scope utsname* __name) @nogc nothrow;
+	/* Checks whether the running Linux kernel supports the getrandom syscall
+	   by looking at the reported kernel release ("<major>.<minor>...").
+
+	   Returns: true for release 3.17 or newer. false for older kernels or if
+	   the release cannot be determined, in which case SystemRNG reads from
+	   /dev/urandom instead.
+	*/
+	private bool initHasGetRandom() @nogc @trusted nothrow
+	{
+		import core.stdc.stdlib : strtol;
+
+		utsname uts;
+		// without a valid release string nothing can be concluded
+		if (uname(&uts) != 0) return false;
+
+		// Parse with strtol instead of strtok/atoi: strtok keeps hidden static
+		// state, and calling it a second time with the same buffer re-reads
+		// the major number instead of advancing to the minor number.
+		char* rest;
+		const major = strtol(uts.release.ptr, &rest, 10);
+		if (major > 3) return true;
+		if (major < 3 || *rest != '.') return false;
+
+		// 3.x: getrandom is available starting with 3.17
+		return strtol(rest + 1, null, 10) >= 17;
+	}
+	private extern(C) int syscall(size_t ident,
size_t n, size_t arg1, size_t arg2) @nogc nothrow; +} + +version (CRuntime_Bionic) + version = secure_arc4random;//ChaCha20 +version (OSX) + version = secure_arc4random;//AES +version (OpenBSD) + version = secure_arc4random;//ChaCha20 +version (NetBSD) + version = secure_arc4random;//ChaCha20 +version (secure_arc4random) +extern(C) @nogc nothrow private @system +{ + void arc4random_buf(scope void* buf, size_t nbytes); +} + +/** + Operating system specific cryptography secure random number generator. + + It uses the "CryptGenRandom" function for Windows; the "arc4random_buf" + function (not based on RC4 but on a modern and cryptographically secure + cipher) for macOS/OpenBSD/NetBSD; the "getrandom" syscall for Linux 3.17 + and later; and "/dev/urandom" for other Posix platforms. + It's recommended to combine the output use additional processing generated random numbers + via provided functions for systems where security matters. + + Remarks: + Windows "CryptGenRandom" RNG has known security vulnerabilities on + Windows 2000 and Windows XP (assuming the attacker has control of the + machine). Fixed for Windows XP Service Pack 3 and Windows Vista. + + See_Also: $(LINK http://en.wikipedia.org/wiki/CryptGenRandom) +*/ +final class SystemRNG : RandomNumberStream { +@safe: + import std.exception; + + version(Windows) + { + //cryptographic service provider + private HCRYPTPROV hCryptProv; + } + else version(secure_arc4random) + { + //Using arc4random does not involve any extra fields. 
+ } + else version(Posix) + { + import core.stdc.errno : errno, EINTR; + //cryptographic file descriptor + private int m_fd = -1; + } + else + { + static assert(0, "OS is not supported"); + } + + /** + Creates new system random generator + */ + this() + @trusted { + version(Windows) + { + //init cryptographic service provider + enforce!CryptoException(CryptAcquireContext(&this.hCryptProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT) != 0, + text("Cannot init SystemRNG: Error id is ", GetLastError())); + } + else version(secure_arc4random) + { + //arc4random requires no setup or cleanup. + } + else version(Posix) + { + import core.sys.posix.fcntl : open, O_RDONLY; + version (linux) static if (LinuxMaybeHasGetrandom) + { + import core.atomic : atomicLoad, atomicStore; + GET_RANDOM p = atomicLoad(*cast(const shared GET_RANDOM*) &hasGetRandom); + if (p == GET_RANDOM.UNINITIALIZED) + { + p = initHasGetRandom() ? GET_RANDOM.AVAILABLE + : GET_RANDOM.NOT_AVAILABLE; + // Benign race condition. + atomicStore(*cast(shared GET_RANDOM*) &hasGetRandom, p); + } + if (p == GET_RANDOM.AVAILABLE) + return; + } + //open file + m_fd = open("/dev/urandom", O_RDONLY); + enforce!CryptoException(m_fd != -1, "Failed to open /dev/urandom"); + } + } + + ~this() + @trusted { + version(Windows) + { + CryptReleaseContext(this.hCryptProv, 0); + } + else version (secure_arc4random) + { + //arc4random requires no setup or cleanup. 
+ } + else version (Posix) + { + import core.sys.posix.unistd : close; + version (linux) static if (LinuxMaybeHasGetrandom) + { + if (m_fd == -1) return; + } + close(m_fd); + } + } + + @property bool empty() { return false; } + @property ulong leastSize() { return ulong.max; } + @property bool dataAvailableForRead() { return true; } + const(ubyte)[] peek() { return null; } + + size_t read(scope ubyte[] buffer, IOMode mode) @trusted + in + { + assert(buffer.length, "buffer length must be larger than 0"); + assert(buffer.length <= uint.max, "buffer length must be smaller or equal uint.max"); + } + do + { + version (Windows) + { + if(0 == CryptGenRandom(this.hCryptProv, cast(DWORD)buffer.length, buffer.ptr)) + { + throw new CryptoException(text("Cannot get next random number: Error id is ", GetLastError())); + } + } + else version (secure_arc4random) + { + arc4random_buf(buffer.ptr, buffer.length);//Cannot fail. + } + else version (Posix) + { + version (linux) static if (LinuxMaybeHasGetrandom) + { + if (hasGetRandom == GET_RANDOM.AVAILABLE) + { + /* + http://man7.org/linux/man-pages/man2/getrandom.2.html + If the urandom source has been initialized, reads of up to 256 bytes + will always return as many bytes as requested and will not be + interrupted by signals. No such guarantees apply for larger buffer + sizes. 
+ */ + import mir.linux._asm.unistd : NR_getrandom; + size_t len = buffer.length; + size_t ptr = cast(size_t) buffer.ptr; + while (len > 0) + { + auto res = syscall(NR_getrandom, ptr, len, 0); + if (res >= 0) + { + len -= res; + ptr += res; + } + else if (errno != EINTR) + { + throw new CryptoException( + text("Failed to read next random number: ", errno)); + } + } + return buffer.length; + } + } + import core.sys.posix.unistd : _read = read; + enforce!CryptoException(_read(m_fd, buffer.ptr, buffer.length) == buffer.length, + text("Failed to read next random number: ", errno)); + } + return buffer.length; + } + + alias read = RandomNumberStream.read; +} + +//test heap-based arrays +unittest +{ + import std.algorithm; + import std.range; + + //number random bytes in the buffer + enum uint bufferSize = 20; + + //number of iteration counts + enum iterationCount = 10; + + auto rng = new SystemRNG(); + + //holds the random number + ubyte[] rand = new ubyte[bufferSize]; + + //holds the previous random number after the creation of the next one + ubyte[] prevRadn = new ubyte[bufferSize]; + + //create the next random number + rng.read(prevRadn); + + assert(!equal(prevRadn, take(repeat(0), bufferSize)), "it's almost unbelievable - all random bytes is zero"); + + //take "iterationCount" arrays with random bytes + foreach(i; 0..iterationCount) + { + //create the next random number + rng.read(rand); + + assert(!equal(rand, take(repeat(0), bufferSize)), "it's almost unbelievable - all random bytes is zero"); + + assert(!equal(rand, prevRadn), "it's almost unbelievable - current and previous random bytes are equal"); + + //copy current random bytes for next iteration + prevRadn[] = rand[]; + } +} + +//test stack-based arrays +unittest +{ + import std.algorithm; + import std.range; + import std.array; + + //number random bytes in the buffer + enum uint bufferSize = 20; + + //number of iteration counts + enum iterationCount = 10; + + //array that contains only zeros + 
ubyte[bufferSize] zeroArray; + zeroArray[] = take(repeat(cast(ubyte)0), bufferSize).array()[]; + + auto rng = new SystemRNG(); + + //holds the random number + ubyte[bufferSize] rand; + + //holds the previous random number after the creation of the next one + ubyte[bufferSize] prevRadn; + + //create the next random number + rng.read(prevRadn); + + assert(prevRadn != zeroArray, "it's almost unbelievable - all random bytes is zero"); + + //take "iterationCount" arrays with random bytes + foreach(i; 0..iterationCount) + { + //create the next random number + rng.read(rand); + + assert(prevRadn != zeroArray, "it's almost unbelievable - all random bytes is zero"); + + assert(rand != prevRadn, "it's almost unbelievable - current and previous random bytes are equal"); + + //copy current random bytes for next iteration + prevRadn[] = rand[]; + } +} + + +/** + Hash-based cryptographically secure random number mixer. + + This RNG uses a hash function to mix a specific amount of random bytes from the input RNG. + Use only cryptographically secure hash functions like SHA-512, Whirlpool or SHA-256, but not MD5. + + Params: + Hash: The hash function used, for example SHA1 + factor: Determines how many times the hash digest length of input data + is used as input to the hash function. Increase factor value if you + need more security because it increases entropy level or decrease + the factor value if you need more speed. + +*/ +final class HashMixerRNG(Hash, uint factor) : RandomNumberStream + if(isDigest!Hash) +{ + static assert(factor, "factor must be larger than 0"); + + //random number generator + SystemRNG rng; + + /** + Creates new hash-based mixer random generator. 
+ */ + this() + { + //create random number generator + this.rng = new SystemRNG(); + } + + @property bool empty() { return false; } + @property ulong leastSize() { return ulong.max; } + @property bool dataAvailableForRead() { return true; } + const(ubyte)[] peek() { return null; } + + size_t read(scope ubyte[] buffer, IOMode mode) + in + { + assert(buffer.length, "buffer length must be larger than 0"); + assert(buffer.length <= uint.max, "buffer length must be smaller or equal uint.max"); + } + do + { + auto len = buffer.length; + + //use stack to allocate internal buffer + ubyte[factor * digestLength!Hash] internalBuffer = void; + + //init internal buffer + this.rng.read(internalBuffer); + + //create new random number on stack + ubyte[digestLength!Hash] randomNumber = digest!Hash(internalBuffer); + + //allows to fill buffers longer than hash digest length + while(buffer.length > digestLength!Hash) + { + //fill the buffer's beginning + buffer[0..digestLength!Hash] = randomNumber[0..$]; + + //receive the buffer's end + buffer = buffer[digestLength!Hash..$]; + + //re-init internal buffer + this.rng.read(internalBuffer); + + //create next random number + randomNumber = digest!Hash(internalBuffer); + } + + //fill the buffer's end + buffer[0..$] = randomNumber[0..buffer.length]; + + return len; + } + + alias read = RandomNumberStream.read; +} + +/// A SHA-1 based mixing RNG. Alias for HashMixerRNG!(SHA1, 5). 
+alias SHA1HashMixerRNG = HashMixerRNG!(SHA1, 5); + +//test heap-based arrays +unittest +{ + import std.algorithm; + import std.range; + import std.typetuple; + import std.digest.md; + + //number of iteration counts + enum iterationCount = 10; + + enum uint factor = 5; + + //tested hash functions + foreach(Hash; TypeTuple!(SHA1, MD5)) + { + //test for different number random bytes in the buffer from 10 to 80 inclusive + foreach(bufferSize; iota(10, 81)) + { + auto rng = new HashMixerRNG!(Hash, factor)(); + + //holds the random number + ubyte[] rand = new ubyte[bufferSize]; + + //holds the previous random number after the creation of the next one + ubyte[] prevRadn = new ubyte[bufferSize]; + + //create the next random number + rng.read(prevRadn); + + assert(!equal(prevRadn, take(repeat(0), bufferSize)), "it's almost unbelievable - all random bytes is zero"); + + //take "iterationCount" arrays with random bytes + foreach(i; 0..iterationCount) + { + //create the next random number + rng.read(rand); + + assert(!equal(rand, take(repeat(0), bufferSize)), "it's almost unbelievable - all random bytes is zero"); + + assert(!equal(rand, prevRadn), "it's almost unbelievable - current and previous random bytes are equal"); + + //make sure that we have different random bytes in different hash digests + if(bufferSize > digestLength!Hash) + { + //begin and end of random number array + ubyte[] begin = rand[0..digestLength!Hash]; + ubyte[] end = rand[digestLength!Hash..$]; + + //compare all nearby hash digests + while(end.length >= digestLength!Hash) + { + assert(!equal(begin, end[0..digestLength!Hash]), "it's almost unbelievable - random bytes in different hash digests are equal"); + + //go to the next hash digests + begin = end[0..digestLength!Hash]; + end = end[digestLength!Hash..$]; + } + } + + //copy current random bytes for next iteration + prevRadn[] = rand[]; + } + } + } +} + +//test stack-based arrays +unittest +{ + import std.algorithm; + import std.range; + import 
std.array; + import std.typetuple; + import std.digest.md; + + //number of iteration counts + enum iterationCount = 10; + + enum uint factor = 5; + + //tested hash functions + foreach(Hash; TypeTuple!(SHA1, MD5)) + { + //test for different number random bytes in the buffer + foreach(bufferSize; TypeTuple!(10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80)) + { + //array that contains only zeros + ubyte[bufferSize] zeroArray; + zeroArray[] = take(repeat(cast(ubyte)0), bufferSize).array()[]; + + auto rng = new HashMixerRNG!(Hash, factor)(); + + //holds the random number + ubyte[bufferSize] rand; + + //holds the previous random number after the creation of the next one + ubyte[bufferSize] prevRadn; + + //create the next random number + rng.read(prevRadn); + + assert(prevRadn != zeroArray, "it's almost unbelievable - all random bytes is zero"); + + //take "iterationCount" arrays with random bytes + foreach(i; 0..iterationCount) + { + //create the next random number + rng.read(rand); + + assert(prevRadn != zeroArray, "it's almost unbelievable - all random bytes is zero"); + + assert(rand != prevRadn, "it's almost unbelievable - current and previous random bytes are equal"); + + //make sure that we have different random bytes in different hash digests + static if(bufferSize > digestLength!Hash) + { + //begin and end of random number array + ubyte[] begin = rand[0..digestLength!Hash]; + ubyte[] end = rand[digestLength!Hash..$]; + + //compare all nearby hash digests + while(end.length >= digestLength!Hash) + { + assert(!equal(begin, end[0..digestLength!Hash]), "it's almost unbelievable - random bytes in different hash digests are equal"); + + //go to the next hash digests + begin = end[0..digestLength!Hash]; + end = end[digestLength!Hash..$]; + } + } + + //copy current random bytes for next iteration + prevRadn[] = rand[]; + } + } + } +} + + +/** + Thrown when an error occurs during random number generation. 
+*/ +class CryptoException : Exception +{ + this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null) @safe pure nothrow + { + super(msg, file, line, next); + } +} + + +version(Windows) +{ + import core.sys.windows.windows; + + private extern(Windows) nothrow + { + alias HCRYPTPROV = size_t; + + enum LPCTSTR NULL = cast(LPCTSTR)0; + enum DWORD PROV_RSA_FULL = 1; + enum DWORD CRYPT_VERIFYCONTEXT = 0xF0000000; + + BOOL CryptAcquireContextA(HCRYPTPROV *phProv, LPCTSTR pszContainer, LPCTSTR pszProvider, DWORD dwProvType, DWORD dwFlags); + alias CryptAcquireContext = CryptAcquireContextA; + + BOOL CryptReleaseContext(HCRYPTPROV hProv, DWORD dwFlags); + + BOOL CryptGenRandom(HCRYPTPROV hProv, DWORD dwLen, BYTE *pbBuffer); + } +} diff --git a/dub.sdl b/dub.sdl new file mode 100644 index 0000000..49e617d --- /dev/null +++ b/dub.sdl @@ -0,0 +1,14 @@ +name "vibe-inet" +description "Internet standard functionality" +authors "Sönke Ludwig" +copyright "Copyright © 2016-2024, Sönke Ludwig" +license "MIT" + +dependency "vibe-core" version=">=2.8.1 <3.0.0-0" +dependency "vibe-serialization" version="*" +dependency "vibe-stream" version="*" +dependency ":textfilter" version="*" +targetType "library" + +subPackage "textfilter" +subPackage "crypto" diff --git a/run-ci.sh b/run-ci.sh new file mode 100755 index 0000000..be72513 --- /dev/null +++ b/run-ci.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +set -e -x -o pipefail + +DCVER=${DC#*-} +DC=${DC%-*} +if [ "$DC" == "ldc" ]; then DC="ldc2"; fi +DUB_FLAGS="${DUB_FLAGS:-} --compiler=$DC" + + +# Check for trailing whitespace" +grep -nrI --include='*.d' '\s$' . 
&& (echo "Trailing whitespace found"; exit 1) + +# test for successful release build +dub build -b release $DUB_FLAGS + +# test for successful 32-bit build +if [ "$DC" == "dmd" ]; then + dub build --arch=x86 $DUB_FLAGS +fi + +dub test $DUB_FLAGS +dub test :crypto $DUB_FLAGS +dub test :textfilter $DUB_FLAGS diff --git a/source/vibe/inet/message.d b/source/vibe/inet/message.d new file mode 100644 index 0000000..6ca9127 --- /dev/null +++ b/source/vibe/inet/message.d @@ -0,0 +1,438 @@ +/** + Internet message handling according to RFC822/RFC5322 + + Copyright: © 2012-2014 Sönke Ludwig + License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file. + Authors: Sönke Ludwig +*/ +module vibe.inet.message; + +import vibe.core.log; +import vibe.core.stream; +import vibe.stream.operations; +import vibe.container.internal.appender; +import vibe.container.internal.utilallocator; +import vibe.container.dictionarylist; +import vibe.internal.string; + +import std.conv; +import std.datetime; +import std.exception; +import std.range; +import std.string; + + +/** + Parses an internet header according to RFC5322 (with RFC822 compatibility). 
+ + Params: + input = Input stream from which the header is parsed + dst = Destination map to write into + max_line_length = The maximum allowed length of a single line + alloc = Custom allocator to use for allocating strings + rfc822_compatible = Flag indicating that duplicate fields should be merged using a comma +*/ +void parseRFC5322Header(InputStream)(InputStream input, ref InetHeaderMap dst, size_t max_line_length = 1000) + if (isInputStream!InputStream) +{ + parseRFC5322Header(input, dst, max_line_length, vibeThreadAllocator()); +} +/// ditto +void parseRFC5322Header(InputStream, Allocator)(InputStream input, ref InetHeaderMap dst, size_t max_line_length, Allocator alloc, bool rfc822_compatible = true) + if (isInputStream!InputStream) +{ + string hdr, hdrvalue; + + void addPreviousHeader() { + if (!hdr.length) return; + if (rfc822_compatible) { + if (auto pv = hdr in dst) { + *pv ~= "," ~ hdrvalue; // RFC822 legacy support + } else { + dst[hdr] = hdrvalue; + } + } else dst.addField(hdr, hdrvalue); + } + + string readStringLine() @safe { + auto ret = input.readLine(max_line_length, "\n", alloc); + if (ret.length && ret[$-1] == '\r') ret = ret[0..$-1]; + return () @trusted { return cast(string)ret; } (); + } + + string ln; + while ((ln = readStringLine()).length > 0) { + if (ln[0] != ' ' && ln[0] != '\t') { + addPreviousHeader(); + + auto colonpos = ln.indexOf(':'); + enforce(colonpos >= 0, "Header is missing ':'."); + enforce(colonpos > 0, "Header name is empty."); + hdr = ln[0..colonpos].stripA(); + hdrvalue = ln[colonpos+1..$].stripA(); + } else { + hdrvalue ~= " " ~ ln.stripA(); + } + } + addPreviousHeader(); +} + +unittest { // test usual, empty and multiline header + import vibe.stream.memory; + ubyte[] hdr = cast(ubyte[])"A: a \r\nB: \r\nC:\r\n\tc\r\n\r\n".dup; + InetHeaderMap map; + parseRFC5322Header(createMemoryStream(hdr), map); + assert(map.length == 3); + assert(map["A"] == "a"); + assert(map["B"] == ""); + assert(map["C"] == " c"); +} + +unittest 
{ // fail for empty header names + import std.exception; + import vibe.stream.memory; + auto hdr = cast(ubyte[])": test\r\n\r\n".dup; + InetHeaderMap map; + assertThrown(parseRFC5322Header(createMemoryStream(hdr), map)); +} + +unittest { // tolerant line separator header parser - see: https://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html#sec19.3 + import std.exception; + import vibe.stream.memory; + auto hdr = cast(ubyte[])"a: test\r\nb: foo\nc: bar\n\nbody".dup; + InetHeaderMap map; + parseRFC5322Header(createMemoryStream(hdr), map); + assert(map.length == 3); + assert(map["a"] == "test"); + assert(map["b"] == "foo"); + assert(map["c"] == "bar"); +} + +private immutable monthStrings = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]; + +/** + Writes an RFC-822/5322 date string to the given output range. +*/ +void writeRFC822DateString(R)(ref R dst, SysTime time) +{ + writeRFC822DateString(dst, cast(Date)time); +} +/// ditto +void writeRFC822DateString(R)(ref R dst, Date date) +{ + static immutable dayStrings = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"]; + dst.put(dayStrings[date.dayOfWeek]); + dst.put(", "); + writeDecimal2(dst, date.day); + dst.put(' '); + dst.put(monthStrings[date.month-1]); + dst.put(' '); + writeDecimal(dst, date.year); +} + +/** + Writes an RFC-822 time string to the given output range. +*/ +void writeRFC822TimeString(R)(ref R dst, SysTime time) +{ + writeRFC822TimeString(dst, cast(TimeOfDay)time, getRFC822TimeZoneOffset(time)); +} +/// ditto +void writeRFC822TimeString(R)(ref R dst, TimeOfDay time, int tz_offset) +{ + writeDecimal2(dst, time.hour); + dst.put(':'); + writeDecimal2(dst, time.minute); + dst.put(':'); + writeDecimal2(dst, time.second); + if (tz_offset == 0) dst.put(" GMT"); + else { + dst.put(' '); + dst.put(tz_offset >= 0 ? 
'+' : '-'); + if (tz_offset < 0) tz_offset = -tz_offset; + writeDecimal2(dst, tz_offset / 60); + writeDecimal2(dst, tz_offset % 60); + } +} + +/** + Writes an RFC-822 date+time string to the given output range. +*/ +void writeRFC822DateTimeString(R)(ref R dst, SysTime time) +{ + writeRFC822DateTimeString(dst, cast(DateTime)time, getRFC822TimeZoneOffset(time)); +} +/// ditto +void writeRFC822DateTimeString(R)(ref R dst, DateTime time, int tz_offset) +{ + writeRFC822DateString(dst, time.date); + dst.put(' '); + writeRFC822TimeString(dst, time.timeOfDay, tz_offset); +} + +/** + Returns the RFC-822 time string representation of the given time. +*/ +string toRFC822TimeString(SysTime time) +@trusted { + auto ret = new FixedAppender!(string, 14); + writeRFC822TimeString(ret, time); + return ret.data; +} + +/** + Returns the RFC-822/5322 date string representation of the given time. +*/ +string toRFC822DateString(SysTime time) +@trusted { + auto ret = new FixedAppender!(string, 16); + writeRFC822DateString(ret, time); + return ret.data; +} + +/** + Returns the RFC-822 date+time string representation of the given time. +*/ +string toRFC822DateTimeString(SysTime time) +@trusted { + auto ret = new FixedAppender!(string, 31); + writeRFC822DateTimeString(ret, time); + return ret.data; +} + +/** + Returns the offset of the given time from UTC in minutes. +*/ +int getRFC822TimeZoneOffset(SysTime time) +@safe { + return cast(int)time.utcOffset.total!"minutes"; +} + +/// Parses a date+time string according to RFC-822/5322. 
+alias parseRFC822DateTimeString = parseRFC822DateTime;
+
+unittest {
+	import std.typecons;
+
+	auto times = [
+		tuple("Wed, 02 Oct 2002 08:00:00 GMT", SysTime(DateTime(2002, 10, 02, 8, 0, 0), UTC())),
+		tuple("Wed, 02 Oct 2002 08:00:00 +0200", SysTime(DateTime(2002, 10, 02, 8, 0, 0), new immutable SimpleTimeZone(120.minutes))),
+		tuple("Wed, 02 Oct 2002 08:00:00 -0130", SysTime(DateTime(2002, 10, 02, 8, 0, 0), new immutable SimpleTimeZone(-90.minutes)))
+	];
+	foreach (t; times) {
+		auto st = parseRFC822DateTimeString(t[0]);
+		auto ts = toRFC822DateTimeString(t[1]);
+		assert(st == t[1], "Parse error: "~t[0]);
+		assert(parseRFC822DateTimeString(ts) == t[1], "Stringify error: "~ts);
+	}
+}
+
+
+/**
+	Decodes a string in encoded-word form.
+
+	Text outside of encoded words is copied verbatim. Charset and encoding
+	names are matched case-insensitively. UTF-8 and ISO-8859-1/-15 payloads
+	are converted to UTF-8, any other charset is passed through `sanitize`.
+
+	Params:
+		dst = Output range receiving the decoded UTF-8 text
+		encoded = The raw header text, possibly containing encoded words
+
+	Throws:
+		An `Exception` if an encoded word is malformed or not terminated.
+
+	See_Also: $(LINK http://tools.ietf.org/html/rfc2047#section-2)
+*/
+string decodeEncodedWords()(string encoded)
+{
+	import std.array;
+	Appender!string dst;
+	() @trusted {
+		dst = appender!string();
+		decodeEncodedWords(dst, encoded);
+	} ();
+	return dst.data;
+}
+/// ditto
+void decodeEncodedWords(R)(ref R dst, string encoded)
+{
+	import std.base64;
+	import std.encoding;
+	import std.uni : toUpper;
+
+	while (!encoded.empty) {
+		auto start = encoded.indexOf("=?");
+		if (start < 0) {
+			// no (further) encoded word - copy the remainder verbatim
+			dst.put(encoded);
+			break;
+		}
+
+		// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
+		auto word = encoded[start+2 .. $];
+		auto csend = word.indexOf('?');
+		enforce(csend > 0 && word.length > csend + 2 && word[csend+2] == '?',
+			"Malformed encoded word.");
+		auto cs = word[0 .. csend];
+		auto enc = word[csend+1];
+		auto data = word[csend+3 .. $];
+
+		// Search for the terminator only behind the second '?'. Otherwise
+		// encoded text that starts with '=' (e.g. "=?UTF-8?Q?=C3=A4?=") is
+		// mistaken for the end of the word.
+		auto end = data.indexOf("?=");
+		enforce(end >= 0, "Unterminated encoded word.");
+
+		dst.put(encoded[0 .. start]);
+		encoded = data[end+2 .. $];
+		data = data[0 .. end];
+
+		const(ubyte)[] textenc;
+		switch (enc) {
+			default: textenc = cast(const(ubyte)[])data; break;
+			case 'B', 'b': textenc = Base64.decode(data); break;
+			case 'Q', 'q': textenc = QuotedPrintable.decode(data, true); break;
+		}
+
+		switch (cs.toUpper()) {
+			default: dst.put(sanitize(cast(string)textenc)); break;
+			case "UTF-8": dst.put(cast(string)textenc); break;
+			case "ISO-8859-15": // hack...
+			case "ISO-8859-1":
+				string tmp;
+				transcode(cast(Latin1String)textenc, tmp);
+				dst.put(tmp);
+				break;
+		}
+	}
+}
+
+unittest {
+	assert(decodeEncodedWords("plain text") == "plain text");
+	assert(decodeEncodedWords("=?UTF-8?B?SGVsbG8=?=") == "Hello");
+	// encoded text starting with '=' must not be taken for the terminator
+	assert(decodeEncodedWords("=?UTF-8?Q?=C3=A4?=") == "\u00E4");
+	// charset and encoding are case-insensitive, '_' encodes a space
+	assert(decodeEncodedWords("Re: =?utf-8?q?a_b?= c") == "Re: a b c");
+}
+
+
+/**
+	Decodes a From/To header value as it appears in emails.
+
+	Params:
+		header = The raw header value, e.g. `"John Doe" <john@example.com>`
+		name = Receives the display name, or the whole header if no
+			`<address>` part is present
+		address = Receives the address between the last '<' and the
+			trailing '>', or the whole header if no such part is present
+
+	Throws:
+		An `Exception` if the header ends with '>' but contains no '<', or
+		if the resulting name is not valid UTF-8.
+*/
+void decodeEmailAddressHeader(string header, out string name, out string address)
+@safe {
+	import std.utf;
+
+	scope(failure) logDebug("emailbase %s", header);
+	header = decodeEncodedWords(header);
+	scope(failure) logDebug("emaildec %s", header);
+
+	if (header.length && header[$-1] == '>') {
+		auto sidx = header.lastIndexOf('<');
+		enforce(sidx >= 0);
+		address = header[sidx+1 .. $-1];
+		header = header[0 .. sidx].strip();
+
+		// only remove quotes that actually enclose the display name, which
+		// may also be empty (plain "<address>" form)
+		if (header.length >= 2 && header[0] == '"' && header[$-1] == '"') {
+			name = header[1 .. $-1];
+		} else {
+			name = header;
+		}
+	} else {
+		name = header;
+		address = header;
+	}
+	validate(name);
+}
+
+unittest {
+	string name, addr;
+	decodeEmailAddressHeader("\"John Doe\" <john@example.com>", name, addr);
+	assert(name == "John Doe" && addr == "john@example.com");
+	decodeEmailAddressHeader("John Doe <john@example.com>", name, addr);
+	assert(name == "John Doe" && addr == "john@example.com");
+	decodeEmailAddressHeader("<john@example.com>", name, addr);
+	assert(name == "" && addr == "john@example.com");
+	decodeEmailAddressHeader("john@example.com", name, addr);
+	assert(name == "john@example.com" && addr == "john@example.com");
+	decodeEmailAddressHeader("", name, addr);
+	assert(name == "" && addr == "");
+}
+
+
+/**
+	Decodes a message body according to the specified content transfer
+	encoding ("Content-Transfer-Encoding" header).
+
+	The result is returned as a UTF-8 string.
+*/
+string decodeMessage(in ubyte[] message_body, string content_transfer_encoding)
+@safe {
+	import std.algorithm;
+	import std.base64;
+	import std.encoding : sanitize;
+
+	const(ubyte)[] msg = message_body;
+	immutable(ubyte)[] msgdec;
+	switch (content_transfer_encoding) {
+		default:
+			msgdec = msg.idup;
+			break;
+		case "quoted-printable":
+			msgdec = QuotedPrintable.decode(cast(const(char)[])msg);
+			break;
+		case "base64":
+			try msgdec = Base64.decode(msg);
+			catch(Exception e){
+				// retry with all line breaks removed before giving up
+				auto dst = appender!(immutable(ubyte)[])();
+				try {
+					auto dec = Base64.decoder(filter!(ch => ch != '\r' && ch != '\n')(msg));
+					while( !dec.empty ){
+						dst.put(dec.front);
+						dec.popFront();
+					}
+				} catch(Exception e){
+					dst.put(cast(const(ubyte)[])"\r\n-------\r\nDECODING ERROR: ");
+					dst.put(cast(const(ubyte)[])() @trusted { return e.toString(); } ());
+				}
+				msgdec = dst.data();
+			}
+			break;
+	}
+	// TODO: do character encoding etc.
/**
	Performs quoted-printable decoding.
*/
struct QuotedPrintable {
	/**
		Decodes quoted-printable text into raw bytes.

		Each `=XY` sequence is replaced by the byte with hexadecimal value
		`XY`; a `=` followed by CRLF is a soft line break and produces no
		output. All other characters are copied unchanged, except that `_`
		becomes a space when `in_header` is set.

		Params:
			input = The quoted-printable encoded text
			in_header = Enables the header variant in which `_` encodes a space

		Throws: `UTFException` if fewer than two characters follow a `=`.
			Errors from parsing the hexadecimal digits are propagated.
	*/
	static immutable(ubyte)[] decode(in char[] input, bool in_header = false)
	@safe {
		import std.utf : UTFException;

		auto output = appender!(immutable(ubyte)[])();
		size_t pos = 0;
		while (pos < input.length) {
			immutable ch = input[pos];

			if (ch != '=') {
				if (in_header && ch == '_') output.put(' ');
				else output.put(ch);
				pos++;
				continue;
			}

			// an escape always consumes the '=' plus two more characters
			if (input.length - pos <= 2) throw new UTFException("");
			auto code = input[pos+1 .. pos+3];
			if (code != cast(const(ubyte)[])"\r\n")
				output.put(code.parse!ubyte(16));
			pos += 3;
		}
		return output.data();
	}
}
/** Parses a URL string into its components.

	A string starting with '/' is treated as a local URI only (path, query
	string and anchor). Otherwise the schema is read up to the first ':',
	followed by an optional "//" marker, an optional "user[:password]@"
	part, the host and an optional ":port". Everything from the first '/'
	after that point is handed to the `localURI` setter.

	Params:
		url_string = The URL text to parse
		encoded = If `true`, host and local part are taken as they are. If
			`false`, each dot separated label of a non-bracketed host is
			passed through `punyEncode` and the local part through `encode`.

	Throws: An exception raised by `enforce` for an empty string, a missing
		or non-alphabetic schema start, an empty user name, an empty port
		or a missing host after "//" (except for the "file" schema).
		Conversion errors for the port number are propagated.
*/
private this(string url_string, bool encoded)
{
	auto str = url_string;
	enforce(str.length > 0, "Empty URL.");
	if( str[0] != '/' ){
		// everything up to the first colon is the schema
		auto idx = str.indexOf(':');
		enforce(idx > 0, "No schema in URL:"~str);
		m_schema = str[0 .. idx];
		enforce(m_schema[0].isAlpha,
				"Schema must start with an alphabetical char, found: " ~
				m_schema[0]);
		str = str[idx+1 .. $];
		bool requires_host = false;

		if (str.startsWith("//")) {
			// proto://server/path style
			requires_host = true;
			str = str[2 .. $];
		}

		// the authority part extends up to the first slash (or the end)
		auto si = str.indexOf('/');
		if( si < 0 ) si = str.length;
		auto ai = str[0 .. si].indexOf('@');
		sizediff_t hs = 0; // start index of the host within str
		if( ai >= 0 ){
			hs = ai+1;
			auto ci = str[0 .. ai].indexOf(':');
			if( ci >= 0 ){
				m_username = str[0 .. ci];
				m_password = str[ci+1 .. ai];
			} else m_username = str[0 .. ai];
			enforce(m_username.length > 0, "Empty user name in URL.");
		}

		m_host = str[hs .. si];

		// Stores the port found behind the first ':' of src in m_port and
		// returns the index of that colon (negative if there is none).
		auto findPort ( string src )
		{
			auto pi = src.indexOf(':');
			if(pi > 0) {
				enforce(pi < src.length-1, "Empty port in URL.");
				m_port = to!ushort(src[pi+1..$]);
			}
			return pi;
		}


		auto ip6 = m_host.indexOf('[');
		if (ip6 == 0) { // [ must be first char
			auto pe = m_host.indexOf(']');
			if (pe > 0) {
				// only look for the port behind the closing bracket, as
				// the bracketed address itself contains colons
				findPort(m_host[pe..$]);
				m_host = m_host[1 .. pe];
			}
		}
		else {
			auto pi = findPort(m_host);
			if(pi > 0) {
				m_host = m_host[0 .. pi];
			}
			if (!encoded)
				m_host = m_host.splitter('.').map!(punyEncode).join('.');
		}

		enforce(!requires_host || m_schema == "file" || m_host.length > 0,
			"Empty server name in URL.");
		str = str[si .. $];
	}

	this.localURI = (encoded) ? str : str.encode;
}
/** Looks up the default port registered for a schema.

	The schema name is lower-cased before the lookup.

	Params:
		schema = Name of the schema

	Returns: The registered default port, or 0 if the schema is not known
		or no schema map is available.
*/
static ushort defaultPort(string schema)
nothrow {
	import core.atomic : atomicLoad;
	import std.uni : toLower;

	string key;
	try key = toLower(schema);
	catch (Exception e) assert(false, e.msg);

	auto registry = atomicLoad(map_commonInternetSchemas);
	if (!registry) return 0;
	if (!registry.contains(key)) return 0;
	return registry.get(key);
}
/** The URL to the parent path with query string and anchor stripped.

	Schema, host, user name and password are copied unchanged. The stored
	port is copied as is, so a URL that relies on the default port of its
	schema yields a parent that does so, too.
*/
@property URL parentURL()
const {
	URL ret;
	ret.schema = schema;
	ret.host = host;
	// Copy the raw field instead of the `port` property. The property
	// substitutes the default port of the schema, which would turn an
	// implicit port into an explicit one. The result would then compare
	// unequal to the same URL parsed from a string and gain a redundant
	// ":port" in `toString`.
	ret.m_port = m_port;
	ret.username = username;
	ret.password = password;
	ret.path = path.parentPath;
	return ret;
}
/** Normalizes the percent encoded triplets of a string.

	Triplets that encode an unreserved character (`A-Z a-z 0-9 - . _ ~`)
	are replaced by that character. All other triplets are kept, with
	their hexadecimal digits converted to upper case. Characters outside
	of triplets are copied unchanged.

	Params:
		input = The percent encoded text

	Returns: The normalized text.

	Throws: An `Exception` if a '%' is followed by fewer than two
		characters. Conversion errors for invalid hexadecimal digits are
		propagated.
*/
private string normalize_percent_encoding(scope const(char)[] input)
{
	auto normalized = appender!string;
	normalized.reserve(input.length);

	for (size_t i = 0; i < input.length; i++)
	{
		const char c = input[i];
		if (c != '%')
		{
			normalized ~= c;
			continue;
		}

		// The input may stem from an unvalidated query string or anchor,
		// so a truncated triplet must be reported as a recoverable error
		// instead of halting the program.
		enforce(input.length >= i + 3, "Invalid percent encoding");

		char conv = cast(char) input[i + 1 .. i + 3].to!ubyte(16);
		switch (conv)
		{
			case 'A': .. case 'Z':
			case 'a': .. case 'z':
			case '0': .. case '9':
			case '-': case '.': case '_': case '~':
				normalized ~= conv; // Decode unreserved
				break;
			default:
				normalized ~= input[i .. i + 3].toUpper(); // Uppercase HEX
				break;
		}

		// skip the two hex digits that were just consumed
		i += 2;
	}

	return normalized.data;
}
/** Normalizes the content of this `URL` in place.

	The following transformations are applied:
	- Schema and host are converted to lower case
	- The port is cleared if it equals the default port of the schema
	- Percent encoded triplets in query string, anchor and path segments
	  are normalized using `normalize_percent_encoding`
	- The path is replaced by its `normalized` form
	- A trailing slash is set if the path is empty or `isDirectory` is set

	Params:
		isDirectory = Path of the URL represents a directory; a trailing
			slash is set on the path when `true`
*/
void normalize(bool isDirectory = false)
{
	import std.uni : toLower;

	// schema and host
	m_schema = toLower(m_schema);
	m_host = toLower(m_host);

	// an explicit port equal to the schema default is redundant
	immutable schema_default = URL.defaultPort(m_schema);
	if (m_port == schema_default)
		m_port = 0;

	// query string and anchor
	m_queryString = normalize_percent_encoding(m_queryString);
	m_anchor = normalize_percent_encoding(m_anchor);

	// path: normalize the path first, then each segment's percent encoding
	auto segments = m_path.normalized.bySegment2
		.map!(seg => InetPath.Segment2.fromTrustedEncodedString(
			normalize_percent_encoding(seg.encodedName)))
		.array;
	m_path = InetPath(segments);

	if (m_path.empty || isDirectory)
		m_path.endsWithSlash = true;
}
/** Tests whether this URL lies below another URL.

	Schema and host must be equal and the path segments of `rhs` must be
	a prefix of the path segments of this URL.
*/
bool startsWith(const URL rhs)
const nothrow {
	// FIXME: also consider user, port, querystring, anchor etc
	if (m_schema != rhs.m_schema || m_host != rhs.m_host)
		return false;

	static if (is(InetPath.Segment2)) {
		return this.path.bySegment2.startsWith(rhs.path.bySegment2);
	} else {
		return this.path.bySegment.startsWith(rhs.path.bySegment);
	}
}
/** Defines an ordering of URLs.

	URLs are ordered by schema, then host, then the string form of the
	path and finally the stored port. These are the same fields that
	`opEquals` compares.

	Returns: A negative value if this URL sorts before `rhs`, a positive
		value if it sorts after `rhs` and 0 if none of the compared
		fields yields an ordering.
*/
int opCmp(ref const URL rhs) const nothrow {
	if (m_schema != rhs.m_schema) return m_schema.cmp(rhs.m_schema);
	if (m_host != rhs.m_host) return m_host.cmp(rhs.m_host);
	if (m_path != rhs.m_path) return cmp(m_path.toString, rhs.m_path.toString);
	// the port takes part in opEquals, so it has to take part here, too
	if (m_port != rhs.m_port) return m_port < rhs.m_port ? -1 : 1;
	// equal URLs must compare as 0, not as "greater"
	return 0;
}
/** Adds the name of a schema to be treated as double-slash style.

	The schema name is stored in lower case. An existing entry with the
	same name is overwritten with the new port.

	Params:
		schema = Name of the schema
		port = Default port for the schema

	See_also: `isCommonInternetSchema`, RFC 1738 Section 3.1
*/
void registerCommonInternetSchema(string schema, ushort port)
@trusted nothrow {
	import core.atomic : atomicLoad, cas;
	import std.uni : toLower;

	string lowerschema;
	try {
		lowerschema = schema.toLower();
	} catch (Exception e) {
		assert(false, e.msg);
	}

	// isCommonInternetSchema lower-cases into a 128 character buffer and
	// rejects longer names
	assert(lowerschema.length < 128, "Only schemas with less than 128 characters are supported");

	// The map is never modified in place: a copy of the current map gets
	// the new entry and is swapped in using compare-and-swap. If the
	// swap fails, the loop starts over from the map that is current then.
	while (true) {
		auto olds = atomicLoad(map_commonInternetSchemas);
		auto news = olds ? olds.dup : new SchemaDefaultPortMap;
		news.add(lowerschema, port);
		static if (__VERSION__ < 2094) {
			// work around bogus shared violation error on earlier versions of Druntime
			if (cas(cast(shared(SchemaDefaultPortMap*)*)&map_commonInternetSchemas, cast(shared(SchemaDefaultPortMap)*)olds, cast(shared(SchemaDefaultPortMap)*)news))
				break;
		} else {
			if (cas(&map_commonInternetSchemas, olds, cast(immutable)news))
				break;
		}
	}
}
/// Maps schema names to their default port numbers.
private struct SchemaDefaultPortMap {
	ushort[string] m_data;

	/// Inserts a schema or replaces the port of an existing one.
	void add(string str, ushort port)
	@safe nothrow {
		m_data[str] = port;
	}

	/// Tests whether a schema name is present in the map.
	bool contains(string str)
	const @safe nothrow @nogc {
		return (str in m_data) !is null;
	}

	/// Returns the port stored for a schema name, which must be present.
	ushort get(string str)
	const @safe nothrow {
		return m_data[str];
	}

	/// Creates a newly allocated map that holds the same entries.
	SchemaDefaultPortMap* dup()
	const @safe nothrow {
		auto copy = new SchemaDefaultPortMap;
		foreach (entry; m_data.byKeyValue)
			copy.m_data[entry.key] = entry.value;
		return copy;
	}
}
/* Encodes `input` with puny encoding

	If input consists only of basic code points (those below
	`initial_n`), input is returned as is. Otherwise the result is the
	"xn--" prefix, the basic code points of the input, a '-' delimiter
	(if there are any basic code points) and the encoded non-basic code
	points.

	See also: RFC 3492 Section 6.3
*/
string punyEncode (in string input)
@safe {
	uint n = initial_n;
	uint delta = 0;
	uint bias = initial_bias;
	uint h;
	uint b;
	dchar m = dchar.max; // minchar
	bool delta_overflow;

	uint input_len = 0;
	auto output = appender!string();

	output.put("xn--");

	foreach (dchar cp; input)
	{
		// Only code points strictly below initial_n are basic. U+0080
		// itself is the first non-basic code point and has to go
		// through the encoding loop below.
		if (cp < initial_n)
		{
			output.put(cast(char) cp);
			h += 1;
		}
		// Count length of input as code points, `input.length` counts bytes
		input_len += 1;
	}

	b = h;
	if (b == input_len)
		return input; // No need to puny encode

	if (b > 0)
		output.put('-');

	while (h < input_len)
	{
		// find the smallest code point that has not been handled yet
		m = dchar.max;
		foreach (dchar cp; input)
		{
			if (n <= cp && cp < m)
				m = cp;
		}

		assert(m != dchar.max, "Punyencoding failed, cannot find code point");

		delta = addu(delta, ((m - n) * (h + 1)), delta_overflow);
		assert(!delta_overflow, "Punyencoding failed, delta overflow");

		n = m;

		foreach (dchar cp; input)
		{
			if (cp < n)
				delta += 1;

			if (cp == n)
			{
				// emit delta as a generalized variable-length integer
				uint q = delta;
				uint k = base;

				while (true)
				{
					uint t;
					if (k <= bias /* + tmin */)
						t = tmin;
					else if (k >= bias + tmax)
						t = tmax;
					else
						t = k - bias;

					if (q < t) break;

					output.put(punyDigitToCP(t + ((q - t) % (base - t))));
					q = (q - t) / (base - t);
					k += base;
				}
				output.put(punyDigitToCP(q));
				bias = punyAdapt(delta, h + 1, h == b);
				delta = 0;
				h += 1;
			}
		}
		delta += 1;
		n += 1;
	}

	return output.data;
}
"/q?query"; + assert(url.schema == "http"); + assert(url.host == "example.com"); + assert(url.queryString == "query"); + assert(url.anchor == ""); + assert(url.pathString == "/q"); + url.localURI = "/q#anchor"; + assert(url.schema == "http"); + assert(url.host == "example.com"); + assert(url.queryString == ""); + assert(url.anchor == "anchor"); + assert(url.pathString == "/q"); +} + +//websocket unittest +unittest { + URL url = URL("ws://127.0.0.1:8080/echo"); + assert(url.host == "127.0.0.1"); + assert(url.port == 8080); + assert(url.localURI == "/echo"); +} + +//rtsp unittest +unittest { + URL url = URL("rtsp://127.0.0.1:554/echo"); + assert(url.host == "127.0.0.1"); + assert(url.port == 554); + assert(url.localURI == "/echo"); +} + +unittest { + auto p = PosixPath("/foo bar/boo oom/"); + URL url = URL("http", "example.com", 0, p); // constructor test + assert(url.path == cast(InetPath)p); + url.path = p; + assert(url.path == cast(InetPath)p); // path assignement test + assert(url.pathString == "/foo%20bar/boo%20oom/"); + assert(url.toString() == "http://example.com/foo%20bar/boo%20oom/"); + url.pathString = "/foo%20bar/boo%2foom/"; + assert(url.pathString == "/foo%20bar/boo%2foom/"); + assert(url.toString() == "http://example.com/foo%20bar/boo%2foom/"); +} + +unittest { + URL url = URL("http://user:password@example.com"); + assert(url.toString() == "http://user:password@example.com"); + + url = URL("http://user@example.com"); + assert(url.toString() == "http://user@example.com"); +} + +unittest { + auto url = URL("http://example.com/some%2bpath"); + assert((cast(PosixPath)url.path).toString() == "/some+path", url.path.toString()); +} + +unittest { + assert(URL("file:///test").pathString == "/test"); + assert(URL("file:///test").port == 0); + assert(URL("file:///test").path.toString() == "/test"); + assert(URL("file://test").host == "test"); + assert(URL("file://test").pathString() == ""); + assert(URL("file://./test").host == "."); + 
assert(URL("file://./test").pathString == "/test"); + assert(URL("file://./test").path.toString() == "/test"); +} + +unittest { // issue #1318 + try { + URL("http://something/inval%id"); + assert(false, "Expected to throw an exception."); + } catch (Exception e) {} +} + +unittest { + assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock").schema == "http+unix"); + assert(URL("https+unix://%2Fvar%2Frun%2Fdocker.sock").schema == "https+unix"); + assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock").host == "%2Fvar%2Frun%2Fdocker.sock"); + assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock").pathString == ""); + assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock/container/json").pathString == "/container/json"); + auto url = URL("http+unix://%2Fvar%2Frun%2Fdocker.sock/container/json"); + assert(URL(url.toString()) == url); +} + +unittest { + import vibe.data.serialization; + static assert(isStringSerializable!URL); +} + +unittest { // issue #1732 + auto url = URL("tcp://0.0.0.0:1234"); + url.port = 4321; + assert(url.toString == "tcp://0.0.0.0:4321", url.toString); +} + +unittest { // host name role in file:// URLs + auto url = URL.parse("file:///foo/bar"); + assert(url.host == ""); + assert(url.path == InetPath("/foo/bar")); + assert(url.toString() == "file:///foo/bar"); + + url = URL.parse("file://foo/bar/baz"); + assert(url.host == "foo"); + assert(url.path == InetPath("/bar/baz")); + assert(url.toString() == "file://foo/bar/baz"); +} + +unittest { // native path <-> URL conversion + import std.exception : assertThrown; + + auto url = URL(NativePath("/foo/bar")); + assert(url.schema == "file"); + assert(url.host == ""); + assert(url.path == InetPath("/foo/bar")); + assert(url.toNativePath == NativePath("/foo/bar")); + + assertThrown(URL("http://example.org/").toNativePath); + assertThrown(URL(NativePath("foo/bar"))); +} + +unittest { // URL Normalization + auto url = URL.parse("http://example.com/foo%2a"); + assert(url.normalized.toString() == 
"http://example.com/foo%2A"); + + url = URL.parse("HTTP://User@Example.COM/Foo"); + assert(url.normalized.toString() == "http://User@example.com/Foo"); + + url = URL.parse("http://example.com/%7Efoo"); + assert(url.normalized.toString() == "http://example.com/~foo"); + + url = URL.parse("http://example.com/foo/./bar/baz/../qux"); + assert(url.normalized.toString() == "http://example.com/foo/bar/qux"); + + url = URL.parse("http://example.com"); + assert(url.normalized.toString() == "http://example.com/"); + + url = URL.parse("http://example.com:80/"); + assert(url.normalized.toString() == "http://example.com/"); + + url = URL.parse("hTTPs://examPLe.COM:443/my/path"); + assert(url.normalized.toString() == "https://example.com/my/path"); + + url = URL.parse("http://example.com/foo"); + url.normalize(true); + assert(url.toString() == "http://example.com/foo/"); +} + +version (Windows) unittest { // Windows drive letter paths + auto url = URL(WindowsPath(`C:\foo`)); + assert(url.schema == "file"); + assert(url.host == ""); + assert(url.path == InetPath("/C:/foo")); + auto p = url.toNativePath; + p.normalize(); + assert(p == WindowsPath(`C:\foo`)); +} + +version (Windows) unittest { // UNC paths + auto url = URL(WindowsPath(`\\server\share\path`)); + assert(url.schema == "file"); + assert(url.host == "server"); + assert(url.path == InetPath("/share/path")); + + auto p = url.toNativePath; + p.normalize(); // convert slash to backslash if necessary + assert(p == WindowsPath(`\\server\share\path`)); +} + +unittest { + assert((URL.parse("http://example.com/foo") ~ InetPath("bar")).toString() + == "http://example.com/foo/bar"); + assert((URL.parse("http://example.com/foo") ~ InetPath.Segment("bar")).toString() + == "http://example.com/foo/bar"); + + URL url = URL.parse("http://example.com/"); + url ~= InetPath("foo"); + url ~= InetPath.Segment("bar"); + assert(url.toString() == "http://example.com/foo/bar"); +} + +unittest { + assert(URL.parse("foo:/foo/bar").toString() == 
/**
	Downloads a file from the specified URL.

	Any redirects will be followed until the actual file resource is reached or if the redirection
	limit of 10 is reached. Note that only HTTP(S) is currently supported.

	Params:
		url = The URL to download from; must use the "http" or "https"
			schema and must not contain a user name or password
		callback = Invoked with the response body stream once a response
			with status "ok" has been received
		client_ = Optional HTTP client to use for the requests. If it is
			`null`, a new client is created and disconnected again when
			the function exits.

	Throws: `HTTPStatusException` for any status code other than "ok" and
		the handled redirect codes. An exception raised by `enforce` if a
		redirect response lacks a "Location" header or if the redirection
		limit is exceeded.
*/
void download(HTTPClient_ = void*)(URL url, scope void delegate(scope InputStream) callback, HTTPClient_ client_ = null)
{
	import vibe.http.client;

	assert(url.username.length == 0 && url.password.length == 0, "Auth not supported yet.");
	assert(url.schema == "http" || url.schema == "https", "Only http(s):// supported for now.");

	// use the supplied client only if it actually is an HTTPClient
	HTTPClient client;
	static if (is(HTTPClient_ == HTTPClient)) client = client_;
	if (!client) client = new HTTPClient();
	scope (exit) {
		if (client_ is null) // disconnect default client
			client.disconnect();
	}

	if (!url.port)
		url.port = url.defaultPort;

	// each iteration performs one request; a redirect response updates
	// url and leaves `done` unset, so that the next iteration follows it
	foreach( i; 0 .. 10 ){
		client.connect(url.host, url.port, url.schema == "https");
		logTrace("connect to %s", url.host);
		bool done = false;
		client.request(
			(scope HTTPClientRequest req) {
				req.requestURL = url.localURI;
				logTrace("REQUESTING %s!", req.requestURL);
			},
			(scope HTTPClientResponse res) {
				logTrace("GOT ANSWER!");

				switch( res.statusCode ){
					default:
						throw new HTTPStatusException(res.statusCode, "Server responded with "~httpStatusText(res.statusCode)~" for "~url.toString());
					case HTTPStatus.ok:
						done = true;
						callback(res.bodyReader.asInterface!InputStream);
						break;
					case HTTPStatus.movedPermanently:
					case HTTPStatus.found:
					case HTTPStatus.seeOther:
					case HTTPStatus.temporaryRedirect:
						logTrace("Status code: %s", res.statusCode);
						auto pv = "Location" in res.headers;
						enforce(pv !is null, "Server responded with redirect but did not specify the redirect location for "~url.toString());
						logDebug("Redirect to '%s'", *pv);
						if( startsWith((*pv), "http:") || startsWith((*pv), "https:") ){
							// absolute redirect target
							logTrace("parsing %s", *pv);
							auto nurl = URL(*pv);
							if (!nurl.port)
								nurl.port = nurl.defaultPort;
							// the connection is dropped if the target
							// differs in host, schema or port
							if (url.host != nurl.host || url.schema != nurl.schema ||
								url.port != nurl.port)
								client.disconnect();
							url = nurl;
						} else
							// any other value replaces path, query string and anchor
							url.localURI = *pv;
						break;
				}
			}
		);
		if (done) return;
	}
	enforce(false, "Too many redirects!");
	assert(false);
}
0000000..d9c4628 --- /dev/null +++ b/source/vibe/inet/webform.d @@ -0,0 +1,642 @@ +/** + Contains HTML/urlencoded form parsing and construction routines. + + Copyright: © 2012-2014 Sönke Ludwig + License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file. + Authors: Sönke Ludwig, Jan Krüger +*/ +module vibe.inet.webform; + +import vibe.container.dictionarylist; +import vibe.core.file; +import vibe.core.log; +import vibe.core.path; +import vibe.inet.message; +import vibe.internal.string; +import vibe.stream.operations; +import vibe.textfilter.urlencode; +import std.range : isOutputRange; +import std.traits : ValueType, KeyType; + +import std.array; +import std.exception; +import std.string; + + +/** + Parses form data according to an HTTP Content-Type header. + + Writes the form fields into a key-value of type $(D FormFields), parsed from the + specified $(D InputStream) and using the corresponding Content-Type header. Parsing + is gracefully aborted if the Content-Type header is unrelated. + + Params: + fields = The key-value map to which form fields must be written + files = The $(D FilePart)s mapped to the corresponding key in which details on + transmitted files will be written to. + content_type = The value of the Content-Type HTTP header. + body_reader = A valid $(D InputSteram) data stream consumed by the parser. + max_line_length = The byte-sized maximum length of lines used as boundary delimitors in Multi-Part forms. 
+*/
+bool parseFormData(ref FormFields fields, ref FilePartFormFields files, string content_type, InputStream body_reader, size_t max_line_length)
+@safe {
+	auto ct_entries = content_type.split(";"); // media type first, parameters (e.g. boundary) after it
+	if (!ct_entries.length) return false; // no media type present
+
+	switch (ct_entries[0].strip()) {
+		default: // unrelated content type: return without reading from body_reader
+			return false;
+		case "application/x-www-form-urlencoded":
+			assert(!!body_reader);
+			parseURLEncodedForm(body_reader.readAllUTF8(), fields);
+			break;
+		case "multipart/form-data":
+			assert(!!body_reader);
+			parseMultiPartForm(fields, files, content_type, body_reader, max_line_length);
+			break;
+	}
+	return true; // a supported form encoding was recognized and parsed (this used to be a constant `false`)
+}
+
+/**
+	Parses a URL encoded form and stores the key/value pairs.
+
+	Writes to the $(D FormFields) the key-value map associated to an
+	"application/x-www-form-urlencoded" MIME formatted string, ie. all '+'
+	characters are considered as ' ' spaces.
+*/
+void parseURLEncodedForm(string str, ref FormFields params)
+@safe {
+	while (str.length > 0) {
+		// name part
+		auto idx = str.indexOf("=");
+		if (idx == -1) {
+			idx = vibe.internal.string.indexOfAny(str, "&;");
+			if (idx == -1) {
+				params.addField(formDecode(str[0 .. $]), "");
+				return;
+			} else {
+				params.addField(formDecode(str[0 .. idx]), "");
+				str = str[idx+1 .. $];
+				continue;
+			}
+		} else {
+			auto idx_amp = vibe.internal.string.indexOfAny(str, "&;");
+			if (idx_amp > -1 && idx_amp < idx) {
+				params.addField(formDecode(str[0 .. idx_amp]), "");
+				str = str[idx_amp+1 .. $];
+				continue;
+			} else {
+				string name = formDecode(str[0 .. idx]);
+				str = str[idx+1 .. $];
+				// value part
+				for( idx = 0; idx < str.length && str[idx] != '&' && str[idx] != ';'; idx++) {}
+				string value = formDecode(str[0 .. idx]);
+				params.addField(name, value);
+				str = idx < str.length ? str[idx+1 .. 
$] : null; + } + } + } +} + +/** + This example demonstrates parsing using all known form separators, it builds + a key-value map into the destination $(D FormFields) +*/ +unittest +{ + FormFields dst; + parseURLEncodedForm("a=b;c;dee=asd&e=fgh&f=j%20l", dst); + assert("a" in dst && dst["a"] == "b"); + assert("c" in dst && dst["c"] == ""); + assert("dee" in dst && dst["dee"] == "asd"); + assert("e" in dst && dst["e"] == "fgh"); + assert("f" in dst && dst["f"] == "j l"); +} + + +/** + Parses a form in "multipart/form-data" format. + + If any files are contained in the form, they are written to temporary files using + $(D vibe.core.file.createTempFile) and their details returned in the files field. + + Params: + fields = The key-value map to which form fields must be written + files = The $(D FilePart)s mapped to the corresponding key in which details on + transmitted files will be written to. + content_type = The value of the Content-Type HTTP header. + body_reader = A valid $(D InputSteram) data stream consumed by the parser. + max_line_length = The byte-sized maximum length of lines used as boundary delimitors in Multi-Part forms. +*/ +void parseMultiPartForm(InputStream)(ref FormFields fields, ref FilePartFormFields files, + string content_type, InputStream body_reader, size_t max_line_length) + if (isInputStream!InputStream) +{ + import std.algorithm : strip; + + auto pos = content_type.indexOf("boundary="); + enforce(pos >= 0 , "no boundary for multipart form found"); + auto boundary = content_type[pos+9 .. 
$].strip('"'); + auto firstBoundary = () @trusted { return cast(string)body_reader.readLine(max_line_length); } (); + enforce(firstBoundary == "--" ~ boundary, "Invalid multipart form data!"); + + while (parseMultipartFormPart(body_reader, fields, files, cast(const(ubyte)[])("\r\n--" ~ boundary), max_line_length)) {} +} + +alias FormFields = DictionaryList!(string, true, 16); +alias FilePartFormFields = DictionaryList!(FilePart, true, 0); + +@safe unittest +{ + import vibe.stream.memory; + + auto content_type = "multipart/form-data; boundary=\"AaB03x\""; + + auto input = createMemoryStream(cast(ubyte[])( + "--AaB03x\r\n" ~ + "Content-Disposition: form-data; name=\"submit-name\"\r\n" ~ + "\r\n" ~ + "Larry\r\n" ~ + "--AaB03x\r\n" ~ + "Content-Disposition: form-data; name=\"files\"; filename=\"file1.txt\"\r\n" ~ + "Content-Type: text/plain\r\n" ~ + "\r\n" ~ + "... contents of file1.txt ...\r\n" ~ + "--AaB03x--\r\n").dup, false); + + FormFields fields; + FilePartFormFields files; + + parseMultiPartForm(fields, files, content_type, input, 4096); + + assert(fields["submit-name"] == "Larry"); + assert(files["files"].filename == "file1.txt"); +} + +unittest { // issue #1220 - wrong handling of Content-Length + import vibe.stream.memory; + + auto content_type = "multipart/form-data; boundary=\"AaB03x\""; + + auto input = createMemoryStream(cast(ubyte[])( + "--AaB03x\r\n" ~ + "Content-Disposition: form-data; name=\"submit-name\"\r\n" ~ + "\r\n" ~ + "Larry\r\n" ~ + "--AaB03x\r\n" ~ + "Content-Disposition: form-data; name=\"files\"; filename=\"file1.txt\"\r\n" ~ + "Content-Type: text/plain\r\n" ~ + "Content-Length: 29\r\n" ~ + "\r\n" ~ + "... contents of file1.txt ...\r\n" ~ + "--AaB03x--\r\n" ~ + "Content-Disposition: form-data; name=\"files\"; filename=\"file2.txt\"\r\n" ~ + "Content-Type: text/plain\r\n" ~ + "\r\n" ~ + "... 
contents of file1.txt ...\r\n" ~ + "--AaB03x--\r\n").dup, false); + + FormFields fields; + FilePartFormFields files; + + parseMultiPartForm(fields, files, content_type, input, 4096); + + assert(fields["submit-name"] == "Larry"); + assert(files["files"].filename == "file1.txt"); +} + +unittest { // use of unquoted strings in Content-Disposition + import vibe.stream.memory; + + auto content_type = "multipart/form-data; boundary=\"AaB03x\""; + + auto input = createMemoryStream(cast(ubyte[])( + "--AaB03x\r\n" ~ + "Content-Disposition: form-data; name=submitname\r\n" ~ + "\r\n" ~ + "Larry\r\n" ~ + "--AaB03x\r\n" ~ + "Content-Disposition: form-data; name=files; filename=file1.txt\r\n" ~ + "Content-Type: text/plain\r\n" ~ + "Content-Length: 29\r\n" ~ + "\r\n" ~ + "... contents of file1.txt ...\r\n" ~ + "--AaB03x--\r\n").dup, false); + + FormFields fields; + FilePartFormFields files; + + parseMultiPartForm(fields, files, content_type, input, 4096); + + assert(fields["submitname"] == "Larry"); + assert(files["files"].filename == "file1.txt"); +} + +/** + Single part of a multipart form. + + A FilePart is the data structure for individual "multipart/form-data" parts + according to RFC 1867 section 7. 
+*/
+struct FilePart {
+	InetHeaderMap headers; // all headers of this part
+	NativePath.Segment filename; // file name taken from the Content-Disposition header
+	NativePath tempPath; // path of the temporary file holding the part's contents
+
+	// avoids NativePath.Segment.toString() being called
+	string toString() const { return filename.name; }
+}
+
+// Reads one part (headers + body) from `stream` into `form`/`files`; returns `false` once the closing "--" marker was read.
+private bool parseMultipartFormPart(InputStream)(InputStream stream, ref FormFields form, ref FilePartFormFields files, const(ubyte)[] boundary, size_t max_line_length)
+	if (isInputStream!InputStream)
+{
+	// find end of quoted string: index of the first unescaped '"', or -1 if there is none
+	ptrdiff_t indexOfQuote(string str) { // explicit signed type, `auto` turned the -1 into size_t.max
+		foreach (i, ch; str) {
+			if (ch == '"' && (i == 0 || str[i-1] != '\\')) return i;
+		}
+		return -1;
+	}
+
+	string parseValue(ref string str) { // consumes one quoted or bare parameter value from the front of `str`
+		string res;
+		if (str.length > 0 && str[0] == '"') { // length check: the value may be empty (e.g. a trailing `name=`)
+			str = str[1..$];
+			auto pos = indexOfQuote(str);
+			enforce(pos >= 0, "Unterminated quoted string in multipart header."); // malformed input must not cause a range error
+			res = str[0..pos].replace(`\"`, `"`);
+			str = str[pos..$];
+		} else {
+			auto pos = str.indexOf(';');
+			if (pos < 0) {
+				res = str;
+				str = "";
+			} else {
+				res = str[0 .. pos];
+				str = str[pos..$];
+			}
+		}
+
+		return res;
+	}
+
+	InetHeaderMap headers;
+	stream.parseRFC5322Header(headers);
+	auto pv = "Content-Disposition" in headers;
+	enforce(pv, "invalid multipart");
+	auto cd = *pv;
+	string name;
+	auto pos = cd.indexOf("name="); // NOTE(review): this also matches inside "filename=" if that parameter comes first
+	if (pos >= 0) {
+		cd = cd[pos+5 .. $];
+		name = parseValue(cd);
+	}
+	string filename;
+	pos = cd.indexOf("filename=");
+	if (pos >= 0) {
+		cd = cd[pos+9 .. $];
+		filename = parseValue(cd);
+	}
+
+	if (filename.length > 0) { // a part with a file name is stored in a temporary file
+		FilePart fp;
+		fp.headers = headers;
+		version (Have_vibe_core)
+			fp.filename = NativePath.Segment(filename);
+		else
+			fp.filename = PathEntry.validateFilename(filename);
+
+		auto file = createTempFile();
+		fp.tempPath = file.path;
+		if (auto plen = "Content-Length" in headers) {
+			import std.conv : to;
+			stream.pipe(file, (*plen).to!long);
+			enforce(stream.skipBytes(boundary), "Missing multi-part end boundary marker.");
+		} else stream.readUntil(file, boundary);
+		logDebug("file: %s", fp.tempPath.toString());
+		file.close();
+
+		files.addField(name, fp);
+
+		// TODO: temp files must be deleted after the request has been processed!
+	} else { // everything else is kept in memory as a regular form field
+		auto data = () @trusted { return cast(string)stream.readUntil(boundary); } ();
+		form.addField(name, data);
+	}
+
+	ubyte[2] ub; // the two bytes following the boundary: "--" ends the form, "\r\n" starts the next part
+	stream.read(ub, IOMode.all);
+	if (ub == "--")
+	{
+		stream.pipe(nullSink());
+		return false;
+	}
+	enforce(ub == cast(const(ubyte)[])"\r\n");
+	return true;
+}
+
+/**
+	Encodes a Key-Value map into a form URL encoded string.
+
+	Writes to the $(D OutputRange) an application/x-www-form-urlencoded MIME formatted string,
+	ie. all spaces ' ' are replaced by the '+' character
+
+	Params:
+		dst = The destination $(D OutputRange) where the resulting string must be written to.
+		map = An iterable key-value map iterable with $(D foreach(string key, string value; map)).
+		sep = A valid form separator, common values are '&' or ';'
+*/
+void formEncode(R, T)(auto ref R dst, T map, char sep = '&')
+	if (isFormMap!T && isOutputRange!(R, char))
+{
+	formEncodeImpl(dst, map, sep, true);
+}
+
+/**
+	The following example demonstrates the use of $(D formEncode) with a json map,
+	the ordering of keys will be preserved in $(D Bson) and $(D DictionaryList) objects only.
+ */
+unittest {
+	import std.array : Appender;
+	string[string] map;
+	map["numbers"] = "123456789";
+	map["spaces"] = "1 2 3 4 a b c d";
+
+	Appender!string app;
+	app.formEncode(map);
+	assert(app.data == "spaces=1+2+3+4+a+b+c+d&numbers=123456789" ||
+		app.data == "numbers=123456789&spaces=1+2+3+4+a+b+c+d");
+}
+
+/**
+	Encodes a Key-Value map into a form URL encoded string.
+
+	Returns an application/x-www-form-urlencoded MIME formatted string,
+	ie. all spaces ' ' are replaced by the '+' character
+
+	Params:
+		map = An iterable key-value map iterable with $(D foreach(string key, string value; map)).
+		sep = A valid form separator, common values are '&' or ';'
+*/
+string formEncode(T)(T map, char sep = '&')
+	if (isFormMap!T)
+{
+	return formEncodeImpl(map, sep, true);
+}
+
+/// Ditto
+string formEncode(T : DictionaryList!Args, Args...)(T map, char sep = '&')
+{
+	return formEncodeImpl(map.byKeyValue(), sep, true);
+}
+
+/**
+	Writes to the $(D OutputRange) an URL encoded string as specified in RFC 3986 section 2
+
+	Params:
+		dst = The destination $(D OutputRange) where the resulting string must be written to.
+		map = An iterable key-value map iterable with $(D foreach(string key, string value; map)).
+*/
+void urlEncode(R, T)(auto ref R dst, T map)
+	if (isFormMap!T && isOutputRange!(R, char))
+{
+	formEncodeImpl(dst, map, '&', false); // `sep` is a `char`; the former string literal "&" did not match the parameter type
+}
+
+
+/**
+	Returns an URL encoded string as specified in RFC 3986 section 2
+
+	Params:
+		map = An iterable key-value map iterable with $(D foreach(string key, string value; map)).
+*/
+string urlEncode(T)(T map)
+	if (isFormMap!T)
+{
+	return formEncodeImpl(map, '&', false);
+}
+
+/// Ditto
+string urlEncode(T : DictionaryList!Args, Args...)(T map)
+{
+	return formEncodeImpl(map.byKeyValue, '&', false);
+}
+
+/**
+	Tests if a given type is suitable for storing a web form.
+ + Types that define iteration support with the key typed as $(D string) and + the value either also typed as $(D string), or as a $(D vibe.data.json.Json) + like value. The latter case specifically requires a $(D .type) property that + is tested for equality with $(D T.Type.string), as well as a + $(D .get!string) method. +*/ +template isFormMap(T) +{ + import std.conv; + enum isFormMap = isStringMap!T || isJsonLike!T; +} + +private template isStringMap(T) +{ + enum isStringMap = __traits(compiles, () { + foreach (string key, string value; T.init) {} + } ()); +} + +unittest { + static assert(isStringMap!(string[string])); + + static struct M { + int opApply(int delegate(string key, string value)) { return 0; } + } + static assert(isStringMap!M); +} + +private template isJsonLike(T) +{ + enum isJsonLike = __traits(compiles, () { + import std.conv; + string r; + foreach (string key, value; T.init) + r = value.type == T.Type.string ? value.get!string : value.to!string; + } ()); +} + +unittest { + import vibe.data.json; + import vibe.data.bson; + static assert(isJsonLike!Json); + static assert(isJsonLike!Bson); +} + +private string formEncodeImpl(T)(T map, char sep, bool form_encode) + if (isStringMap!T) +{ + import std.array : Appender; + Appender!string dst; + size_t len; + + foreach (key, ref value; map) { + len += key.length; + len += value.length; + } + + // characters will be expanded, better use more space the first time and avoid additional allocations + dst.reserve(len*2); + dst.formEncodeImpl(map, sep, form_encode); + return dst.data; +} + + +private string formEncodeImpl(T)(T map, char sep, bool form_encode) + if (isJsonLike!T) +{ + import std.array : Appender; + Appender!string dst; + size_t len; + + foreach (string key, T value; map) { + len += key.length; + len += value.length; + } + + // characters will be expanded, better use more space the first time and avoid additional allocations + dst.reserve(len*2); + dst.formEncodeImpl(map, sep, form_encode); + 
return dst.data; +} + +private void formEncodeImpl(R, T)(auto ref R dst, T map, char sep, bool form_encode) + if (isOutputRange!(R, string) && isStringMap!T) +{ + bool flag; + + foreach (key, value; map) { + if (flag) + dst.put(sep); + else + flag = true; + filterURLEncode(dst, key, null, form_encode); + dst.put("="); + filterURLEncode(dst, value, null, form_encode); + } +} + +private void formEncodeImpl(R, T)(auto ref R dst, T map, char sep, bool form_encode) + if (isOutputRange!(R, string) && isJsonLike!T) +{ + bool flag; + + foreach (string key, T value; map) { + if (flag) + dst.put(sep); + else + flag = true; + filterURLEncode(dst, key, null, form_encode); + dst.put("="); + if (value.type == T.Type.string) + filterURLEncode(dst, value.get!string, null, form_encode); + else { + static if (T.stringof == "Json") + filterURLEncode(dst, value.to!string, null, form_encode); + else + filterURLEncode(dst, value.toString(), null, form_encode); + + } + } +} + +unittest +{ + import vibe.data.json : Json; + import vibe.data.bson : Bson; + import std.algorithm.sorting : sort; + + string[string] aaMap; + DictionaryList!string dlMap; + Json jsonMap = Json.emptyObject; + Bson bsonMap = Bson.emptyObject; + + aaMap["unicode"] = "╤╳"; + aaMap["numbers"] = "123456789"; + aaMap["spaces"] = "1 2 3 4 a b c d"; + aaMap["slashes"] = "1/2/3/4/5"; + aaMap["equals"] = "1=2=3=4=5=6=7"; + aaMap["complex"] = "╤╳/=$$\"'1!2()'\""; + aaMap["╤╳"] = "1"; + + + dlMap["unicode"] = "╤╳"; + dlMap["numbers"] = "123456789"; + dlMap["spaces"] = "1 2 3 4 a b c d"; + dlMap["slashes"] = "1/2/3/4/5"; + dlMap["equals"] = "1=2=3=4=5=6=7"; + dlMap["complex"] = "╤╳/=$$\"'1!2()'\""; + dlMap["╤╳"] = "1"; + + + jsonMap["unicode"] = "╤╳"; + jsonMap["numbers"] = "123456789"; + jsonMap["spaces"] = "1 2 3 4 a b c d"; + jsonMap["slashes"] = "1/2/3/4/5"; + jsonMap["equals"] = "1=2=3=4=5=6=7"; + jsonMap["complex"] = "╤╳/=$$\"'1!2()'\""; + jsonMap["╤╳"] = "1"; + + bsonMap["unicode"] = "╤╳"; + bsonMap["numbers"] = 
"123456789"; + bsonMap["spaces"] = "1 2 3 4 a b c d"; + bsonMap["slashes"] = "1/2/3/4/5"; + bsonMap["equals"] = "1=2=3=4=5=6=7"; + bsonMap["complex"] = "╤╳/=$$\"'1!2()'\""; + bsonMap["╤╳"] = "1"; + + assert(urlEncode(aaMap).split('&').sort().join("&") == "%E2%95%A4%E2%95%B3=1&complex=%E2%95%A4%E2%95%B3%2F%3D%24%24%22%271%212%28%29%27%22&equals=1%3D2%3D3%3D4%3D5%3D6%3D7&numbers=123456789&slashes=1%2F2%2F3%2F4%2F5&spaces=1%202%203%204%20a%20b%20c%20d&unicode=%E2%95%A4%E2%95%B3"); + assert(urlEncode(dlMap) == "unicode=%E2%95%A4%E2%95%B3&numbers=123456789&spaces=1%202%203%204%20a%20b%20c%20d&slashes=1%2F2%2F3%2F4%2F5&equals=1%3D2%3D3%3D4%3D5%3D6%3D7&complex=%E2%95%A4%E2%95%B3%2F%3D%24%24%22%271%212%28%29%27%22&%E2%95%A4%E2%95%B3=1"); + assert(urlEncode(jsonMap).split('&').sort().join("&") == "%E2%95%A4%E2%95%B3=1&complex=%E2%95%A4%E2%95%B3%2F%3D%24%24%22%271%212%28%29%27%22&equals=1%3D2%3D3%3D4%3D5%3D6%3D7&numbers=123456789&slashes=1%2F2%2F3%2F4%2F5&spaces=1%202%203%204%20a%20b%20c%20d&unicode=%E2%95%A4%E2%95%B3"); + assert(urlEncode(bsonMap) == "unicode=%E2%95%A4%E2%95%B3&numbers=123456789&spaces=1%202%203%204%20a%20b%20c%20d&slashes=1%2F2%2F3%2F4%2F5&equals=1%3D2%3D3%3D4%3D5%3D6%3D7&complex=%E2%95%A4%E2%95%B3%2F%3D%24%24%22%271%212%28%29%27%22&%E2%95%A4%E2%95%B3=1"); + { + FormFields aaFields; + parseURLEncodedForm(urlEncode(aaMap), aaFields); + assert(urlEncode(aaMap) == urlEncode(aaFields)); + + FormFields dlFields; + parseURLEncodedForm(urlEncode(dlMap), dlFields); + assert(urlEncode(dlMap) == urlEncode(dlFields)); + + FormFields jsonFields; + parseURLEncodedForm(urlEncode(jsonMap), jsonFields); + assert(urlEncode(jsonMap) == urlEncode(jsonFields)); + + FormFields bsonFields; + parseURLEncodedForm(urlEncode(bsonMap), bsonFields); + assert(urlEncode(bsonMap) == urlEncode(bsonFields)); + } + + assert(formEncode(aaMap).split('&').sort().join("&") == 
"%E2%95%A4%E2%95%B3=1&complex=%E2%95%A4%E2%95%B3%2F%3D%24%24%22%271%212%28%29%27%22&equals=1%3D2%3D3%3D4%3D5%3D6%3D7&numbers=123456789&slashes=1%2F2%2F3%2F4%2F5&spaces=1+2+3+4+a+b+c+d&unicode=%E2%95%A4%E2%95%B3"); + assert(formEncode(dlMap) == "unicode=%E2%95%A4%E2%95%B3&numbers=123456789&spaces=1+2+3+4+a+b+c+d&slashes=1%2F2%2F3%2F4%2F5&equals=1%3D2%3D3%3D4%3D5%3D6%3D7&complex=%E2%95%A4%E2%95%B3%2F%3D%24%24%22%271%212%28%29%27%22&%E2%95%A4%E2%95%B3=1"); + assert(formEncode(jsonMap).split('&').sort().join("&") == "%E2%95%A4%E2%95%B3=1&complex=%E2%95%A4%E2%95%B3%2F%3D%24%24%22%271%212%28%29%27%22&equals=1%3D2%3D3%3D4%3D5%3D6%3D7&numbers=123456789&slashes=1%2F2%2F3%2F4%2F5&spaces=1+2+3+4+a+b+c+d&unicode=%E2%95%A4%E2%95%B3"); + assert(formEncode(bsonMap) == "unicode=%E2%95%A4%E2%95%B3&numbers=123456789&spaces=1+2+3+4+a+b+c+d&slashes=1%2F2%2F3%2F4%2F5&equals=1%3D2%3D3%3D4%3D5%3D6%3D7&complex=%E2%95%A4%E2%95%B3%2F%3D%24%24%22%271%212%28%29%27%22&%E2%95%A4%E2%95%B3=1"); + + { + FormFields aaFields; + parseURLEncodedForm(formEncode(aaMap), aaFields); + assert(formEncode(aaMap) == formEncode(aaFields)); + + FormFields dlFields; + parseURLEncodedForm(formEncode(dlMap), dlFields); + assert(formEncode(dlMap) == formEncode(dlFields)); + + FormFields jsonFields; + parseURLEncodedForm(formEncode(jsonMap), jsonFields); + assert(formEncode(jsonMap) == formEncode(jsonFields)); + + FormFields bsonFields; + parseURLEncodedForm(formEncode(bsonMap), bsonFields); + assert(formEncode(bsonMap) == formEncode(bsonFields)); + } + +} diff --git a/textfilter/dub.sdl b/textfilter/dub.sdl new file mode 100644 index 0000000..ec58f44 --- /dev/null +++ b/textfilter/dub.sdl @@ -0,0 +1,6 @@ +name "textfilter" +description "Text filtering routines" +targetType "library" +dependency "vibe-core" version=">=2.0.0 <3.0.0-0" +sourcePaths "." +importPaths "." 
diff --git a/textfilter/vibe/textfilter/html.d b/textfilter/vibe/textfilter/html.d
new file mode 100644
index 0000000..8992dc8
--- /dev/null
+++ b/textfilter/vibe/textfilter/html.d
@@ -0,0 +1,190 @@
+/**
+	HTML character entity escaping.
+
+	TODO: Make things @safe once Appender is.
+
+	Copyright: © 2012-2014 Sönke Ludwig
+	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
+	Authors: Sönke Ludwig
+*/
+module vibe.textfilter.html;
+
+import std.array;
+import std.conv;
+import std.range;
+
+
+/** Returns the HTML escaped version of a given string (uses the default `HTMLEscapeFlags.escapeNewline` of `filterHTMLEscape`).
+*/
+string htmlEscape(R)(R str) @trusted
+	if (isInputRange!R)
+{
+	if (__ctfe) { // appender is a performance/memory hog in ctfe
+		StringAppender dst;
+		filterHTMLEscape(dst, str);
+		return dst.data;
+	} else {
+		auto dst = appender!string();
+		filterHTMLEscape(dst, str);
+		return dst.data;
+	}
+}
+
+/// Angle brackets are replaced by entities, double quotes are kept as-is.
+unittest {
+	assert(htmlEscape(`"Hello", <World>!`) == `"Hello", &lt;World&gt;!`);
+}
+
+
+/** Writes the HTML escaped version of a given string to an output range, escaping it element by element.
+*/
+void filterHTMLEscape(R, S)(ref R dst, S str, HTMLEscapeFlags flags = HTMLEscapeFlags.escapeNewline)
+	if (isOutputRange!(R, dchar) && isInputRange!S)
+{
+	for (;!str.empty;str.popFront())
+		filterHTMLEscape(dst, str.front, flags);
+}
+
+
+/** Returns the HTML escaped version of a given string (also escapes double quotes).
+*/
+string htmlAttribEscape(R)(R str) @trusted
+	if (isInputRange!R)
+{
+	if (__ctfe) { // appender is a performance/memory hog in ctfe
+		StringAppender dst;
+		filterHTMLAttribEscape(dst, str);
+		return dst.data;
+	} else {
+		auto dst = appender!string();
+		filterHTMLAttribEscape(dst, str);
+		return dst.data;
+	}
+}
+
+/// In addition to the angle brackets, the double quotes are replaced by entities.
+unittest {
+	assert(htmlAttribEscape(`"Hello", <World>!`) == `&quot;Hello&quot;, &lt;World&gt;!`);
+}
+
+
+/** Writes the HTML escaped version of a given string to an output range (also escapes double quotes).
+*/
+void filterHTMLAttribEscape(R, S)(ref R dst, S str)
+	if (isOutputRange!(R, dchar) && isInputRange!S)
+{
+	for (; !str.empty; str.popFront())
+		filterHTMLEscape(dst, str.front, HTMLEscapeFlags.escapeNewline|HTMLEscapeFlags.escapeQuotes);
+}
+
+
+/** Returns the HTML escaped version of a given string (escapes every character).
+*/
+string htmlAllEscape(R)(R str) @trusted
+	if (isInputRange!R)
+{
+	if (__ctfe) { // appender is a performance/memory hog in ctfe
+		StringAppender dst;
+		filterHTMLAllEscape(dst, str);
+		return dst.data;
+	} else {
+		auto dst = appender!string();
+		filterHTMLAllEscape(dst, str);
+		return dst.data;
+	}
+}
+
+/// Every character is written as a decimal numeric character reference.
+unittest {
+	assert(htmlAllEscape("Hello!") == "&#72;&#101;&#108;&#108;&#111;&#33;");
+}
+
+
+/** Writes the HTML escaped version of a given string to an output range (escapes every character).
+*/
+void filterHTMLAllEscape(R, S)(ref R dst, S str)
+	if (isOutputRange!(R, dchar) && isInputRange!S)
+{
+	for (; !str.empty; str.popFront()) {
+		put(dst, "&#");
+		put(dst, to!string(cast(uint)str.front)); // decimal value of the current element
+		put(dst, ';');
+	}
+}
+
+
+/**
+	Minimally escapes a text so that no HTML tags appear in it.
+*/
+string htmlEscapeMin(R)(R str) @trusted
+	if (isInputRange!R)
+{
+	auto dst = appender!string();
+	for (; !str.empty; str.popFront())
+		filterHTMLEscape(dst, str.front, HTMLEscapeFlags.escapeMinimal);
+	return dst.data();
+}
+
+
+/**
+	Writes the HTML escaped version of a character to an output range.
+*/
+void filterHTMLEscape(R)(ref R dst, dchar ch, HTMLEscapeFlags flags = HTMLEscapeFlags.escapeNewline )
+{
+	switch (ch) {
+		default: // anything not listed below: numeric reference only if escapeUnknown is set
+			if (flags & HTMLEscapeFlags.escapeUnknown) {
+				put(dst, "&#");
+				put(dst, to!string(cast(uint)ch));
+				put(dst, ';');
+			} else put(dst, ch);
+			break;
+		case '"':
+			if (flags & HTMLEscapeFlags.escapeQuotes) put(dst, "&quot;");
+			else put(dst, '"');
+			break;
+		case '\'':
+			if (flags & HTMLEscapeFlags.escapeQuotes) put(dst, "&#39;");
+			else put(dst, '\'');
+			break;
+		case '\r', '\n':
+			if (flags & HTMLEscapeFlags.escapeNewline) {
+				put(dst, "&#");
+				put(dst, to!string(cast(uint)ch));
+				put(dst, ';');
+			} else put(dst, ch);
+			break;
+		case 'a': .. case 'z': goto case;
+		case 'A': .. case 'Z': goto case;
+		case '0': .. case '9': goto case;
+		case ' ', '\t', '-', '_', '.', ':', ',', ';',
+			 '#', '+', '*', '?', '=', '(', ')', '/', '!',
+			 '%' , '{', '}', '[', ']', '`', '´', '$', '^', '~':
+			put(dst, ch); // written as dchar: a cast to char would truncate the non-ASCII '´' into an invalid UTF-8 byte
+			break;
+		case '<': put(dst, "&lt;"); break;
+		case '>': put(dst, "&gt;"); break;
+		case '&': put(dst, "&amp;"); break;
+	}
+}
+
+
+enum HTMLEscapeFlags {
+	escapeMinimal = 0, // no optional escaping
+	escapeQuotes = 1<<0, // also escape '"' and '\''
+	escapeNewline = 1<<1, // also escape '\r' and '\n'
+	escapeUnknown = 1<<2 // also escape all characters not handled explicitly
+}
+
+private struct StringAppender {
+@safe:
+
+	string data;
+	void put(string s) { data ~= s; }
+	void put(char ch) { data ~= ch; }
+	void put(dchar ch) {
+		import std.utf;
+		char[4] dst;
+		data ~= dst[0 .. encode(dst, ch)];
+	}
+}
diff --git a/textfilter/vibe/textfilter/markdown.d b/textfilter/vibe/textfilter/markdown.d
new file mode 100644
index 0000000..3972279
--- /dev/null
+++ b/textfilter/vibe/textfilter/markdown.d
@@ -0,0 +1,1752 @@
+/**
+	Markdown parser implementation
+
+	Copyright: © 2012-2019 Sönke Ludwig
+	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
+ Authors: Sönke Ludwig +*/ +module vibe.textfilter.markdown; + +import vibe.textfilter.html; + +import std.algorithm : any, all, canFind, countUntil, min; +import std.array; +import std.format; +import std.range; +import std.utf : byCodeUnit; +import std.string; + +/* + TODO: + detect inline HTML tags +*/ + + +/** Returns a Markdown filtered HTML string. +*/ +string filterMarkdown()(string str, MarkdownFlags flags) +@trusted { // scope class is not @safe for DMD 2.072 + scope settings = new MarkdownSettings; + settings.flags = flags; + return filterMarkdown(str, settings); +} +/// ditto +string filterMarkdown()(string str, scope MarkdownSettings settings = null) +@trusted { // Appender not @safe as of 2.065 + auto dst = appender!string(); + filterMarkdown(dst, str, settings); + return dst.data; +} + + +/** Markdown filters the given string and writes the corresponding HTML to an output range. +*/ +void filterMarkdown(R)(ref R dst, string src, MarkdownFlags flags) +{ + scope settings = new MarkdownSettings; + settings.flags = flags; + filterMarkdown(dst, src, settings); +} +/// ditto +void filterMarkdown(R)(ref R dst, string src, scope MarkdownSettings settings = null) +{ + if (!settings) settings = new MarkdownSettings; + + auto all_lines = splitLines(src); + auto links = scanForReferences(all_lines); + auto lines = parseLines(all_lines, settings); + Block root_block; + parseBlocks(root_block, lines, null, settings); + writeBlock(dst, root_block, links, settings); +} + +/** + Returns the hierarchy of sections +*/ +Section[] getMarkdownOutline(string markdown_source, scope MarkdownSettings settings = null) +{ + import std.conv : to; + + if (!settings) settings = new MarkdownSettings; + auto all_lines = splitLines(markdown_source); + auto lines = parseLines(all_lines, settings); + Block root_block; + parseBlocks(root_block, lines, null, settings); + Section root; + + foreach (ref sb; root_block.blocks) { + if (sb.type == BlockType.header) { + auto s = &root; + while 
(true) { + if (s.subSections.length == 0) break; + if (s.subSections[$-1].headingLevel >= sb.headerLevel) break; + s = &s.subSections[$-1]; + } + s.subSections ~= Section(sb.headerLevel, sb.text[0], sb.text[0].asSlug.to!string); + } + } + + return root.subSections; +} + +/// +unittest { + import std.conv : to; + assert (getMarkdownOutline("## first\n## second\n### third\n# fourth\n### fifth") == + [ + Section(2, " first", "first"), + Section(2, " second", "second", [ + Section(3, " third", "third") + ]), + Section(1, " fourth", "fourth", [ + Section(3, " fifth", "fifth") + ]) + ] + ); +} + +final class MarkdownSettings { + /// Controls the capabilities of the parser. + MarkdownFlags flags = MarkdownFlags.vanillaMarkdown; + + /// Heading tags will start at this level. + size_t headingBaseLevel = 1; + + /// Called for every link/image URL to perform arbitrary transformations. + string delegate(string url_or_path, bool is_image) urlFilter; + + /// White list of URI schemas that can occur in link/image targets + string[] allowedURISchemas = ["http", "https", "ftp", "mailto"]; +} + +enum MarkdownFlags { + /** Same as `vanillaMarkdown` + */ + none = 0, + + /** Convert line breaks into hard line breaks in the output + + This option is useful when operating on text that may be formatted as + plain text, without having Markdown in mind, while still improving + the appearance of the text in many cases. A common example would be + to format e-mails or newsgroup posts. + */ + keepLineBreaks = 1<<0, + + /** Support fenced code blocks. + */ + backtickCodeBlocks = 1<<1, + + /** Disable support for embedded HTML + */ + noInlineHtml = 1<<2, + //noLinks = 1<<3, + //allowUnsafeHtml = 1<<4, + + /** Support table definitions + + The syntax is based on Markdown Extra and GitHub flavored Markdown. + */ + tables = 1<<5, + + /** Support HTML attributes after links + + Links or images directly followed by `{ … }` allow regular HTML + attributes to added to the generated HTML element. 
+ */ + attributes = 1<<6, + + /** Recognize figure definitions + + Figures can be defined using a modified list syntax: + + ``` + - %%% + This is the figure content + + - ### + This is optional caption content + ``` + + Just like for lists, arbitrary blocks can be nested within figure and + figure caption blocks. If only a single paragraph is present within a + figure caption block, the paragraph text will be emitted without the + surrounding `

` tags. The same is true for figure blocks that contain + only a single paragraph and any number of additional figure caption + blocks. + */ + figures = 1<<7, + + /** Support only standard Markdown features + + Note that the parser is not fully CommonMark compliant at the moment, + but this is the general idea behind this option. + */ + vanillaMarkdown = none, + + /** Default set of flags suitable for use within an online forum + */ + forumDefault = keepLineBreaks|backtickCodeBlocks|noInlineHtml|tables +} + +struct Section { + size_t headingLevel; + string caption; + string anchor; + Section[] subSections; +} + +private { + immutable s_blockTags = ["div", "ol", "p", "pre", "section", "table", "ul"]; +} + +private enum IndentType { + white, + quote +} + +private enum LineType { + undefined, + blank, + plain, + hline, + atxHeader, + setextHeader, + tableSeparator, + uList, + oList, + figure, + figureCaption, + htmlBlock, + codeBlockDelimiter +} + +private struct Line { + LineType type; + IndentType[] indent; + string text; + string unindented; + + string unindent(size_t n) + pure @safe { + assert (n <= indent.length); + string ln = text; + foreach (i; 0 .. n) { + final switch(indent[i]){ + case IndentType.white: + if (ln[0] == ' ') ln = ln[4 .. $]; + else ln = ln[1 .. $]; + break; + case IndentType.quote: + ln = ln.stripLeft()[1 .. 
$]; + if (ln.startsWith(' ')) + ln.popFront(); + break; + } + } + return ln; + } +} + +private Line[] parseLines(string[] lines, scope MarkdownSettings settings) +pure @safe { + Line[] ret; + while( !lines.empty ){ + auto ln = lines.front; + lines.popFront(); + + Line lninfo; + lninfo.text = ln; + + while (ln.length > 0) { + if (ln[0] == '\t') { + lninfo.indent ~= IndentType.white; + ln.popFront(); + } else if (ln.startsWith(" ")) { + lninfo.indent ~= IndentType.white; + ln.popFrontN(4); + } else { + if (ln.stripLeft().startsWith(">")) { + lninfo.indent ~= IndentType.quote; + ln = ln.stripLeft(); + ln.popFront(); + if (ln.startsWith(' ')) + ln.popFront(); + } else break; + } + } + lninfo.unindented = ln; + + if ((settings.flags & MarkdownFlags.backtickCodeBlocks) && isCodeBlockDelimiter(ln)) + lninfo.type = LineType.codeBlockDelimiter; + else if(isAtxHeaderLine(ln)) lninfo.type = LineType.atxHeader; + else if(isSetextHeaderLine(ln)) lninfo.type = LineType.setextHeader; + else if((settings.flags & MarkdownFlags.tables) && isTableSeparatorLine(ln)) + lninfo.type = LineType.tableSeparator; + else if(isHlineLine(ln)) lninfo.type = LineType.hline; + else if(isOListLine(ln)) lninfo.type = LineType.oList; + else if(isUListLine(ln)) { + if (settings.flags & MarkdownFlags.figures) { + auto suff = removeListPrefix(ln, LineType.uList); + if (suff == "%%%") lninfo.type = LineType.figure; + else if (suff == "###") lninfo.type = LineType.figureCaption; + else lninfo.type = LineType.uList; + } else lninfo.type = LineType.uList; + } else if(isLineBlank(ln)) lninfo.type = LineType.blank; + else if(!(settings.flags & MarkdownFlags.noInlineHtml) && isHtmlBlockLine(ln)) + lninfo.type = LineType.htmlBlock; + else lninfo.type = LineType.plain; + + ret ~= lninfo; + } + return ret; +} + +unittest { + import std.conv : to; + auto s = new MarkdownSettings; + s.flags = MarkdownFlags.forumDefault; + auto lns = [">```D"]; + assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, 
[IndentType.quote], lns[0], "```D")]); + lns = ["> ```D"]; + assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, [IndentType.quote], lns[0], "```D")]); + lns = ["> ```D"]; + assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, [IndentType.quote], lns[0], " ```D")]); + lns = ["> ```D"]; + assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, [IndentType.quote, IndentType.white], lns[0], "```D")]); + lns = [">test"]; + assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote], lns[0], "test")]); + lns = ["> test"]; + assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote], lns[0], "test")]); + lns = ["> test"]; + assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote], lns[0], " test")]); + lns = ["> test"]; + assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote, IndentType.white], lns[0], "test")]); +} + +private enum BlockType { + plain, + text, + paragraph, + header, + table, + oList, + uList, + listItem, + code, + quote, + figure, + figureCaption +} + +private struct Block { + BlockType type; + Attribute[] attributes; + string[] text; + Block[] blocks; + size_t headerLevel; + Alignment[] columns; +} + +private struct Attribute { + string attribute; + string value; +} + +private enum Alignment { + none = 0, + left = 1<<0, + right = 1<<1, + center = left | right +} + +private void parseBlocks(ref Block root, ref Line[] lines, IndentType[] base_indent, scope MarkdownSettings settings) +pure @safe { + import std.conv : to; + import std.algorithm.comparison : among; + + if (base_indent.length == 0) root.type = BlockType.text; + else if (base_indent[$-1] == IndentType.quote) root.type = BlockType.quote; + + while (!lines.empty) { + auto ln = lines.front; + + if (ln.type == LineType.blank) { + lines.popFront(); + continue; + } + + if (ln.indent != base_indent) { + if (ln.indent.length < base_indent.length + || ln.indent[0 .. 
base_indent.length] != base_indent) + { + return; + } + + auto cindent = base_indent ~ IndentType.white; + if (ln.indent == cindent) { + Block cblock; + cblock.type = BlockType.code; + while (!lines.empty && (lines.front.unindented.strip.empty + || lines.front.indent.length >= cindent.length + && lines.front.indent[0 .. cindent.length] == cindent)) + { + cblock.text ~= lines.front.indent.length >= cindent.length + ? lines.front.unindent(cindent.length) : ""; + lines.popFront(); + } + root.blocks ~= cblock; + } else { + Block subblock; + parseBlocks(subblock, lines, ln.indent[0 .. base_indent.length+1], settings); + root.blocks ~= subblock; + } + } else { + Block b; + final switch (ln.type) { + case LineType.undefined: assert (false); + case LineType.blank: assert (false); + case LineType.plain: + if (lines.length >= 2 && lines[1].type == LineType.setextHeader) { + auto setln = lines[1].unindented; + b.type = BlockType.header; + b.text = [ln.unindented]; + if (settings.flags & MarkdownFlags.attributes) + parseAttributeString(skipAttributes(b.text[0]), b.attributes); + if (!b.attributes.canFind!(a => a.attribute == "id")) + b.attributes ~= Attribute("id", asSlug(b.text[0]).to!string); + b.headerLevel = setln.strip()[0] == '=' ? 1 : 2; + lines.popFrontN(2); + } else if (lines.length >= 2 && lines[1].type == LineType.tableSeparator + && ln.unindented.indexOf('|') >= 0) + { + auto setln = lines[1].unindented; + b.type = BlockType.table; + b.text = [ln.unindented]; + foreach (c; getTableColumns(setln)) { + Alignment a = Alignment.none; + if (c.startsWith(':')) a |= Alignment.left; + if (c.endsWith(':')) a |= Alignment.right; + b.columns ~= a; + } + + lines.popFrontN(2); + while (!lines.empty && lines[0].unindented.indexOf('|') >= 0) { + b.text ~= lines.front.unindented; + lines.popFront(); + } + } else { + b.type = BlockType.paragraph; + b.text = skipText(lines, base_indent); + } + break; + case LineType.hline: + b.type = BlockType.plain; + b.text = ["


"]; + lines.popFront(); + break; + case LineType.atxHeader: + b.type = BlockType.header; + string hl = ln.unindented; + b.headerLevel = 0; + while (hl.length > 0 && hl[0] == '#') { + b.headerLevel++; + hl = hl[1 .. $]; + } + + if (settings.flags & MarkdownFlags.attributes) + parseAttributeString(skipAttributes(hl), b.attributes); + if (!b.attributes.canFind!(a => a.attribute == "id")) + b.attributes ~= Attribute("id", asSlug(hl).to!string); + + while (hl.length > 0 && (hl[$-1] == '#' || hl[$-1] == ' ')) + hl = hl[0 .. $-1]; + b.text = [hl]; + lines.popFront(); + break; + case LineType.setextHeader: + lines.popFront(); + break; + case LineType.tableSeparator: + lines.popFront(); + break; + case LineType.figure: + case LineType.figureCaption: + b.type = ln.type == LineType.figure + ? BlockType.figure : BlockType.figureCaption; + + auto itemindent = base_indent ~ IndentType.white; + lines.popFront(); + parseBlocks(b, lines, itemindent, settings); + break; + case LineType.uList: + case LineType.oList: + b.type = ln.type == LineType.uList ? BlockType.uList : BlockType.oList; + + auto itemindent = base_indent ~ IndentType.white; + bool paraMode = false; + + // look ahead to determine whether the list is in paragraph + // mode (one or multiple

nested within each item + bool couldBeParaMode = false; + foreach (pln; lines[1 .. $]) { + if (pln.type == LineType.blank) { + couldBeParaMode = true; + continue; + } + if (!pln.indent.startsWith(base_indent)) break; + if (pln.indent == base_indent) { + if (pln.type == ln.type) + paraMode = couldBeParaMode; + break; + } + } + + while (!lines.empty && lines.front.type == ln.type + && lines.front.indent == base_indent) + { + Block itm; + itm.text = skipText(lines, itemindent); + itm.text[0] = removeListPrefix(itm.text[0], ln.type); + + if (paraMode) { + Block para; + para.type = BlockType.paragraph; + para.text = itm.text; + itm.blocks ~= para; + itm.text = null; + } + + parseBlocks(itm, lines, itemindent, settings); + itm.type = BlockType.listItem; + b.blocks ~= itm; + } + break; + case LineType.htmlBlock: + int nestlevel = 0; + auto starttag = parseHtmlBlockLine(ln.unindented); + if (!starttag.isHtmlBlock || !starttag.open) + break; + + b.type = BlockType.plain; + while (!lines.empty) { + if (lines.front.indent.length < base_indent.length) + break; + if (lines.front.indent[0 .. base_indent.length] != base_indent) + break; + + auto str = lines.front.unindent(base_indent.length); + auto taginfo = parseHtmlBlockLine(str); + b.text ~= lines.front.unindent(base_indent.length); + lines.popFront(); + if (taginfo.isHtmlBlock && taginfo.tagName == starttag.tagName) + nestlevel += taginfo.open ? 1 : -1; + if (nestlevel <= 0) break; + } + break; + case LineType.codeBlockDelimiter: + lines.popFront(); // TODO: get language from line + b.type = BlockType.code; + while (!lines.empty) { + if (lines.front.indent.length < base_indent.length) + break; + if (lines.front.indent[0 .. 
base_indent.length] != base_indent) + break; + if (lines.front.type == LineType.codeBlockDelimiter) { + lines.popFront(); + break; + } + b.text ~= lines.front.unindent(base_indent.length); + lines.popFront(); + } + break; + } + root.blocks ~= b; + } + } +} + + +private string[] skipText(ref Line[] lines, IndentType[] indent) +pure @safe { + static bool matchesIndent(IndentType[] indent, IndentType[] base_indent) + { + if (indent.length > base_indent.length) return false; + if (indent != base_indent[0 .. indent.length]) return false; + sizediff_t qidx = -1; + foreach_reverse (i, tp; base_indent) + if (tp == IndentType.quote) { + qidx = i; + break; + } + if (qidx >= 0) { + qidx = base_indent.length-1 - qidx; + if( indent.length <= qidx ) return false; + } + return true; + } + + // return value is used in variables that don't get bounds checks on the + // first element, so we should return at least one + if (lines.empty) + return [""]; + + string[] ret; + + while (true) { + ret ~= lines.front.unindent(min(indent.length, lines.front.indent.length)); + lines.popFront(); + + if (lines.empty || !matchesIndent(lines.front.indent, indent) + || lines.front.type != LineType.plain) + { + return ret; + } + } +} + +/// private +private void writeBlock(R)(ref R dst, ref const Block block, LinkRef[string] links, scope MarkdownSettings settings) +{ + final switch (block.type) { + case BlockType.plain: + foreach (ln; block.text) { + put(dst, ln); + put(dst, "\n"); + } + foreach (b; block.blocks) + writeBlock(dst, b, links, settings); + break; + case BlockType.text: + writeMarkdownEscaped(dst, block, links, settings); + foreach (b; block.blocks) + writeBlock(dst, b, links, settings); + break; + case BlockType.paragraph: + assert (block.blocks.length == 0); + put(dst, "

"); + writeMarkdownEscaped(dst, block, links, settings); + put(dst, "

\n"); + break; + case BlockType.header: + assert (block.blocks.length == 0); + assert (block.text.length == 1); + auto hlvl = block.headerLevel + (settings ? settings.headingBaseLevel-1 : 0); + dst.writeTag(block.attributes, "h", hlvl); + writeMarkdownEscaped(dst, block.text[0], links, settings); + dst.formattedWrite("\n", hlvl); + break; + case BlockType.table: + import std.algorithm.iteration : splitter; + + static string[Alignment.max+1] alstr = ["", " align=\"left\"", " align=\"right\"", " align=\"center\""]; + + put(dst, "\n"); + put(dst, ""); + size_t i = 0; + foreach (col; block.text[0].getTableColumns()) { + put(dst, "'); + dst.writeMarkdownEscaped(col, links, settings); + put(dst, ""); + if (i + 1 < block.columns.length) + i++; + } + put(dst, "\n"); + foreach (ln; block.text[1 .. $]) { + put(dst, ""); + i = 0; + foreach (col; ln.getTableColumns()) { + put(dst, "'); + dst.writeMarkdownEscaped(col, links, settings); + put(dst, ""); + if (i + 1 < block.columns.length) + i++; + } + put(dst, "\n"); + } + put(dst, "
\n"); + break; + case BlockType.oList: + put(dst, "
    \n"); + foreach (b; block.blocks) + writeBlock(dst, b, links, settings); + put(dst, "
\n"); + break; + case BlockType.uList: + put(dst, "
    \n"); + foreach (b; block.blocks) + writeBlock(dst, b, links, settings); + put(dst, "
\n"); + break; + case BlockType.listItem: + put(dst, "
  • "); + writeMarkdownEscaped(dst, block, links, settings); + foreach (b; block.blocks) + writeBlock(dst, b, links, settings); + put(dst, "
  • \n"); + break; + case BlockType.code: + assert (block.blocks.length == 0); + put(dst, "
    ");
    +			foreach (ln; block.text) {
    +				filterHTMLEscape(dst, ln);
    +				put(dst, "\n");
    +			}
    +			put(dst, "
    \n"); + break; + case BlockType.quote: + put(dst, "
    "); + writeMarkdownEscaped(dst, block, links, settings); + foreach (b; block.blocks) + writeBlock(dst, b, links, settings); + put(dst, "
    \n"); + break; + case BlockType.figure: + put(dst, "
    "); + bool omit_para = block.blocks.count!(b => b.type != BlockType.figureCaption) == 1; + foreach (b; block.blocks) { + if (b.type == BlockType.paragraph && omit_para) { + writeMarkdownEscaped(dst, b, links, settings); + } else writeBlock(dst, b, links, settings); + } + put(dst, "
    \n"); + break; + case BlockType.figureCaption: + put(dst, "
    "); + if (block.blocks.length == 1 && block.blocks[0].type == BlockType.paragraph) { + writeMarkdownEscaped(dst, block.blocks[0], links, settings); + } else { + foreach (b; block.blocks) + writeBlock(dst, b, links, settings); + } + put(dst, "
    \n"); + break; + } +} + +private void writeMarkdownEscaped(R)(ref R dst, ref const Block block, in LinkRef[string] links, scope MarkdownSettings settings) +{ + auto lines = () @trusted { return cast(string[])block.text; } (); + auto text = settings.flags & MarkdownFlags.keepLineBreaks ? lines.join("
    ") : lines.join("\n"); + writeMarkdownEscaped(dst, text, links, settings); + if (lines.length) put(dst, "\n"); +} + +/// private +private void writeMarkdownEscaped(R)(ref R dst, string ln, in LinkRef[string] linkrefs, scope MarkdownSettings settings) +{ + bool isAllowedURI(string lnk) { + auto idx = lnk.indexOf('/'); + auto cidx = lnk.indexOf(':'); + // always allow local URIs + if (cidx < 0 || idx >= 0 && cidx > idx) return true; + return settings.allowedURISchemas.canFind(lnk[0 .. cidx]); + } + + string filterLink(string lnk, bool is_image) { + if (isAllowedURI(lnk)) + return settings.urlFilter ? settings.urlFilter(lnk, is_image) : lnk; + return "#"; // replace link with unknown schema with dummy URI + } + + bool br = ln.endsWith(" "); + while (ln.length > 0) { + switch (ln[0]) { + default: + put(dst, ln[0]); + ln = ln[1 .. $]; + break; + case '\\': + if (ln.length >= 2) { + switch (ln[1]) { + default: + put(dst, ln[0 .. 2]); + ln = ln[2 .. $]; + break; + case '\'', '`', '*', '_', '{', '}', '[', ']', + '(', ')', '#', '+', '-', '.', '!': + put(dst, ln[1]); + ln = ln[2 .. $]; + break; + } + } else { + put(dst, ln[0]); + ln = ln[1 .. $]; + } + break; + case '_': + case '*': + string text; + if (auto em = parseEmphasis(ln, text)) { + put(dst, em == 1 ? "" : em == 2 ? "" : ""); + put(dst, text); + put(dst, em == 1 ? "" : em == 2 ? "": ""); + } else { + put(dst, ln[0]); + ln = ln[1 .. $]; + } + break; + case '`': + string code; + if (parseInlineCode(ln, code)) { + put(dst, ""); + filterHTMLEscape(dst, code, HTMLEscapeFlags.escapeMinimal); + put(dst, ""); + } else { + put(dst, ln[0]); + ln = ln[1 .. $]; + } + break; + case '[': + Link link; + Attribute[] attributes; + if (parseLink(ln, link, linkrefs, + settings.flags & MarkdownFlags.attributes ? 
&attributes : null)) + { + attributes ~= Attribute("href", filterLink(link.url, false)); + if (link.title.length) + attributes ~= Attribute("title", link.title); + dst.writeTag(attributes, "a"); + writeMarkdownEscaped(dst, link.text, linkrefs, settings); + put(dst, ""); + } else { + put(dst, ln[0]); + ln = ln[1 .. $]; + } + break; + case '!': + Link link; + Attribute[] attributes; + if (parseLink(ln, link, linkrefs, + settings.flags & MarkdownFlags.attributes ? &attributes : null)) + { + attributes ~= Attribute("src", filterLink(link.url, true)); + attributes ~= Attribute("alt", link.text); + if (link.title.length) + attributes ~= Attribute("title", link.title); + dst.writeTag(attributes, "img"); + } else if( ln.length >= 2 ){ + put(dst, ln[0 .. 2]); + ln = ln[2 .. $]; + } else { + put(dst, ln[0]); + ln = ln[1 .. $]; + } + break; + case '>': + if (settings.flags & MarkdownFlags.noInlineHtml) put(dst, ">"); + else put(dst, ln[0]); + ln = ln[1 .. $]; + break; + case '<': + string url; + if (parseAutoLink(ln, url)) { + bool is_email = url.startsWith("mailto:"); + put(dst, ""); + if (is_email) filterHTMLAllEscape(dst, url[7 .. $]); + else filterHTMLEscape(dst, url, HTMLEscapeFlags.escapeMinimal); + put(dst, ""); + } else { + if (ln.startsWith("
    ")) { + // always support line breaks, since we embed them here ourselves! + put(dst, "
    "); + ln = ln[4 .. $]; + } else if(ln.startsWith("
    ")) { + put(dst, "
    "); + ln = ln[5 .. $]; + } else { + if (settings.flags & MarkdownFlags.noInlineHtml) + put(dst, "<"); + else put(dst, ln[0]); + ln = ln[1 .. $]; + } + } + break; + } + } + if (br) put(dst, "
    "); +} + +private void writeTag(R, ARGS...)(ref R dst, string name, ARGS name_additions) +{ + writeTag(dst, cast(Attribute[])null, name, name_additions); +} + +private void writeTag(R, ARGS...)(ref R dst, scope const(Attribute)[] attributes, string name, ARGS name_additions) +{ + dst.formattedWrite("<%s", name); + foreach (add; name_additions) + dst.formattedWrite("%s", add); + foreach (a; attributes) { + dst.formattedWrite(" %s=\"", a.attribute); + dst.filterHTMLAttribEscape(a.value); + put(dst, '\"'); + } + put(dst, '>'); +} + +private bool isLineBlank(string ln) +pure @safe { + return allOf(ln, " \t"); +} + +private bool isSetextHeaderLine(string ln) +pure @safe { + ln = stripLeft(ln); + if (ln.length < 1) return false; + if (ln[0] == '=') { + while (!ln.empty && ln.front == '=') ln.popFront(); + return isLineBlank(ln); + } + if (ln[0] == '-') { + while (!ln.empty && ln.front == '-') ln.popFront(); + return isLineBlank(ln); + } + return false; +} + +private bool isAtxHeaderLine(string ln) +pure @safe { + ln = stripLeft(ln); + size_t i = 0; + while (i < ln.length && ln[i] == '#') i++; + if (i < 1 || i > 6 || i >= ln.length) return false; + return ln[i] == ' '; +} + +private bool isTableSeparatorLine(string ln) +pure @safe { + import std.algorithm.iteration : splitter; + + ln = strip(ln); + if (ln.startsWith("|")) ln = ln[1 .. $]; + if (ln.endsWith("|")) ln = ln[0 .. $-1]; + + auto cols = ln.splitter('|'); + size_t cnt = 0; + foreach (c; cols) { + c = c.strip(); + if (c.startsWith(':')) c = c[1 .. $]; + if (c.endsWith(':')) c = c[0 .. 
$-1]; + if (c.length < 3 || !c.allOf("-")) + return false; + cnt++; + } + return cnt >= 2; +} + +unittest { + assert(isTableSeparatorLine("|----|---|")); + assert(isTableSeparatorLine("|:----:|---|")); + assert(isTableSeparatorLine("---|----")); + assert(isTableSeparatorLine("| --- | :---- |")); + assert(!isTableSeparatorLine("| ---- |")); + assert(!isTableSeparatorLine("| -- | -- |")); + assert(!isTableSeparatorLine("| --- - | ---- |")); +} + +private auto getTableColumns(string line) +pure @safe nothrow { + import std.algorithm.iteration : map, splitter; + + if (line.startsWith("|")) line = line[1 .. $]; + if (line.endsWith("|")) line = line[0 .. $-1]; + return line.splitter('|').map!(s => s.strip()); +} + +private size_t countTableColumns(string line) +pure @safe { + return getTableColumns(line).count(); +} + +private bool isHlineLine(string ln) +pure @safe { + if (allOf(ln, " -") && count(ln, '-') >= 3) return true; + if (allOf(ln, " *") && count(ln, '*') >= 3) return true; + if (allOf(ln, " _") && count(ln, '_') >= 3) return true; + return false; +} + +private bool allOf(string str, const(char)[] ascii_chars) +pure @safe nothrow { + return str.byCodeUnit.all!(ch => ascii_chars.byCodeUnit.canFind(ch)); +} + +private bool isQuoteLine(string ln) +pure @safe { + return ln.stripLeft().startsWith(">"); +} + +private size_t getQuoteLevel(string ln) +pure @safe { + size_t level = 0; + ln = stripLeft(ln); + while (ln.length > 0 && ln[0] == '>') { + level++; + ln = stripLeft(ln[1 .. $]); + } + return level; +} + +private bool isUListLine(string ln) +pure @safe { + ln = stripLeft(ln); + if (ln.length < 2) return false; + if (!canFind("*+-", ln[0])) return false; + if (ln[1] != ' ' && ln[1] != '\t') return false; + return true; +} + +private bool isOListLine(string ln) +pure @safe { + ln = stripLeft(ln); + if (ln.length < 1) return false; + if (ln[0] < '0' || ln[0] > '9') return false; + ln = ln[1 .. 
$]; + while (ln.length > 0 && ln[0] >= '0' && ln[0] <= '9') + ln = ln[1 .. $]; + if (ln.length < 2) return false; + if (ln[0] != '.') return false; + if (ln[1] != ' ' && ln[1] != '\t') + return false; + return true; +} + +private string removeListPrefix(string str, LineType tp) +pure @safe { + switch (tp) { + default: assert (false); + case LineType.oList: // skip bullets and output using normal escaping + auto idx = str.indexOf('.'); + assert (idx > 0); + return str[idx+1 .. $].stripLeft(); + case LineType.uList: + return stripLeft(str.stripLeft()[1 .. $]); + } +} + + +private auto parseHtmlBlockLine(string ln) +pure @safe { + struct HtmlBlockInfo { + bool isHtmlBlock; + string tagName; + bool open; + } + + HtmlBlockInfo ret; + ret.isHtmlBlock = false; + ret.open = true; + + ln = strip(ln); + if (ln.length < 3) return ret; + if (ln[0] != '<') return ret; + if (ln[1] == '/') { + ret.open = false; + ln = ln[1 .. $]; + } + import std.ascii : isAlpha; + if (!isAlpha(ln[1])) return ret; + ln = ln[1 .. $]; + size_t idx = 0; + while (idx < ln.length && ln[idx] != ' ' && ln[idx] != '>') + idx++; + ret.tagName = ln[0 .. idx]; + ln = ln[idx .. 
$]; + + auto eidx = ln.indexOf('>'); + if (eidx < 0) return ret; + if (eidx != ln.length-1) return ret; + + if (!s_blockTags.canFind(ret.tagName)) return ret; + + ret.isHtmlBlock = true; + return ret; +} + +private bool isHtmlBlockLine(string ln) +pure @safe { + auto bi = parseHtmlBlockLine(ln); + return bi.isHtmlBlock && bi.open; +} + +private bool isHtmlBlockCloseLine(string ln) +pure @safe { + auto bi = parseHtmlBlockLine(ln); + return bi.isHtmlBlock && !bi.open; +} + +private bool isCodeBlockDelimiter(string ln) +pure @safe { + return ln.stripLeft.startsWith("```"); +} + +private string getHtmlTagName(string ln) +pure @safe { + return parseHtmlBlockLine(ln).tagName; +} + +private bool isLineIndented(string ln) +pure @safe { + return ln.startsWith("\t") || ln.startsWith(" "); +} + +private string unindentLine(string ln) +pure @safe { + if (ln.startsWith("\t")) return ln[1 .. $]; + if (ln.startsWith(" ")) return ln[4 .. $]; + assert (false); +} + +private int parseEmphasis(ref string str, ref string text) +pure @safe { + string pstr = str; + if (pstr.length < 3) return false; + + string ctag; + if (pstr.startsWith("***")) ctag = "***"; + else if (pstr.startsWith("**")) ctag = "**"; + else if (pstr.startsWith("*")) ctag = "*"; + else if (pstr.startsWith("___")) ctag = "___"; + else if (pstr.startsWith("__")) ctag = "__"; + else if (pstr.startsWith("_")) ctag = "_"; + else return false; + + pstr = pstr[ctag.length .. $]; + + auto cidx = () @trusted { return pstr.indexOf(ctag); }(); + if (cidx < 1) return false; + + text = pstr[0 .. cidx]; + + str = pstr[cidx+ctag.length .. $]; + return cast(int)ctag.length; +} + +private bool parseInlineCode(ref string str, ref string code) +pure @safe { + string pstr = str; + if (pstr.length < 3) return false; + string ctag; + if (pstr.startsWith("``")) ctag = "``"; + else if (pstr.startsWith("`")) ctag = "`"; + else return false; + pstr = pstr[ctag.length .. 
$]; + + auto cidx = () @trusted { return pstr.indexOf(ctag); }(); + if (cidx < 1) return false; + + code = pstr[0 .. cidx]; + str = pstr[cidx+ctag.length .. $]; + return true; +} + +private bool parseLink(ref string str, ref Link dst, scope const(LinkRef[string]) linkrefs, scope Attribute[]* attributes) +pure @safe { + string pstr = str; + if (pstr.length < 3) return false; + // ignore img-link prefix + if (pstr[0] == '!') pstr = pstr[1 .. $]; + + // parse the text part [text] + if (pstr[0] != '[') return false; + auto cidx = pstr.matchBracket(); + if (cidx < 1) return false; + string refid; + dst.text = pstr[1 .. cidx]; + pstr = pstr[cidx+1 .. $]; + + // parse either (link '['"title"']') or '[' ']'[refid] + if (pstr.length < 2) return false; + if (pstr[0] == '(') { + cidx = pstr.matchBracket(); + if (cidx < 1) return false; + auto inner = pstr[1 .. cidx]; + immutable qidx = inner.indexOf('"'); + import std.ascii : isWhite; + if (qidx > 1 && inner[qidx - 1].isWhite()) { + dst.url = inner[0 .. qidx].stripRight(); + immutable len = inner[qidx .. $].lastIndexOf('"'); + if (len == 0) return false; + assert (len > 0); + dst.title = inner[qidx + 1 .. qidx + len]; + } else { + dst.url = inner.stripRight(); + dst.title = null; + } + if (dst.url.startsWith("<") && dst.url.endsWith(">")) + dst.url = dst.url[1 .. $-1]; + pstr = pstr[cidx+1 .. $]; + + if (attributes) { + if (pstr.startsWith('{')) { + auto idx = pstr.indexOf('}'); + if (idx > 0) { + parseAttributeString(pstr[1 .. idx], *attributes); + pstr = pstr[idx+1 .. $]; + } + } + } + } else { + if (pstr[0] == ' ') pstr = pstr[1 .. $]; + if (pstr[0] != '[') return false; + pstr = pstr[1 .. $]; + cidx = pstr.indexOf(']'); + if (cidx < 0) return false; + if (cidx == 0) refid = dst.text; + else refid = pstr[0 .. cidx]; + pstr = pstr[cidx+1 .. 
$]; + } + + if (refid.length > 0) { + auto pr = toLower(refid) in linkrefs; + if (!pr) { + return false; + } + dst.url = pr.url; + dst.title = pr.title; + if (attributes) *attributes ~= pr.attributes; + } + + str = pstr; + return true; +} + +@safe unittest +{ + static void testLink(string s, Link exp, in LinkRef[string] refs) + { + Link link; + assert (parseLink(s, link, refs, null), s); + assert (link == exp); + } + LinkRef[string] refs; + refs["ref"] = LinkRef("ref", "target", "title"); + + testLink(`[link](target)`, Link("link", "target"), null); + testLink(`[link](target "title")`, Link("link", "target", "title"), null); + testLink(`[link](target "title")`, Link("link", "target", "title"), null); + testLink(`[link](target "title" )`, Link("link", "target", "title"), null); + + testLink(`[link](target)`, Link("link", "target"), null); + testLink(`[link](target "title")`, Link("link", "target", "title"), null); + + testLink(`[link][ref]`, Link("link", "target", "title"), refs); + testLink(`[ref][]`, Link("ref", "target", "title"), refs); + + testLink(`[link[with brackets]](target)`, Link("link[with brackets]", "target"), null); + testLink(`[link[with brackets]][ref]`, Link("link[with brackets]", "target", "title"), refs); + + testLink(`[link](/target with spaces )`, Link("link", "/target with spaces"), null); + testLink(`[link](/target with spaces "title")`, Link("link", "/target with spaces", "title"), null); + + testLink(`[link](white-space "around title" )`, Link("link", "white-space", "around title"), null); + testLink(`[link](tabs "around title" )`, Link("link", "tabs", "around title"), null); + + testLink(`[link](target "")`, Link("link", "target", ""), null); + testLink(`[link](target-no-title"foo" )`, Link("link", "target-no-title\"foo\"", ""), null); + + testLink(`[link]()`, Link("link", "target"), null); + + auto failing = [ + `text`, `[link](target`, `[link]target)`, `[link]`, + `[link(target)`, `link](target)`, `[link] (target)`, + `[link][noref]`, 
`[noref][]`
+	];
+	Link link;
+	foreach (s; failing)
+		assert (!parseLink(s, link, refs, null), s);
+}
+
+@safe unittest { // attributes
+	void test(string s, LinkRef[string] refs, bool parse_atts, string exprem, Link explnk, Attribute[] expatts...)
+	@safe {
+		Link lnk;
+		Attribute[] atts;
+		parseLink(s, lnk, refs, parse_atts ? () @trusted { return &atts; } () : null);
+		assert (lnk == explnk);
+		assert (s == exprem);
+		assert (atts == expatts);
+	}
+
+	test("[foo](bar){.baz}", null, false, "{.baz}", Link("foo", "bar", ""));
+	test("[foo](bar){.baz}", null, true, "", Link("foo", "bar", ""), Attribute("class", "baz"));
+
+	auto refs = ["bar": LinkRef("bar", "url", "title", [Attribute("id", "hid")])];
+	test("[foo][bar]", refs, false, "", Link("foo", "url", "title"));
+	test("[foo][bar]", refs, true, "", Link("foo", "url", "title"), Attribute("id", "hid"));
+}
+
+/**
+	Parses an automatic link (`<target>`) at the start of a string.
+
+	A candidate is accepted if it starts with '<', is terminated by '>',
+	contains neither space nor tab and contains at least one ':' or '@'.
+	A target containing an '@' is prefixed with "mailto:", unless a ':'
+	precedes the '@' and everything in front of that ':' is alphanumeric.
+
+	Params:
+		str = input text; on success it is advanced past the closing '>'
+		url = receives the link target; only written on success
+
+	Returns:
+		`true` if an automatic link was parsed. If `false` is returned,
+		neither `str` nor `url` has been modified.
+*/
+private bool parseAutoLink(ref string str, ref string url)
+pure @safe {
+	import std.algorithm.searching : all;
+	import std.ascii : isAlphaNum;
+
+	string pstr = str;
+	if (pstr.length < 3) return false;
+	if (pstr[0] != '<') return false;
+	pstr = pstr[1 .. $];
+	auto cidx = pstr.indexOf('>');
+	if (cidx < 0) return false;
+
+	// validate a local copy, so that a rejected candidate does not leak
+	// into the caller's `url`
+	string target = pstr[0 .. cidx];
+	if (target.any!(ch => ch == ' ' || ch == '\t')) return false;
+	auto atidx = target.indexOf('@');
+	auto colonidx = target.indexOf(':');
+	if (atidx < 0 && colonidx < 0) return false;
+
+	str = pstr[cidx+1 .. $];
+	url = target;
+	if (atidx < 0) return true;
+	if (colonidx < 0 || colonidx > atidx ||
+			!target[0 .. colonidx].all!(ch => ch.isAlphaNum))
+		url = "mailto:" ~ target;
+	return true;
+}
+
+unittest {
+	void test(bool expected, string str, string url)
+	{
+		string strcpy = str;
+		string outurl;
+		if (!expected) {
+			assert (!parseAutoLink(strcpy, outurl));
+			assert (outurl.length == 0);
+			assert (strcpy == str);
+		} else {
+			assert (parseAutoLink(strcpy, outurl));
+			assert (outurl == url);
+			assert (strcpy.length == 0);
+		}
+	}
+
+	test(true, "<http://foo/>", "http://foo/");
+	test(false, "<http://foo/", null);
+	test(true, "<mailto:foo@bar>", "mailto:foo@bar");
+	test(true, "<foo@bar>", "mailto:foo@bar");
+	test(true, "<proto:foo@bar>", "proto:foo@bar");
+	test(true, "<proto:foo@bar:123>", "proto:foo@bar:123");
+	test(true, "<\"foo:bar\"@baz>", "mailto:\"foo:bar\"@baz");
+	// rejected candidates must not touch the output URL
+	test(false, "<foo bar@baz>", null);
+	test(false, "<foobar>", null);
+}
+
+/**
+	Splits a trailing `{...}` attribute block off a line.
+
+	If the line, ignoring trailing white space, ends with '}' and contains
+	a '{', `line` is truncated to the text in front of the last '{'.
+
+	Returns:
+		The text between the last '{' and the closing '}', or `null` if
+		there is no such block, in which case `line` is left untouched.
+*/
+private string skipAttributes(ref string line)
+@safe pure {
+	auto strs = line.stripRight;
+	if (!strs.endsWith("}")) return null;
+
+	auto idx = strs.lastIndexOf('{');
+	if (idx < 0) return null;
+
+	auto ret = strs[idx+1 .. $-1];
+	line = strs[0 .. idx];
+	return ret;
+}
+
+unittest {
+	void test(string inp, string outp, string att)
+	{
+		auto ratt = skipAttributes(inp);
+		assert (ratt == att, ratt);
+		assert (inp == outp, inp);
+	}
+
+	test(" foo ", " foo ", null);
+	test("foo {bar}", "foo ", "bar");
+	test("foo {bar} ", "foo ", "bar");
+	test("foo bar} ", "foo bar} ", null);
+	test(" {bar} foo ", " {bar} foo ", null);
+	test(" fo {o {bar} ", " fo {o ", "bar");
+	test(" fo {o} {bar} ", " fo {o} ", "bar");
+}
+
+/**
+	Parses a space separated attribute list and merges it into `dst`.
+
+	`#name` sets the "id" attribute, replacing any previous value. `.name`
+	adds to the "class" attribute, with multiple classes separated by a
+	space. All other elements are ignored.
+*/
+private void parseAttributeString(string attributes, ref Attribute[] dst)
+@safe pure {
+	import std.algorithm.iteration : splitter;
+
+	// TODO: handle custom attributes (requires a different approach than splitter)
+
+	foreach (el; attributes.splitter(' ')) {
+		el = el.strip;
+		if (!el.length) continue;
+		if (el[0] == '#') {
+			auto idx = dst.countUntil!(a => a.attribute == "id");
+			if (idx >= 0) dst[idx].value = el[1 .. $];
+			else dst ~= Attribute("id", el[1 .. $]);
+		} else if (el[0] == '.') {
+			auto idx = dst.countUntil!(a => a.attribute == "class");
+			if (idx >= 0) dst[idx].value ~= " " ~ el[1 .. $];
+			else dst ~= Attribute("class", el[1 .. $]);
+		}
+	}
+}
+
+unittest {
+	void test(string str, Attribute[] atts...)
+	{
+		Attribute[] res;
+		parseAttributeString(str, res);
+		assert (res == atts, format("%s: %s", str, res));
+	}
+
+	test("");
+	test(".foo", Attribute("class", "foo"));
+	test("#foo", Attribute("id", "foo"));
+	test("#foo #bar", Attribute("id", "bar"));
+	test(".foo .bar", Attribute("class", "foo bar"));
+	test("#foo #bar", Attribute("id", "bar"));
+	test(".foo #bar .baz", Attribute("class", "foo baz"), Attribute("id", "bar"));
+}
+
+/**
+	Collects all link reference definitions and removes them from `lines`.
+
+	Params:
+		lines = the lines to scan; all lines that define a reference are
+			removed from the array
+
+	Returns:
+		The references found, indexed by their lower case reference ID.
+*/
+private LinkRef[string] scanForReferences(ref string[] lines)
+pure @safe {
+	LinkRef[string] ret;
+	bool[size_t] reflines;
+
+	// search for reference definitions:
+	//   [refid]: link "opt text"
+	//   [refid]: <link> "opt text"
+	//   "opt text", 'opt text', (opt text)
+	//   line must not be indented
+	foreach (lnidx, ln; lines) {
+		if (isLineIndented(ln)) continue;
+		ln = strip(ln);
+		if (!ln.startsWith("[")) continue;
+		ln = ln[1 .. $];
+
+		auto idx = () @trusted { return ln.indexOf("]:"); }();
+		if (idx < 0) continue;
+		string refid = ln[0 .. idx];
+		ln = stripLeft(ln[idx+2 .. $]);
+
+		string attstr = ln.skipAttributes();
+
+		string url;
+		if (ln.startsWith("<")) {
+			idx = ln.indexOf('>');
+			if (idx < 0) continue;
+			url = ln[1 .. idx];
+			ln = ln[idx+1 .. $];
+		} else {
+			// the URL ends at the first blank, no matter whether that is a
+			// space or a tab
+			auto spidx = ln.indexOf(' ');
+			auto tabidx = ln.indexOf('\t');
+			idx = (spidx < 0 || (tabidx >= 0 && tabidx < spidx)) ? tabidx : spidx;
+			if (idx < 0) {
+				url = ln;
+				ln = ln[$ .. $];
+			} else {
+				url = ln[0 .. idx];
+				ln = ln[idx+1 .. $];
+			}
+		}
+		ln = stripLeft(ln);
+
+		// the title must be enclosed in (), "" or ''
+		string title;
+		if (ln.length >= 3) {
+			if (ln[0] == '(' && ln[$-1] == ')'
+				|| ln[0] == '\"' && ln[$-1] == '\"'
+				|| ln[0] == '\'' && ln[$-1] == '\'' )
+			{
+				title = ln[1 .. $-1];
+			}
+		}
+
+		LinkRef lref;
+		lref.id = refid;
+		lref.url = url;
+		lref.title = title;
+		parseAttributeString(attstr, lref.attributes);
+		ret[toLower(refid)] = lref;
+		reflines[lnidx] = true;
+	}
+
+	// remove all lines containing references
+	auto nonreflines = appender!(string[])();
+	nonreflines.reserve(lines.length);
+	foreach (i, ln; lines)
+		if (i !in reflines)
+			nonreflines.put(ln);
+	lines = nonreflines.data();
+
+	return ret;
+}
+
+
+/**
+	Generates an identifier suitable for use within a URL.
+
+	The resulting string will contain only ASCII lower case alphabetic or
+	numeric characters, as well as dashes (-). Every sequence of
+	non-alphanumeric characters will be replaced by a single dash. No dashes
+	will be at either the front or the back of the result string.
+*/
+auto asSlug(R)(R text)
+	if (isInputRange!R && is(typeof(R.init.front) == dchar))
+{
+	static struct SlugRange {
+		private {
+			R _input;
+			bool _dash; // a dash is pending in front of the current input
+		}
+
+		this(R input)
+		{
+			_input = input;
+			skipNonAlphaNum();
+		}
+
+		@property bool empty() const { return _dash ? false : _input.empty; }
+		@property char front() const {
+			if (_dash) return '-';
+
+			// skipNonAlphaNum leaves only ASCII alphanumerics at the front
+			char r = cast(char)_input.front;
+			if (r >= 'A' && r <= 'Z') return cast(char)(r + ('a' - 'A'));
+			return r;
+		}
+
+		void popFront()
+		{
+			if (_dash) {
+				_dash = false;
+				return;
+			}
+
+			_input.popFront();
+			auto na = skipNonAlphaNum();
+			// no dash is emitted for characters skipped at the very end
+			if (na && !_input.empty)
+				_dash = true;
+		}
+
+		private bool skipNonAlphaNum()
+		{
+			bool have_skipped = false;
+			while (!_input.empty) {
+				switch (_input.front) {
+					default:
+						_input.popFront();
+						have_skipped = true;
+						break;
+					case 'a': .. case 'z':
+					case 'A': .. case 'Z':
+					case '0': .. case '9':
+						return have_skipped;
+				}
+			}
+			return have_skipped;
+		}
+	}
+	return SlugRange(text);
+}
+
+unittest {
+	import std.algorithm : equal;
+	assert ("".asSlug.equal(""));
+	assert (".,-".asSlug.equal(""));
+	assert ("abc".asSlug.equal("abc"));
+	assert ("aBc123".asSlug.equal("abc123"));
+	assert ("....aBc...123...".asSlug.equal("abc-123"));
+}
+
+
+/**
+	Finds the closing bracket (works with any of '[', '$(LPAREN)', '<', '{').
+
+	Params:
+		str = input string
+		nested = whether to skip nested brackets
+	Returns:
+		The index of the closing bracket or -1 for unbalanced strings
+		and strings that don't start with a bracket.
+*/
+private sizediff_t matchBracket(const(char)[] str, bool nested = true)
+@safe pure nothrow {
+	if (str.length < 2) return -1;
+
+	char open = str[0], close = void;
+	switch (str[0]) {
+		case '[': close = ']'; break;
+		case '(': close = ')'; break;
+		case '<': close = '>'; break;
+		case '{': close = '}'; break;
+		default: return -1;
+	}
+
+	size_t level = 1;
+	foreach (i, char c; str[1 .. $]) {
+		if (nested && c == open) ++level;
+		else if (c == close) --level;
+		// i counts from the character after the opening bracket
+		if (level == 0) return i + 1;
+	}
+	return -1;
+}
+
+@safe unittest
+{
+	static struct Test { string str; sizediff_t res; }
+	enum tests = [
+		Test("[foo]", 4), Test("<bar>", 4), Test("{baz}", 4),
+		Test("[", -1), Test("[foo", -1), Test("ab[f]", -1),
+		Test("[foo[bar]]", 9), Test("[foo{bar]]", 8),
+	];
+	foreach (test; tests)
+		assert(matchBracket(test.str) == test.res);
+	assert(matchBracket("[foo[bar]]", false) == 8);
+	static assert(matchBracket("[foo]") == 4);
+}
+
+
+/// Link reference definition, as collected by `scanForReferences`.
+private struct LinkRef {
+	string id;
+	string url;
+	string title;
+	Attribute[] attributes;
+}
+
+/// Text, target and title of a link, as filled in by `parseLink`.
+private struct Link {
+	string text;
+	string url;
+	string title;
+}
+
+@safe unittest { // alt and title attributes
+	assert (filterMarkdown("![alt](http://example.org/image)")
+		== "

    \"alt\"\n

    \n"); + assert (filterMarkdown("![alt](http://example.org/image \"Title\")") + == "

    \"alt\"\n

    \n"); +} + +@safe unittest { // complex links + assert (filterMarkdown("their [install\ninstructions]() and") + == "

    their install\ninstructions and\n

    \n"); + assert (filterMarkdown("[![Build Status](https://travis-ci.org/rejectedsoftware/vibe.d.png)](https://travis-ci.org/rejectedsoftware/vibe.d)") + == "

    \"Build\n

    \n"); +} + +@safe unittest { // check CTFE-ability + enum res = filterMarkdown("### some markdown\n[foo][]\n[foo]: /bar"); + assert (res == "

    some markdown

    \n

    foo\n

    \n", res); +} + +@safe unittest { // correct line breaks in restrictive mode + auto res = filterMarkdown("hello\nworld", MarkdownFlags.forumDefault); + assert (res == "

    hello
    world\n

    \n", res); +} + +/*@safe unittest { // code blocks and blockquotes + assert (filterMarkdown("\tthis\n\tis\n\tcode") == + "
    this\nis\ncode
    \n"); + assert (filterMarkdown(" this\n is\n code") == + "
    this\nis\ncode
    \n"); + assert (filterMarkdown(" this\n is\n\tcode") == + "
    this\nis
    \n
    code
    \n"); + assert (filterMarkdown("\tthis\n\n\tcode") == + "
    this\n\ncode
    \n"); + assert (filterMarkdown("\t> this") == + "
    > this
    \n"); + assert (filterMarkdown("> this") == + "
    this
    \n"); + assert (filterMarkdown("> this\n is code") == + "
    this\nis code
    \n"); +}*/ + +@safe unittest { + assert (filterMarkdown("## Hello, World!") == "

    Hello, World!

    \n", filterMarkdown("## Hello, World!")); +} + +@safe unittest { // tables + assert (filterMarkdown("foo|bar\n---|---", MarkdownFlags.tables) + == "\n\n
    foobar
    \n"); + assert (filterMarkdown(" *foo* | bar \n---|---\n baz|bam", MarkdownFlags.tables) + == "\n\n\n
    foobar
    bazbam
    \n"); + assert (filterMarkdown("|foo|bar|\n---|---\n baz|bam", MarkdownFlags.tables) + == "\n\n\n
    foobar
    bazbam
    \n"); + assert (filterMarkdown("foo|bar\n|---|---|\nbaz|bam", MarkdownFlags.tables) + == "\n\n\n
    foobar
    bazbam
    \n"); + assert (filterMarkdown("foo|bar\n---|---\n|baz|bam|", MarkdownFlags.tables) + == "\n\n\n
    foobar
    bazbam
    \n"); + assert (filterMarkdown("foo|bar|baz\n:---|---:|:---:\n|baz|bam|bap|", MarkdownFlags.tables) + == "\n\n" + ~ "\n
    foobarbaz
    bazbambap
    \n"); + assert (filterMarkdown(" |bar\n---|---", MarkdownFlags.tables) + == "\n\n
    bar
    \n"); + assert (filterMarkdown("foo|bar\n---|---\nbaz|", MarkdownFlags.tables) + == "\n\n\n
    foobar
    baz
    \n"); +} + +@safe unittest { // issue #1527 - blank lines in code blocks + assert (filterMarkdown(" foo\n\n bar\n") == + "
    foo\n\nbar\n
    \n"); +} + +@safe unittest { + assert (filterMarkdown("> ```\r\n> test\r\n> ```", MarkdownFlags.forumDefault) == + "
    test\n
    \n
    \n"); +} + +@safe unittest { // issue #1845 - malicious URI targets + assert (filterMarkdown("[foo](javascript:foo) ![bar](javascript:bar) ", MarkdownFlags.forumDefault) == + "

    foo \"bar\" javascript:baz\n

    \n"); + assert (filterMarkdown("[foo][foo] ![foo][foo]\n[foo]: javascript:foo", MarkdownFlags.forumDefault) == + "

    foo \"foo\"\n

    \n"); + assert (filterMarkdown("[foo](javascript%3Abar)", MarkdownFlags.forumDefault) == + "

    foo\n

    \n"); + + // extra XSS regression tests + assert (filterMarkdown("[](bar)", MarkdownFlags.forumDefault) == + "

    <script></script>\n

    \n"); + assert (filterMarkdown("[foo](\">foo\n

    \n"); + assert (filterMarkdown("[foo](javascript:bar)", MarkdownFlags.forumDefault) == + "

    foo\n

    \n"); +} + +@safe unittest { // issue #2132 - table with more columns in body goes out of array bounds + assert (filterMarkdown("| a | b |\n|--------|--------|\n| c | d | e |", MarkdownFlags.tables) == + "\n\n\n
    ab
    cde
    \n"); +} + +@safe unittest { // lists + assert (filterMarkdown("- foo\n- bar") == + "
      \n
    • foo\n
    • \n
    • bar\n
    • \n
    \n"); + assert (filterMarkdown("- foo\n\n- bar") == + "
      \n
    • foo\n

      \n
    • \n
    • bar\n

      \n
    • \n
    \n"); + assert (filterMarkdown("1. foo\n2. bar") == + "
      \n
    1. foo\n
    2. \n
    3. bar\n
    4. \n
    \n"); + assert (filterMarkdown("1. foo\n\n2. bar") == + "
      \n
    1. foo\n

      \n
    2. \n
    3. bar\n

      \n
    4. \n
    \n"); + assert (filterMarkdown("1. foo\n\n\tbar\n\n2. bar\n\n\tbaz\n\n") == + "
      \n
    1. foo\n

      \n

      bar\n

      \n
    2. \n
    3. bar\n

      \n

      baz\n

      \n
    4. \n
    \n"); +} + +@safe unittest { // figures + assert (filterMarkdown("- %%%") == "
      \n
    • %%%\n
    • \n
    \n"); + assert (filterMarkdown("- ###") == "
      \n
    • ###\n
    • \n
    \n"); + assert (filterMarkdown("- %%%", MarkdownFlags.figures) == "
    \n"); + assert (filterMarkdown("- ###", MarkdownFlags.figures) == "
    \n"); + assert (filterMarkdown("- %%%\n\tfoo\n\n\t- ###\n\t\tbar", MarkdownFlags.figures) == + "
    foo\n
    bar\n
    \n
    \n"); + assert (filterMarkdown("- %%%\n\tfoo\n\n\tbar\n\n\t- ###\n\t\tbaz", MarkdownFlags.figures) == + "

    foo\n

    \n

    bar\n

    \n
    baz\n
    \n
    \n"); + assert (filterMarkdown("- %%%\n\tfoo\n\n\t- ###\n\t\tbar\n\n\t\tbaz", MarkdownFlags.figures) == + "
    foo\n

    bar\n

    \n

    baz\n

    \n
    \n
    \n"); + assert (filterMarkdown("- %%%\n\t1. foo\n\t2. bar\n\n\t- ###\n\t\tbaz", MarkdownFlags.figures) == + "
      \n
    1. foo\n
    2. \n
    3. bar\n
    4. \n
    \n
    baz\n
    \n
    \n"); + assert (filterMarkdown("- foo\n- %%%", MarkdownFlags.figures) == "
      \n
    • foo\n
    • \n
    \n
    \n"); + assert (filterMarkdown("- foo\n\n- %%%", MarkdownFlags.figures) == "
      \n
    • foo\n
    • \n
    \n
    \n"); +} + +@safe unittest { // HTML entities + assert(filterMarkdown(" ") == "

     \n

    \n"); + assert(filterMarkdown("* *") == "

     \n

    \n"); + assert(filterMarkdown("` `") == "

    &nbsp;\n

    \n"); +} + diff --git a/textfilter/vibe/textfilter/urlencode.d b/textfilter/vibe/textfilter/urlencode.d new file mode 100644 index 0000000..81275b9 --- /dev/null +++ b/textfilter/vibe/textfilter/urlencode.d @@ -0,0 +1,357 @@ +/** + URL-encoding implementation + + Copyright: © 2012-2015 Sönke Ludwig + License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file. + Authors: Jan Krüger, Sönke Ludwig +*/ +module vibe.textfilter.urlencode; + +import std.algorithm; +import std.array; +import std.conv; +import std.exception; +import std.format; +import std.range; +import std.utf : byCodeUnit; + + +/** + * Returns: + * the URL encoded version of a given string, in a newly-allocated string. + */ +T[] urlEncode(T)(T[] str, const(char)[] allowed_chars = null) if (is(T[] : const(char)[])) +{ + auto dst = StringSliceAppender!(T[])(str); + filterURLEncode(dst, str, allowed_chars); + return dst.data; +} + +@safe unittest { + string s = "hello-world"; + assert(s.urlEncode().ptr == s.ptr); +} + +private auto isCorrectHexNum(const(char)[] str) +@safe { + foreach (char c; str) { + switch(c) { + case '0': .. case '9': + case 'A': .. case 'F': + case 'a': .. case 'f': + break; + default: + return false; + } + } + return true; +} + +/** Checks whether a given string has valid URL encoding. +*/ +bool isURLEncoded(const(char)[] str, const(char)[] reserved_chars = null) +@safe nothrow { + import std.string : representation; + + for (size_t i = 0; i < str.length; i++) { + if (isAsciiAlphaNum(str[i])) + continue; + + switch (str[i]) { + case '-': + case '.': + case '_': + case '~': + break; + case '%': + if (i + 2 >= str.length) + return false; + if (!isCorrectHexNum(str[i+1 .. 
i+3])) + return false; + i += 2; + break; + default: + if (reserved_chars.representation.canFind(str[i])) + return false; + break; + } + } + return true; +} + +@safe nothrow unittest { + assert(isURLEncoded("hello-world")); + assert(isURLEncoded("he%2F%af")); + assert(!isURLEncoded("hello world", " ")); + assert(!isURLEncoded("he%f")); + assert(!isURLEncoded("he%fx")); +} + +/** Returns the decoded version of a given URL encoded string. +*/ +T[] urlDecode(T)(T[] str) if (is(T[] : const(char)[])) +{ + if (!str.byCodeUnit.canFind('%')) return str; + auto dst = StringSliceAppender!(T[])(str); + filterURLDecode(dst, str); + return dst.data; +} + +/** Returns the form encoded version of a given string. + + Form encoding is the same as normal URL encoding, except that + spaces are replaced by plus characters. + + Note that newlines should always be represented as \r\n sequences + according to the HTTP standard. +*/ +T[] formEncode(T)(T[] str, const(char)[] allowed_chars = null) if (is(T[] : const(char)[])) +{ + auto dst = StringSliceAppender!(T[])(str); + filterURLEncode(dst, str, allowed_chars, true); + return dst.data; +} + +/** Returns the decoded version of a form encoded string. + + Form encoding is the same as normal URL encoding, except that + spaces are replaced by plus characters. +*/ +T[] formDecode(T)(T[] str) if (is(T[] : const(char)[])) +{ + if (!str.byCodeUnit.any!(ch => ch == '%' || ch == '+')) return str; + auto dst = StringSliceAppender!(T[])(str); + filterURLDecode(dst, str, true); + return dst.data; +} + +/** Writes the URL encoded version of the given string to an output range. 
+*/ +void filterURLEncode(R)(ref R dst, const(char)[] str, + const(char)[] allowed_chars = null, + bool form_encoding = false) +{ + while (str.length > 0) { + if (isAsciiAlphaNum(str[0])) { + put(dst, str[0]); + } else switch (str[0]) { + default: + if (allowed_chars.canFind(str[0])) put(dst, str[0]); + else { + static if (is(typeof({ R a, b; b = a; }))) + formattedWrite(dst, "%%%02X", str[0]); + else + formattedWrite(() @trusted { return &dst; } (), "%%%02X", str[0]); + } + break; + case ' ': + if (form_encoding) { + put(dst, '+'); + break; + } + goto default; + case '-': case '_': case '.': case '~': + put(dst, str[0]); + break; + } + str = str[1 .. $]; + } +} + + +/** Writes the decoded version of the given URL encoded string to an output range. +*/ +void filterURLDecode(R)(ref R dst, const(char)[] str, bool form_encoding = false) +{ + while( str.length > 0 ) { + switch(str[0]) { + case '%': + enforce(str.length >= 3, "invalid percent encoding"); + auto hex = str[1..3]; + auto c = cast(char)parse!int(hex, 16); + enforce(hex.length == 0, "invalid percent encoding"); + put(dst, c); + str = str[3 .. $]; + break; + case '+': + if (form_encoding) { + put(dst, ' '); + str = str[1 .. $]; + break; + } + goto default; + default: + put(dst, str[0]); + str = str[1 .. 
$]; + break; + } + } +} + + +@safe unittest +{ + assert(urlEncode("\r\n") == "%0D%0A"); // github #65 + assert(urlEncode("This-is~a_test") == "This-is~a_test"); + assert(urlEncode("This is a test") == "This%20is%20a%20test"); + assert(urlEncode("This{is}test") == "This%7Bis%7Dtest"); + assert(formEncode("This is a test") == "This+is+a+test"); + assert(formEncode("this/test", "/") == "this/test"); + assert(formEncode("this/test") == "this%2Ftest"); + assert(urlEncode("%") == "%25"); + assert(urlEncode("!") == "%21"); + assert(urlDecode("%0D%0a") == "\r\n"); + assert(urlDecode("%c2%aE") == "®"); + assert(urlDecode("This+is%20a+test") == "This+is a+test"); + assert(formDecode("This+is%20a+test") == "This is a test"); + + string a = "This~is a-test!\r\nHello, Wörld.. "; + string aenc = urlEncode(a); + assert(aenc == "This~is%20a-test%21%0D%0AHello%2C%20W%C3%B6rld..%20"); + assert(urlDecode(urlEncode(a)) == a); +} + +// for issue https://github.com/vibe-d/vibe.d/issues/2541 +@safe unittest +{ + static struct LimitedRange + { + char[] buf; + void put(const(char)[] data) { + .put(buf, data); + } + } + + char[100] buf1; + char[100] buf2; + auto r = LimitedRange(buf1[]); + r.filterURLEncode("This-is~a_test"); + auto result = buf1[0 .. buf1.length - r.buf.length]; + assert(result == "This-is~a_test"); + + r = LimitedRange(buf1[]); + r.filterURLEncode("This is a test"); + result = buf1[0 .. buf1.length - r.buf.length]; + assert(result == "This%20is%20a%20test"); + + r = LimitedRange(buf2[]); + r.filterURLDecode(result); + result = buf2[0 .. 
buf2.length - r.buf.length]; + assert(result == "This is a test"); +} + + +private struct StringSliceAppender(S) { + private { + Appender!S m_appender; + S m_source; + size_t m_prefixLength; + } + + this(S source) + { + m_source = source; + if (m_source.length == 0) + m_appender = appender!S(); + } + + @disable this(this); + + void put(char ch) + { + if (m_source.length) { + if (m_prefixLength < m_source.length && m_source[m_prefixLength] == ch) { + m_prefixLength++; + return; + } + + m_appender = appender!S(); + m_appender.put(m_source[0 .. m_prefixLength]); + m_appender.put(ch); + m_source = S.init; + } else m_appender.put(ch); + } + + void put(S s) + { + if (m_source.length) { + foreach (char ch; s) + put(ch); + } else m_appender.put(s); + } + + void put(dchar ch) + { + import std.encoding : encode; + char[6] chars; + auto n = encode(ch, chars[]); + foreach (char c; chars[0 .. n]) put(c); + } + + @property S data() + { + return m_source.length ? m_source[0 .. m_prefixLength] : m_appender.data; + } +} + +@safe unittest { + string s = "foo"; + auto a = StringSliceAppender!string(s); + a.put("f"); assert(a.data == "f"); assert(a.data.ptr is s.ptr); + a.put('o'); assert(a.data == "fo"); assert(a.data.ptr is s.ptr); + a.put('o'); assert(a.data == "foo"); assert(a.data.ptr is s.ptr); + a.put('ä'); assert(a.data == "fooä"); + + a = StringSliceAppender!string(s); + a.put('f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr); + a.put("oobar"); assert(a.data == "foobar"); + + a = StringSliceAppender!string(s); + a.put(cast(dchar)'f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr); + a.put('b'); assert(a.data == "fb"); + + a = StringSliceAppender!string(s); + a.put('f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr); + a.put("b"); assert(a.data == "fb"); + + a = StringSliceAppender!string(s); + a.put('f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr); + a.put("ä"); assert(a.data == "fä"); + + a = StringSliceAppender!string(s); + a.put("bar"); 
assert(a.data == "bar"); + + a = StringSliceAppender!string(s); + a.put('b'); assert(a.data == "b"); + + a = StringSliceAppender!string(s); + a.put('ä'); assert(a.data == "ä"); + + a = StringSliceAppender!string(s); + a.put("foo"); assert(a.data == "foo"); assert(a.data.ptr is s.ptr); + a.put("bar"); assert(a.data == "foobar"); + + a = StringSliceAppender!string(s); + a.put("foo"); assert(a.data == "foo"); assert(a.data.ptr is s.ptr); + a.put('b'); assert(a.data == "foob"); +} + +private static bool isAsciiAlphaNum(char ch) +@safe nothrow pure @nogc { + return (uint(ch) & 0xDF) - 0x41 < 26 || uint(ch) - '0' <= 9; +} + +unittest { + assert(!isAsciiAlphaNum('@')); + assert(isAsciiAlphaNum('A')); + assert(isAsciiAlphaNum('Z')); + assert(!isAsciiAlphaNum('[')); + assert(!isAsciiAlphaNum('`')); + assert(isAsciiAlphaNum('a')); + assert(isAsciiAlphaNum('z')); + assert(!isAsciiAlphaNum('{')); + assert(!isAsciiAlphaNum('/')); + assert(isAsciiAlphaNum('0')); + assert(isAsciiAlphaNum('9')); + assert(!isAsciiAlphaNum(':')); +}