diff --git a/.gitignore b/.gitignore index 95dba8e..6f49e40 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,13 @@ phpunit.xml clover.xml src/AutoloadStub.php .env +*~ +*.log + +# ext compiling +*.gch +*.lo +*.o +*.so +libtool +compiled-files-sum diff --git a/Makefile b/Makefile index f4b9777..bc85cb6 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ help: @grep -E '^[-a-zA-Z0-9_\.\/]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[32m%-15s\033[0m\t%s\n", $$1, $$2}' -build: tests-all cs-check ## Run all necessary stuff before commit. +build: ext-build tests-all cs-check ## Run all necessary stuff before commit. tests: CMD=composer tests -- $(ARGS) @@ -107,3 +107,15 @@ release: .env build docker-run: ## Run a command in a latest JSON Machine PHP docker container. Ex.: make docker-run CMD="php -v" @$(call DOCKER_RUN,$(LATEST_PHP),$(CMD)) + +EXT_BUILD_POST_CMD=composer performance-tests +ext-build: ## Build JSON Machine's PHP extension for production and run performance tests + docker build --tag json-machine-ext --build-arg debug=$(DEBUG) ext/build + ./build/docker-run.sh \ + "json-machine-ext" \ + "$$PWD" \ + "cd /project/ext/jsonmachine; phpize && ./configure $(DEBUG) && make clean && make && cd /project && composer tests -- --colors=always --stop-on-failure --filter \\\\JsonMachineTest\\\\ExtTokensTest && composer performance-tests" + #"cd /project/ext/jsonmachine; phpize && ./configure $(DEBUG) && make clean && make && cd /project && valgrind php /project/jnt.php" + +ext-build-debug: DEBUG=--enable-debug +ext-build-debug: ext-build ## Build JSON Machine's PHP extension for development and run tests diff --git a/build/build-image.sh b/build/build-image.sh index d9e1835..5cbc185 100755 --- a/build/build-image.sh +++ b/build/build-image.sh @@ -28,6 +28,7 @@ printf " make \ bash \ linux-headers \ + git \ && wget http://pear.php.net/go-pear.phar && php go-pear.phar \ && pecl install xdebug-$XDEBUG_VERSION \ && docker-php-ext-enable xdebug \ diff --git a/build/docker-run.sh b/build/docker-run.sh index f45c031..71d5102 100755 --- a/build/docker-run.sh +++ b/build/docker-run.sh @@ -7,11 +7,11 @@ shift PROJECT_DIR=$1 shift -docker run --rm \ +docker run $DOCKER_RUN_OPTS --rm \ --name "$CONTAINER_NAME" \ - --volume "$PROJECT_DIR:/usr/src/json-machine" \ + --volume "$PROJECT_DIR:/project" \ --volume "/tmp:/tmp" \ - --workdir "/usr/src/json-machine" \ + --workdir "/project" \ --user "$(id -u):$(id -g)" \ --env COMPOSER_CACHE_DIR=/tmp \ "$CONTAINER_NAME" \ diff --git a/ext/build/Dockerfile b/ext/build/Dockerfile new file mode 100644 index 0000000..1ca0129 --- /dev/null +++ b/ext/build/Dockerfile @@ -0,0 +1,47 @@ +# Use phusion/baseimage as base image. To make your builds reproducible, make +# sure you lock down to a specific version, not to `latest`! +# See https://github.com/phusion/baseimage-docker/blob/master/Changelog.md for +# a list of version numbers. +FROM phusion/baseimage:focal-1.0.0 + +# Use baseimage-docker's init system. +CMD ["/sbin/my_init"] + +# ...put your own build instructions here... +RUN apt update && apt-get install -y \ + build-essential \ + autoconf \ + automake \ + bison \ + flex \ + re2c \ + gdb \ + libtool \ + make \ + pkgconf \ + valgrind \ + git \ + libxml2-dev \ + libsqlite3-dev \ + wget \ + libssl-dev \ + ; + +# Clean up APT when done. +RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +WORKDIR /usr/src +RUN git clone https://github.com/php/php-src.git + +ARG debug +RUN cd php-src \ + && git checkout php-8.0.9 \ + && ./buildconf --force \ + && ./configure $debug --enable-mbstring --disable-mbregex --with-openssl \ + && make -j`nproc` \ + && make install +RUN rm -rf php-src +COPY php.ini /usr/local/lib/php.ini + +RUN wget https://getcomposer.org/download/2.2.18/composer.phar -O /usr/local/bin/composer \ + && chmod +x /usr/local/bin/composer diff --git a/ext/build/php.ini b/ext/build/php.ini new file mode 100644 index 0000000..9cef300 --- /dev/null +++ b/ext/build/php.ini @@ -0,0 +1,14 @@ +date.timezone=GMT +max_execution_time=30 +memory_limit=128M + +error_reporting=E_ALL | E_STRICT ; catch all error and warnings +display_errors=1 +log_errors=1 + +zend_extension=opcache.so +opcache.enable=1 +opcache.enable_cli=1 +opcache.protect_memory=1 ; catch invalid updates of shared memory + +extension=/project/ext/jsonmachine/modules/jsonmachine.so diff --git a/ext/jsonmachine/.gitignore b/ext/jsonmachine/.gitignore new file mode 100644 index 0000000..757b256 --- /dev/null +++ b/ext/jsonmachine/.gitignore @@ -0,0 +1,37 @@ +*.lo +*.la +*.dep +*.loT +.libs +acinclude.m4 +aclocal.m4 +autom4te.cache +build +config.guess +config.h +config.h.in +config.log +config.nice +config.status +config.sub +configure +configure.ac +configure.in +include +install-sh +libtool +ltmain.sh +Makefile +Makefile.fragments +Makefile.global +Makefile.objects +missing +mkinstalldirs +modules +run-tests.php +tests/**/*.diff +tests/**/*.out +tests/**/*.php +tests/**/*.exp +tests/**/*.log +tests/**/*.sh diff --git a/ext/jsonmachine/config.m4 b/ext/jsonmachine/config.m4 new file mode 100644 index 0000000..59d632b --- /dev/null +++ b/ext/jsonmachine/config.m4 @@ -0,0 +1,94 @@ +dnl config.m4 for extension jsonmachine + +dnl Comments in this file start with the string 'dnl'. +dnl Remove where necessary. + +dnl If your extension references something external, use 'with': + +dnl PHP_ARG_WITH([jsonmachine], +dnl [for jsonmachine support], +dnl [AS_HELP_STRING([--with-jsonmachine], +dnl [Include jsonmachine support])]) + +dnl Otherwise use 'enable': + +PHP_ARG_ENABLE([jsonmachine], + [whether to enable jsonmachine support], + [AS_HELP_STRING([--enable-jsonmachine], + [Enable jsonmachine support])], + [no]) + +if test "$PHP_JSONMACHINE" != "no"; then + dnl Write more examples of tests here... + + dnl Remove this code block if the library does not support pkg-config. + dnl PKG_CHECK_MODULES([LIBFOO], [foo]) + dnl PHP_EVAL_INCLINE($LIBFOO_CFLAGS) + dnl PHP_EVAL_LIBLINE($LIBFOO_LIBS, JSONMACHINE_SHARED_LIBADD) + + dnl If you need to check for a particular library version using PKG_CHECK_MODULES, + dnl you can use comparison operators. For example: + dnl PKG_CHECK_MODULES([LIBFOO], [foo >= 1.2.3]) + dnl PKG_CHECK_MODULES([LIBFOO], [foo < 3.4]) + dnl PKG_CHECK_MODULES([LIBFOO], [foo = 1.2.3]) + + dnl Remove this code block if the library supports pkg-config. + dnl --with-jsonmachine -> check with-path + dnl SEARCH_PATH="/usr/local /usr" # you might want to change this + dnl SEARCH_FOR="/include/jsonmachine.h" # you most likely want to change this + dnl if test -r $PHP_JSONMACHINE/$SEARCH_FOR; then # path given as parameter + dnl JSONMACHINE_DIR=$PHP_JSONMACHINE + dnl else # search default path list + dnl AC_MSG_CHECKING([for jsonmachine files in default path]) + dnl for i in $SEARCH_PATH ; do + dnl if test -r $i/$SEARCH_FOR; then + dnl JSONMACHINE_DIR=$i + dnl AC_MSG_RESULT(found in $i) + dnl fi + dnl done + dnl fi + dnl + dnl if test -z "$JSONMACHINE_DIR"; then + dnl AC_MSG_RESULT([not found]) + dnl AC_MSG_ERROR([Please reinstall the jsonmachine distribution]) + dnl fi + + dnl Remove this code block if the library supports pkg-config. + dnl --with-jsonmachine -> add include path + dnl PHP_ADD_INCLUDE($JSONMACHINE_DIR/include) + + dnl Remove this code block if the library supports pkg-config. + dnl --with-jsonmachine -> check for lib and symbol presence + dnl LIBNAME=JSONMACHINE # you may want to change this + dnl LIBSYMBOL=JSONMACHINE # you most likely want to change this + + dnl If you need to check for a particular library function (e.g. a conditional + dnl or version-dependent feature) and you are using pkg-config: + dnl PHP_CHECK_LIBRARY($LIBNAME, $LIBSYMBOL, + dnl [ + dnl AC_DEFINE(HAVE_JSONMACHINE_FEATURE, 1, [ ]) + dnl ],[ + dnl AC_MSG_ERROR([FEATURE not supported by your jsonmachine library.]) + dnl ], [ + dnl $LIBFOO_LIBS + dnl ]) + + dnl If you need to check for a particular library function (e.g. a conditional + dnl or version-dependent feature) and you are not using pkg-config: + dnl PHP_CHECK_LIBRARY($LIBNAME, $LIBSYMBOL, + dnl [ + dnl PHP_ADD_LIBRARY_WITH_PATH($LIBNAME, $JSONMACHINE_DIR/$PHP_LIBDIR, JSONMACHINE_SHARED_LIBADD) + dnl AC_DEFINE(HAVE_JSONMACHINE_FEATURE, 1, [ ]) + dnl ],[ + dnl AC_MSG_ERROR([FEATURE not supported by your jsonmachine library.]) + dnl ],[ + dnl -L$JSONMACHINE_DIR/$PHP_LIBDIR -lm + dnl ]) + dnl + dnl PHP_SUBST(JSONMACHINE_SHARED_LIBADD) + + dnl In case of no dependencies + AC_DEFINE(HAVE_JSONMACHINE, 1, [ Have jsonmachine support ]) + + PHP_NEW_EXTENSION(jsonmachine, jsonmachine.c, $ext_shared) +fi diff --git a/ext/jsonmachine/config.w32 b/ext/jsonmachine/config.w32 new file mode 100644 index 0000000..7d0c966 --- /dev/null +++ b/ext/jsonmachine/config.w32 @@ -0,0 +1,7 @@ +ARG_ENABLE('jsonmachine', 'jsonmachine support', 'no'); + +if (PHP_JSONMACHINE != 'no') { + AC_DEFINE('HAVE_JSONMACHINE', 1, 'jsonmachine support enabled'); + + EXTENSION('jsonmachine', 'jsonmachine.c', null, '/DZEND_ENABLE_STATIC_TSRMLS_CACHE=1'); +} diff --git a/ext/jsonmachine/jsonmachine.c b/ext/jsonmachine/jsonmachine.c new file mode 100644 index 0000000..e6cd56f --- /dev/null +++ b/ext/jsonmachine/jsonmachine.c @@ -0,0 +1,346 @@ +/* jsonmachine extension for PHP */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "php.h" +#include "ext/standard/info.h" +#include "ext/standard/php_var.h" +#include "php_jsonmachine.h" +#include "jsonmachine_arginfo.h" +#include "zend_interfaces.h" +#include "zend_exceptions.h" + +#include +#include + +/* For compatibility with older PHP versions */ +#ifndef ZEND_PARSE_PARAMETERS_NONE +#define ZEND_PARSE_PARAMETERS_NONE() \ + ZEND_PARSE_PARAMETERS_START(0, 0) \ + ZEND_PARSE_PARAMETERS_END() +#endif + +static zend_always_inline void append_char_to_zval_string(zval *str, char c) +{ + size_t len = Z_STRLEN_P(str); + zend_string *new_str = zend_string_realloc(Z_STR_P(str), len + 1, 0); + ZSTR_VAL(new_str)[len] = c; + ZSTR_VAL(new_str)[len + 1] = '\0'; + ZVAL_STR(str, new_str); +} + +unsigned char uc(char ch) +{ + return (unsigned char) ch; +} + +bool zBool(zval *trueFalse) +{ + return Z_TYPE_P(trueFalse) == IS_TRUE; +} + +static bool colonCommaBracket[256]; +static bool tokenBoundaries[256]; +static bool insignificantBytes[256]; + +#define Z_EXTTOKENS_OBJ_P(zv) ((exttokens_object *)Z_OBJ_P((zv))) + +typedef struct _exttokens_object { + zend_object std; + zval jsonChunks; + + ssize_t key; + zval current; + + bool rewindCalled; + zval chunk; + size_t chunkLen; + zval tokenBuffer; + bool inString; + bool escaping; + size_t lastIndex; +} exttokens_object; + +void throw_php_exception(const char * className, const char * message) +{ + zend_string * classNameZS = zend_string_init(className, strlen(className), 0); + zend_class_entry * ce = zend_fetch_class(classNameZS, ZEND_FETCH_CLASS_AUTO); + if ( ! ce) { +// zend_error(); + return; + } + zend_throw_exception(ce, message, 0); + zend_string_release(classNameZS); +} + +PHP_METHOD(ExtTokens, __construct) +{ + zval *jsonChunks; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_ZVAL(jsonChunks) + ZEND_PARSE_PARAMETERS_END(); + + exttokens_object *this = Z_EXTTOKENS_OBJ_P(getThis()); + + ZVAL_COPY(&this->jsonChunks, jsonChunks); + ZVAL_EMPTY_STRING(&this->tokenBuffer); + ZVAL_EMPTY_STRING(&this->chunk); + this->chunkLen = 0; + ZVAL_EMPTY_STRING(&this->current); + this->inString = false; + this->escaping = false; + this->lastIndex = 0; + this->key = -1; + this->rewindCalled = false; +} + +PHP_METHOD(ExtTokens, current) +{ + exttokens_object *this = Z_EXTTOKENS_OBJ_P(getThis()); + RETURN_ZVAL(&this->current, 0, 0); +} + +PHP_METHOD(ExtTokens, next) +{ + zval token; + ZVAL_EMPTY_STRING(&token); + + exttokens_object *this = Z_EXTTOKENS_OBJ_P(getThis()); + ZVAL_EMPTY_STRING(&this->current); + + this->key++; + + if (this->lastIndex && this->lastIndex == this->chunkLen) { + return; + } + + do { + char * chunk = Z_STRVAL(this->chunk); + + size_t i; + for (i = this->lastIndex; i < this->chunkLen; i++) { + unsigned char byte; + byte = (unsigned char) chunk[i]; + if (this->escaping) { + this->escaping = false; + append_char_to_zval_string(&this->tokenBuffer, byte); + continue; + } + if (insignificantBytes[byte]) { + append_char_to_zval_string(&this->tokenBuffer, byte); + continue; + } + if (this->inString) { + if (byte == '"') { + this->inString = false; + } else if (byte == '\\') { + this->escaping = true; + } + append_char_to_zval_string(&this->tokenBuffer, byte); + + continue; + } + + if (tokenBoundaries[byte]) { + if (Z_STRLEN(this->tokenBuffer)) { + this->lastIndex = i; + ZVAL_COPY(&token, &this->tokenBuffer); + ZVAL_EMPTY_STRING(&this->tokenBuffer); + goto after; + } + if (colonCommaBracket[byte]) { + this->lastIndex = i+1; + ZVAL_STR(&token, zend_string_init((char *)&byte, 1, 0)); + goto after; + } + } else { // else branch matches `"` but also `\` outside of a string literal which is an error anyway but strictly speaking not correctly parsed token + this->inString = true; + append_char_to_zval_string(&this->tokenBuffer, byte); + } + } + + this->lastIndex = i; + + after: + if (this->lastIndex == this->chunkLen) { + zval valid; + if (this->rewindCalled) { + zend_call_method_with_0_params(Z_OBJ(this->jsonChunks), Z_OBJCE(this->jsonChunks), NULL, "next", NULL); + } else { + zend_call_method_with_0_params(Z_OBJ(this->jsonChunks), Z_OBJCE(this->jsonChunks), NULL, "rewind", NULL); + this->rewindCalled = true; + } + zend_call_method_with_0_params(Z_OBJ(this->jsonChunks), Z_OBJCE(this->jsonChunks), NULL, "valid", &valid); + if ( ! zBool(&valid)) { + if (Z_STRLEN_P(&token)) { + ZVAL_COPY(&this->current, &token); + } else { + ZVAL_COPY(&this->current, &this->tokenBuffer); + } + return; + } + zend_call_method_with_0_params(Z_OBJ(this->jsonChunks), Z_OBJCE(this->jsonChunks), NULL, "current", &this->chunk); + if (Z_TYPE(this->chunk) != IS_STRING) { + throw_php_exception( + "JsonMachine\\Exception\\JsonMachineException", + "Iterator providing token chunks must produce strings." + ); + return; + } + this->chunkLen = Z_STRLEN(this->chunk); + this->lastIndex = 0; + } + } while (Z_STRLEN(token) == 0); + + ZVAL_COPY(&this->current, &token); +} + +PHP_METHOD(ExtTokens, key) +{ + exttokens_object *this = Z_EXTTOKENS_OBJ_P(getThis()); + RETURN_LONG(this->key); +} + +PHP_METHOD(ExtTokens, valid) +{ + exttokens_object *this = Z_EXTTOKENS_OBJ_P(getThis()); + RETURN_BOOL(Z_STRLEN(this->current)); +} + +PHP_METHOD(ExtTokens, rewind) +{ + exttokens_object *this = Z_EXTTOKENS_OBJ_P(getThis()); + zend_call_method_with_0_params(&this->std, this->std.ce, NULL, "next", NULL); +} + +zend_object *exttokens_create_handler(zend_class_entry *ce) +{ + exttokens_object *handler = emalloc(sizeof(exttokens_object)); + memset(handler, 0, sizeof(exttokens_object)); + zend_object_std_init(&handler->std, ce); + object_properties_init(&handler->std, ce); + handler->std.handlers = &exttokens_object_handlers; + return &handler->std; +} + +void exttokens_object_free_obj(zend_object *object) +{ + exttokens_object *handler = (exttokens_object *)object; + /* Here you can free any resources that your object has acquired, + but do NOT free the object itself; Zend will do that for you */ + zend_object_std_dtor(&handler->std); +// efree(object); +} + +void init_char_maps() +{ + for (int j = 0; j < 256; j++) { + insignificantBytes[j] = true; + } + insignificantBytes[uc('\\')] = false; + insignificantBytes[uc('"')] = false; + insignificantBytes[uc('\xEF')] = false; + insignificantBytes[uc('\xBB')] = false; + insignificantBytes[uc('\xBF')] = false; + insignificantBytes[uc(' ')] = false; + insignificantBytes[uc('\n')] = false; + insignificantBytes[uc('\r')] = false; + insignificantBytes[uc('\t')] = false; + insignificantBytes[uc('{')] = false; + insignificantBytes[uc('}')] = false; + insignificantBytes[uc('[')] = false; + insignificantBytes[uc(']')] = false; + insignificantBytes[uc(':')] = false; + insignificantBytes[uc(',')] = false; + + for (int j = 0; j < 256; j++) { + tokenBoundaries[j] = false; + } + tokenBoundaries[uc('\xEF')] = true; + tokenBoundaries[uc('\xBB')] = true; + tokenBoundaries[uc('\xBF')] = true; + tokenBoundaries[uc(' ')] = true; + tokenBoundaries[uc('\n')] = true; + tokenBoundaries[uc('\r')] = true; + tokenBoundaries[uc('\t')] = true; + tokenBoundaries[uc('{')] = true; + tokenBoundaries[uc('}')] = true; + tokenBoundaries[uc('[')] = true; + tokenBoundaries[uc(']')] = true; + tokenBoundaries[uc(':')] = true; + tokenBoundaries[uc(',')] = true; + + for (int j = 0; j < 256; j++) { + colonCommaBracket[j] = false; + } + colonCommaBracket[uc('{')] = true; + colonCommaBracket[uc('}')] = true; + colonCommaBracket[uc('[')] = true; + colonCommaBracket[uc(']')] = true; + colonCommaBracket[uc(':')] = true; + colonCommaBracket[uc(',')] = true; +} + +PHP_MINIT_FUNCTION(jsonmachine) +{ + init_char_maps(); + + zend_class_entry ce; + INIT_CLASS_ENTRY(ce, "ExtTokens", class_ExtTokens_methods); + exttokens_ce = zend_register_internal_class(&ce); + exttokens_ce->create_object = exttokens_create_handler; + zend_class_implements(exttokens_ce, 1, zend_ce_iterator); + + memcpy(&exttokens_object_handlers, zend_get_std_object_handlers(), sizeof(zend_object_handlers)); + exttokens_object_handlers.clone_obj = NULL; + exttokens_object_handlers.offset = XtOffsetOf(exttokens_object, std); + exttokens_object_handlers.free_obj = exttokens_object_free_obj; + + return SUCCESS; +} + + +/* {{{ PHP_RINIT_FUNCTION */ +PHP_RINIT_FUNCTION(jsonmachine) +{ +#if defined(ZTS) && defined(COMPILE_DL_JSONMACHINE) + ZEND_TSRMLS_CACHE_UPDATE(); +#endif + + return SUCCESS; +} +/* }}} */ + +/* {{{ PHP_MINFO_FUNCTION */ +PHP_MINFO_FUNCTION(jsonmachine) +{ + php_info_print_table_start(); + php_info_print_table_header(2, "jsonmachine support", "enabled"); + php_info_print_table_end(); +} +/* }}} */ + +/* {{{ jsonmachine_module_entry */ +zend_module_entry jsonmachine_module_entry = { + STANDARD_MODULE_HEADER, + "jsonmachine", /* Extension name */ + NULL, /* zend_function_entry */ + PHP_MINIT(jsonmachine), /* PHP_MINIT - Module initialization */ + NULL, /* PHP_MSHUTDOWN - Module shutdown */ + PHP_RINIT(jsonmachine), /* PHP_RINIT - Request initialization */ + NULL, /* PHP_RSHUTDOWN - Request shutdown */ + PHP_MINFO(jsonmachine), /* PHP_MINFO - Module info */ + PHP_JSONMACHINE_VERSION, /* Version */ + STANDARD_MODULE_PROPERTIES +}; +/* }}} */ + +#ifdef COMPILE_DL_JSONMACHINE +# ifdef ZTS +ZEND_TSRMLS_CACHE_DEFINE() +# endif +ZEND_GET_MODULE(jsonmachine) +#endif diff --git a/ext/jsonmachine/jsonmachine.stub.php b/ext/jsonmachine/jsonmachine.stub.php new file mode 100644 index 0000000..21f99f4 --- /dev/null +++ b/ext/jsonmachine/jsonmachine.stub.php @@ -0,0 +1,35 @@ + +--FILE-- + +--EXPECT-- +The extension "jsonmachine" is available diff --git a/src/ExtTokens.php b/src/ExtTokens.php new file mode 100644 index 0000000..b34cf93 --- /dev/null +++ b/src/ExtTokens.php @@ -0,0 +1,38 @@ + $jsonChunks + */ + public function __construct($jsonChunks) + { + $this->jsonChunks = $jsonChunks; + } + + public function getIterator() + { + $lastIndex = 0; + $inString = false; + $escaping = false; + $tokenBuffer = ''; + + foreach ($this->jsonChunks as $jsonChunk) { + while (null !== ($token = jsonmachine_next_token($jsonChunk, $tokenBuffer, $escaping, $inString, $lastIndex))) { + yield $token; + } + } + } + + public function getPosition() + { + return 0; + } +} diff --git a/src/IteratorLexerPOC.php b/src/IteratorLexerPOC.php new file mode 100644 index 0000000..c44b323 --- /dev/null +++ b/src/IteratorLexerPOC.php @@ -0,0 +1,227 @@ + $jsonChunks + */ + public function __construct(Iterator $jsonChunks) + { + $this->jsonChunks = $jsonChunks; + $this->tokenBoundaries = $this->mapOfBoundaryBytes(); + $this->jsonInsignificantBytes = $this->jsonInsignificantBytes(); + } + + #[\ReturnTypeWillChange] + public function rewind() + { + $this->jsonChunksRewind(); + $this->next(); + } + + #[\ReturnTypeWillChange] + public function next() + { + $this->current = ''; + + for (; $this->chunkIndex < $this->chunkLength; ++$this->chunkIndex) { + if ($this->carryToken != null) { + $this->current = $this->carryToken; + $this->carryToken = null; + ++$this->key; + + return; + } + + $byte = $this->chunk[$this->chunkIndex]; + + if ($this->escaping) { + $this->escaping = false; + $this->tokenBuffer .= $byte; + continue; + } + + if ($this->jsonInsignificantBytes[$byte]) { + $this->tokenBuffer .= $byte; + continue; + } + + if ($this->inString) { + if ($byte == '"') { + $this->inString = false; + } elseif ($byte == '\\') { + $this->escaping = true; + } + $this->tokenBuffer .= $byte; + continue; + } + + if (isset($this->tokenBoundaries[$byte])) { // if byte is any token boundary + if ($this->tokenBuffer != '') { + $this->current = $this->tokenBuffer; + $this->tokenBuffer = ''; + } + if ($this->tokenBoundaries[$byte]) { // if byte is not whitespace token boundary + $this->carryToken = $byte; + } + if ($this->current != '') { + ++$this->key; + ++$this->chunkIndex; + + return; + } + } else { + if ($byte == '"') { + $this->inString = true; + } + $this->tokenBuffer .= $byte; + } + } + + if ($this->jsonChunksNext()) { + $this->next(); + } elseif ($this->carryToken) { + $this->current = $this->carryToken; + $this->carryToken = null; + ++$this->key; + } + } + + #[\ReturnTypeWillChange] + public function valid() + { + return $this->current !== ''; + } + + #[\ReturnTypeWillChange] + public function current() + { + return $this->current; + } + + #[\ReturnTypeWillChange] + public function key() + { + return $this->key; + } + + private function mapOfBoundaryBytes(): array + { + $utf8bom = "\xEF\xBB\xBF"; + + $boundary = []; + $boundary[$utf8bom[0]] = 0; + $boundary[$utf8bom[1]] = 0; + $boundary[$utf8bom[2]] = 0; + $boundary[' '] = 0; + $boundary["\n"] = 0; + $boundary["\r"] = 0; + $boundary["\t"] = 0; + + $boundary['{'] = 1; + $boundary['}'] = 1; + $boundary['['] = 1; + $boundary[']'] = 1; + $boundary[':'] = 1; + $boundary[','] = 1; + + return $boundary; + } + + private function jsonInsignificantBytes(): array + { + $bytes = []; + foreach (range(0, 255) as $ord) { + $bytes[chr($ord)] = ! in_array( + chr($ord), + ['\\', '"', "\xEF", "\xBB", "\xBF", ' ', "\n", "\r", "\t", '{', '}', '[', ']', ':', ','] + ); + } + + return $bytes; + } + + private function initCurrentChunk(): bool + { + $valid = $this->jsonChunks->valid(); + + if ($valid) { + $this->chunk = $this->jsonChunks->current(); + $this->chunkLength = strlen($this->chunk); + $this->chunkIndex = 0; + } + + return $valid; + } + + public function getPosition(): int + { + return 0; + } + + public function getLine(): int + { + return 1; + } + + public function getColumn(): int + { + return 0; + } + + private function jsonChunksRewind(): bool + { + $this->jsonChunks->rewind(); + + return $this->initCurrentChunk(); + } + + private function jsonChunksNext(): bool + { + $this->jsonChunks->next(); + + return $this->initCurrentChunk(); + } +} diff --git a/test/ExtTokensTest.php b/test/ExtTokensTest.php new file mode 100644 index 0000000..bfa1b1c --- /dev/null +++ b/test/ExtTokensTest.php @@ -0,0 +1,35 @@ +markTestSkipped(); + } + $extTokens = new ExtTokens(new ArrayIterator(['1.0', '1', '2', '3', '5', '[]'])); + $this->assertInstanceOf(Iterator::class, $extTokens); + $this->assertSame(['1.01235', '[', ']'], iterator_to_array($extTokens)); + } + + public function testThrowsOnNonStringChunk() + { + if ( ! class_exists(ExtTokens::class)) { + $this->markTestSkipped(); + } + + $extTokens = new ExtTokens(new ArrayIterator(['true', 10])); + + $this->expectException(JsonMachineException::class); + $this->expectExceptionMessage('string'); + iterator_to_array($extTokens); + } +} diff --git a/test/JsonMachineTest/ExtTokensTest.php b/test/JsonMachineTest/ExtTokensTest.php new file mode 100644 index 0000000..3f2f782 --- /dev/null +++ b/test/JsonMachineTest/ExtTokensTest.php @@ -0,0 +1,25 @@ +markTestSkipped('jsonmachine extension not loaded'); + } + + return [ +// 'ext' => [ExtTokens::class], + 'extfull' => [\ExtTokens::class], + ]; + } +} diff --git a/test/JsonMachineTest/TokensTest.php b/test/JsonMachineTest/TokensTest.php index 09cf7d9..c661857 100644 --- a/test/JsonMachineTest/TokensTest.php +++ b/test/JsonMachineTest/TokensTest.php @@ -4,7 +4,9 @@ namespace JsonMachineTest; +use ArrayIterator; use JsonMachine\FileChunks; +use JsonMachine\IteratorLexerPOC; use JsonMachine\StreamChunks; use JsonMachine\StringChunks; use JsonMachine\Tokens; @@ -13,43 +15,41 @@ /** * @covers \JsonMachine\Tokens * @covers \JsonMachine\TokensWithDebugging + * @covers \JsonMachine\IteratorLexerPOC */ class TokensTest extends \PHPUnit_Framework_TestCase { - public function bothDebugModes() + public function availableTokenizers() { return [ 'debug enabled' => [TokensWithDebugging::class], 'debug disabled' => [Tokens::class], + 'Iterator POC' => [IteratorLexerPOC::class], ]; } /** - * @dataProvider bothDebugModes + * @dataProvider availableTokenizers */ public function testCorrectlyYieldsZeroToken($tokensClass) { - $data = ['0']; - $expected = ['0']; - $this->assertEquals($expected, iterator_to_array(new $tokensClass(new \ArrayIterator($data)))); - $stream = fopen('data://text/plain,{"value":0}', 'r'); $expected = ['{', '"value"', ':', '0', '}']; - $this->assertEquals($expected, iterator_to_array(new $tokensClass(new StreamChunks($stream, 10)))); + $this->assertEquals($expected, iterator_to_array(new $tokensClass((new StreamChunks($stream, 10))->getIterator()))); } /** - * @dataProvider bothDebugModes + * @dataProvider availableTokenizers */ public function testGeneratesTokens($tokensClass) { - $data = ['{}[],:null,"string" false:', 'true,1,100000,1.555{-56]"","\\""']; - $expected = ['{', '}', '[', ']', ',', ':', 'null', ',', '"string"', 'false', ':', 'true', ',', '1', ',', '100000', ',', '1.555', '{', '-56', ']', '""', ',', '"\\""']; + $data = ['{}[],:null,"string" false:', 'true,1,100000,1.555{-56]"","\\"",']; + $expected = ['{', '}', '[', ']', ',', ':', 'null', ',', '"string"', 'false', ':', 'true', ',', '1', ',', '100000', ',', '1.555', '{', '-56', ']', '""', ',', '"\\""', ',']; $this->assertEquals($expected, iterator_to_array(new $tokensClass(new \ArrayIterator($data)))); } /** - * @dataProvider bothDebugModes + * @dataProvider availableTokenizers */ public function testWithBOM($tokensClass) { @@ -59,7 +59,7 @@ public function testWithBOM($tokensClass) } /** - * @dataProvider bothDebugModes + * @dataProvider availableTokenizers */ public function testCorrectlyParsesTwoBackslashesAtTheEndOfAString($tokensClass) { @@ -67,7 +67,7 @@ public function testCorrectlyParsesTwoBackslashesAtTheEndOfAString($tokensClass) } /** - * @dataProvider bothDebugModes + * @dataProvider availableTokenizers */ public function testCorrectlyParsesEscapedQuotesInTheMiddleOfAString($tokensClass) { @@ -77,7 +77,7 @@ public function testCorrectlyParsesEscapedQuotesInTheMiddleOfAString($tokensClas } /** - * @dataProvider bothDebugModes + * @dataProvider availableTokenizers */ public function testCorrectlyParsesChunksSplitBeforeStringEnd($tokensClass) { @@ -87,7 +87,7 @@ public function testCorrectlyParsesChunksSplitBeforeStringEnd($tokensClass) } /** - * @dataProvider bothDebugModes + * @dataProvider availableTokenizers */ public function testCorrectlyParsesChunksSplitBeforeEscapedCharacter($tokensClass) { @@ -97,7 +97,7 @@ public function testCorrectlyParsesChunksSplitBeforeEscapedCharacter($tokensClas } /** - * @dataProvider bothDebugModes + * @dataProvider availableTokenizers */ public function testCorrectlyParsesChunksSplitAfterEscapedCharacter($tokensClass) { @@ -107,7 +107,7 @@ public function testCorrectlyParsesChunksSplitAfterEscapedCharacter($tokensClass } /** - * @dataProvider bothDebugModes + * @dataProvider availableTokenizers */ public function testAnyPossibleChunkSplit($tokensClass) { @@ -144,7 +144,7 @@ public function testAnyPossibleChunkSplit($tokensClass) foreach (range(1, strlen($json)) as $chunkLength) { $chunks = str_split($json, $chunkLength); - $result = iterator_to_array(new $tokensClass($chunks)); + $result = iterator_to_array(new $tokensClass(new ArrayIterator($chunks))); $this->assertSame($expected, $result); } diff --git a/test/performance/testPerformance.php b/test/performance/testPerformance.php index fc7c50f..caf563e 100644 --- a/test/performance/testPerformance.php +++ b/test/performance/testPerformance.php @@ -2,10 +2,16 @@ declare(strict_types=1); +use ExtTokens; +use JsonMachine\FileChunks; use JsonMachine\Items; +use JsonMachine\Parser; +use JsonMachine\Tokens; require_once __DIR__.'/../../vendor/autoload.php'; +passthru('php -v'); +echo 'Ext jsonmachine version: '.phpversion('jsonmachine').PHP_EOL; if ( ! ini_get('xdebug.mode')) { echo "Xdebug disabled\n"; } else { @@ -27,19 +33,22 @@ ini_set('memory_limit', '-1'); // for json_decode use case $decoders = [ - 'Items::fromFile()' => function ($file) { + 'php Items (TokensWithDebugging)' => function ($file) { + return Items::fromFile($file, ['debug' => true]); + }, + 'php Items (Tokens)' => function ($file) { return Items::fromFile($file); }, - 'Items::fromString()' => function ($file) { - return Items::fromString(stream_get_contents(fopen($file, 'r'))); + 'php Tokens' => function ($file) { + return new Tokens((new FileChunks($file))->getIterator()); }, - 'Items::fromFile() - debug' => function ($file) { - return Items::fromFile($file, ['debug' => true]); + 'ext Items' => function ($file) { + return new Parser(new ExtTokens((new FileChunks($file))->getIterator())); }, - 'Items::fromString() - debug' => function ($file) { - return Items::fromString(stream_get_contents(fopen($file, 'r')), ['debug' => true]); + 'ext ExtTokens' => function ($file) { + return new ExtTokens((new FileChunks($file))->getIterator()); }, - 'json_decode()' => function ($file) { + 'ext json_decode()' => function ($file) { return json_decode(stream_get_contents(fopen($file, 'r')), true); }, ];