Skip to content

Commit

Permalink
Change token map lookup to avoid creating associative array
Browse files Browse the repository at this point in the history
  • Loading branch information
dmsnell committed May 1, 2024
1 parent e8b2cff commit bb6be71
Show file tree
Hide file tree
Showing 2 changed files with 665 additions and 646 deletions.
50 changes: 34 additions & 16 deletions src/wp-includes/class-wp-token-map.php
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ class WP_Token_Map {
*/
private $large_words = array();

/**
 * Stores the group keys for the large words as one flat string.
 *
 * As built by `from_array()`, each group key is appended in sorted
 * order and followed by a single null byte, so every entry occupies
 * `$this->key_length + 1` bytes. Lookups locate a key with `strpos()`
 * and divide the byte offset by `$this->key_length + 1` to get the
 * index of the matching group string inside `$this->large_words`.
 *
 * @var string
 */
private $groups = '';

/**
* Stores an optimized row of short words, where every entry is
* `$this->key_length + 1` bytes long and zero-extended.
Expand Down Expand Up @@ -195,16 +197,23 @@ public static function from_array( $mappings, $key_length = 2 ) {
$map->small_mappings[] = $mappings[ $word ];
}

foreach ( $groups as $group => $group_words ) {
$group_keys = array_keys( $groups );
sort( $group_keys );

foreach ( $group_keys as $group ) {
$map->groups .= "{$group}\x00";

$group_string = '';

foreach ( $group_words as $group_word ) {
foreach ( $groups[ $group ] as $group_word ) {
list( $word, $mapping ) = $group_word;

$group_string .= pack( 'C', strlen( $word ) ) . $word . pack( 'C', strlen( $mapping ) ) . $mapping;
$word_length = pack( 'C', strlen( $word ) );
$mapping_length = pack( 'C', strlen( $mapping ) );
$group_string .= "{$word_length}{$word}{$mapping_length}{$mapping}";
}

$map->large_words[ $group ] = $group_string;
$map->large_words[] = $group_string;
}

return $map;
Expand All @@ -226,10 +235,11 @@ public static function from_array( $mappings, $key_length = 2 ) {
*
* @return WP_Token_Map Map with precomputed data loaded.
*/
public static function from_precomputed_table( $key_length, $large_words, $small_words, $small_mappings ) {
public static function from_precomputed_table( $key_length, $groups, $large_words, $small_words, $small_mappings ) {
$map = new WP_Token_Map();

$map->key_length = $key_length;
$map->groups = $groups;
$map->large_words = $large_words;
$map->small_words = $small_words;
$map->small_mappings = $small_mappings;
Expand Down Expand Up @@ -261,11 +271,11 @@ public function contains( $word ) {
}

$group_key = substr( $word, 0, $this->key_length );
if ( ! isset( $this->large_words[ $group_key ] ) ) {
$group_at = strpos( $this->groups, $group_key );
if ( false === $group_at ) {
return false;
}

$group = $this->large_words[ $group_key ];
$group = $this->large_words[ $group_at / ( $this->key_length + 1 ) ];
$group_length = strlen( $group );
$slug = substr( $word, $this->key_length );
$length = strlen( $slug );
Expand Down Expand Up @@ -336,7 +346,8 @@ public function read_token( $text, $offset = 0, &$skip_bytes = null ) {
if ( $text_length > $this->key_length ) {
$group_key = substr( $text, $offset, $this->key_length );

if ( ! isset( $this->large_words[ $group_key ] ) ) {
$group_at = strpos( $this->groups, $group_key );
if ( false === $group_at ) {
// Perhaps a short word then.
$small_text = str_pad( substr( $text, $offset, $this->key_length ), $this->key_length + 1, "\x00", STR_PAD_RIGHT );
$at = strpos( $this->small_words, $small_text );
Expand All @@ -349,7 +360,7 @@ public function read_token( $text, $offset = 0, &$skip_bytes = null ) {
return $this->small_mappings[ $at / ( $this->key_length + 1 ) ];
}

$group = $this->large_words[ $group_key ];
$group = $this->large_words[ $group_at / ( $this->key_length + 1 ) ];
$group_length = strlen( $group );
$at = 0;
while ( $at < $group_length ) {
Expand Down Expand Up @@ -408,7 +419,8 @@ public function to_array() {
$at += $this->key_length + 1;
}

foreach ( $this->large_words as $prefix => $group ) {
foreach ( $this->large_words as $index => $group ) {
$prefix = substr( $this->groups, $index * ( $this->key_length + 1 ), 2 );
$group_length = strlen( $group );
$at = 0;
while ( $at < $group_length ) {
Expand Down Expand Up @@ -457,15 +469,21 @@ public function precomputed_php_source_table( $indent = "\t" ) {

$output = self::class . "::from_precomputed_table(\n";
$output .= "{$i1}{$this->key_length},\n";

$group_line = str_replace( "\x00", "\\x00", $this->groups );
$output .= "{$i1}\"{$group_line}\",\n";

$output .= "{$i1}array(\n";

$prefixes = array_keys( $this->large_words );
sort( $prefixes );
foreach ( $prefixes as $prefix ) {
$group = $this->large_words[ $prefix ];
$prefixes = explode( "\x00", $this->groups );
foreach ( $prefixes as $index => $prefix ) {
if ( '' === $prefix ) {
break;
}
$group = $this->large_words[ $index ];
$group_length = strlen( $group );
$comment_line = "{$i2}//";
$data_line = "{$i2}'{$prefix}' => \"";
$data_line = "{$i2}\"";
$at = 0;
while ( $at < $group_length ) {
$token_length = unpack( 'C', $group[ $at++ ] )[1];
Expand Down
Loading

0 comments on commit bb6be71

Please sign in to comment.