From dbb8c7ea663a93ade82eff48ef780db0339af413 Mon Sep 17 00:00:00 2001 From: Sean Fisher Date: Wed, 21 Dec 2022 15:18:41 -0500 Subject: [PATCH 1/5] Wiring up storage for the cursor from the transformer level --- src/class-runner.php | 21 ++++++++++++ src/contracts/interface-with-cursor.php | 35 +++++++++++++++++++ src/processor/class-rss-processor.php | 5 ++- src/processor/trait-cursor.php | 41 +++++++++++++++++++++++ src/transformer/class-rss-transformer.php | 1 + src/transformer/class-transformer.php | 7 ++++ src/transformer/class-xml-transformer.php | 15 ++++++++- tests/class-test-case.php | 6 +++- tests/processor/test-cursor.php | 40 ++++++++++++++++++++++ tests/processor/test-rss-processor.php | 4 +++ 10 files changed, 172 insertions(+), 3 deletions(-) create mode 100644 src/contracts/interface-with-cursor.php create mode 100644 src/processor/trait-cursor.php create mode 100644 tests/processor/test-cursor.php diff --git a/src/class-runner.php b/src/class-runner.php index 1b2f127e..26d6420c 100644 --- a/src/class-runner.php +++ b/src/class-runner.php @@ -7,6 +7,7 @@ namespace Feed_Consumer; +use Feed_Consumer\Contracts\With_Cursor; use Monolog\Logger; use Psr\Log\LoggerInterface; use RuntimeException; @@ -77,6 +78,15 @@ public static function processor( int $feed_id ): Contracts\Processor { $processor->set_settings( $settings[ Settings::escape_setting_name( $settings['processor'] ) ] ?? [] ); + // Instantiate the processor's cursor if supported. + if ( $processor instanceof With_Cursor ) { + $cursor = get_post_meta( $feed_id, With_Cursor::CURSOR_META_KEY, true ); + + if ( ! is_null( $cursor ) && '' !== $cursor ) { + $processor->set_cursor( (string) $cursor ); + } + } + return $processor; } @@ -288,6 +298,17 @@ public function run() { */ do_action( 'feed_consumer_run_complete', $this->feed_id, $loaded_data, $processor::class ); + // Store the cursor of the processor. + if ( $processor instanceof With_Cursor ) { + $cursor = $processor->get_cursor(); + + if ( is_null( $cursor ) ) { + delete_post_meta( $this->feed_id, With_Cursor::CURSOR_META_KEY ); + } else { + update_post_meta( $this->feed_id, With_Cursor::CURSOR_META_KEY, $cursor ); + } + } + // Update the last run time of the feed. update_post_meta( $this->feed_id, static::LAST_RUN_META_KEY, current_time( 'timestamp' ) ); // phpcs:ignore WordPress.DateTime.CurrentTimeTimestamp.Requested diff --git a/src/contracts/interface-with-cursor.php b/src/contracts/interface-with-cursor.php new file mode 100644 index 00000000..61ae898c --- /dev/null +++ b/src/contracts/interface-with-cursor.php @@ -0,0 +1,35 @@ +cursor; + } + + /** + * Set the cursor. + * + * @param string $cursor Cursor to set. + * @return static + */ + public function set_cursor( string $cursor ): static { + $this->cursor = $cursor; + + return $this; + } +} diff --git a/src/transformer/class-rss-transformer.php b/src/transformer/class-rss-transformer.php index dc94450a..50431e0f 100644 --- a/src/transformer/class-rss-transformer.php +++ b/src/transformer/class-rss-transformer.php @@ -28,6 +28,7 @@ public function presets(): array { return (array) apply_filters( 'feed_consumer_rss_transformer_presets', [ + static::PATH_CURSOR => 'pubDate', static::PATH_ITEMS => '/rss/channel/item', static::PATH_GUID => 'guid', static::PATH_TITLE => 'title', diff --git a/src/transformer/class-transformer.php b/src/transformer/class-transformer.php index 9be7908a..fa3b348a 100644 --- a/src/transformer/class-transformer.php +++ b/src/transformer/class-transformer.php @@ -94,6 +94,13 @@ abstract class Transformer implements With_Extractor, With_Processor, Contract { */ public const PATH_IMAGE_CREDIT = 'path_image_credit'; + /** + * XPath key for the item cursor (date or ID). + * + * @var string + */ + public const PATH_CURSOR = 'path_cursor'; + /** * Settings key to not convert to Gutenberg blocks. * diff --git a/src/transformer/class-xml-transformer.php b/src/transformer/class-xml-transformer.php index 090026f1..557b2707 100644 --- a/src/transformer/class-xml-transformer.php +++ b/src/transformer/class-xml-transformer.php @@ -8,6 +8,7 @@ namespace Feed_Consumer\Transformer; use Alley\WP\Block_Converter\Block_Converter; +use Feed_Consumer\Contracts\With_Cursor; use Feed_Consumer\Contracts\With_Presets; use Feed_Consumer\Contracts\With_Setting_Fields; use Feed_Consumer\Loader\Post_Loader; @@ -91,8 +92,9 @@ public function data(): array { return []; } - return array_map( + $items = array_map( fn ( SimpleXMLElement $item ) => [ + 'cursor' => $this->extract_by_xpath( $item, $settings[ static::PATH_CURSOR ] ?? '' ), Post_Loader::BYLINE => $this->extract_by_xpath( $item, $settings[ static::PATH_BYLINE ] ?? 'author' ), Post_Loader::CONTENT => empty( $settings[ static::DONT_CONVERT_TO_BLOCKS ] ) ? (string) new Block_Converter( $this->extract_by_xpath( $item, $settings[ static::PATH_CONTENT ] ?? 'description' ) ) @@ -107,6 +109,17 @@ public function data(): array { ], (array) $items, ); + + // Update the processor's cursor if supported. + if ( $this->processor && $this->processor instanceof With_Cursor ) { + $last_item = end( $items ); + + if ( ! empty( $last_item['cursor'] ) ) { + $this->processor->set_cursor( $last_item['cursor'] ); + } + } + + return $items; } /** diff --git a/tests/class-test-case.php b/tests/class-test-case.php index 3d525b52..aad28ceb 100644 --- a/tests/class-test-case.php +++ b/tests/class-test-case.php @@ -4,7 +4,9 @@ use Feed_Consumer\Contracts\Extractor; use Feed_Consumer\Contracts\Processor as Processor_Contract; use Feed_Consumer\Contracts\Transformer; +use Feed_Consumer\Contracts\With_Cursor; use Feed_Consumer\Loader\Post_Loader; +use Feed_Consumer\Processor\Cursor; use Feed_Consumer\Processor\Processor; use Mantle\Http_Client\Response; use Mantle\Testing\Concerns\With_Faker; @@ -24,7 +26,9 @@ public function setUp(): void { } protected function make_processor( array $settings = [] ): Processor { - $instance = new class() extends Processor { + $instance = new class() extends Processor implements With_Cursor { + use Cursor; + public function name(): string { return 'Test Processor'; } diff --git a/tests/processor/test-cursor.php b/tests/processor/test-cursor.php new file mode 100644 index 00000000..55cdc644 --- /dev/null +++ b/tests/processor/test-cursor.php @@ -0,0 +1,40 @@ +make_processor(); + + $this->assertNull( $processor->get_cursor() ); + } + + public function test_get_cursor() { + $processor = $this->make_processor(); + + $processor->set_cursor( '123' ); + + $this->assertEquals( '123', $processor->get_cursor() ); + } + + public function test_set_cursor() { + $processor = $this->make_processor(); + + $this->assertNull( $processor->get_cursor() ); + + $processor->set_cursor( '123' ); + + $this->assertEquals( '123', $processor->get_cursor() ); + } +} diff --git a/tests/processor/test-rss-processor.php b/tests/processor/test-rss-processor.php index c502520d..1ef7182f 100644 --- a/tests/processor/test-rss-processor.php +++ b/tests/processor/test-rss-processor.php @@ -57,6 +57,8 @@ public function test_load_rss_feed() { ] ); + $this->assertNull( Runner::processor( $feed_id )->get_cursor() ); + Runner::run_scheduled( $feed_id ); $this->assertPostExists( @@ -69,6 +71,8 @@ public function test_load_rss_feed() { $this->assertNotEmpty( get_post_meta( $feed_id, Runner::LAST_RUN_META_KEY, true ) ); $this->assertInCronQueue( Runner::CRON_HOOK, [ $feed_id ] ); + + $this->assertNotNull( Runner::processor( $feed_id )->get_cursor() ); } public function test_handle_rss_feed_error() { From bda18363cd97940ba73877c94f5688192d9a0643 Mon Sep 17 00:00:00 2001 From: Sean Fisher Date: Wed, 21 Dec 2022 15:34:25 -0500 Subject: [PATCH 2/5] Check the cursor during the transformer to prevent duplicates --- composer.json | 3 +- src/transformer/class-transformer.php | 3 +- src/transformer/class-xml-transformer.php | 70 ++++++++++++++++------ tests/transformer/test-rss-transformer.php | 23 +++++++ 4 files changed, 80 insertions(+), 19 deletions(-) diff --git a/composer.json b/composer.json index 3d94862c..af138cf7 100644 --- a/composer.json +++ b/composer.json @@ -18,7 +18,8 @@ "php": "^8.0", "alleyinteractive/composer-wordpress-autoloader": "^1.0", "alleyinteractive/wp-block-converter": "^1.0", - "mantle-framework/support": "^0.9.1" + "mantle-framework/support": "^0.9.1", + "nesbot/carbon": "^2.64" }, "require-dev": { "alleyinteractive/alley-coding-standards": "^1.0", diff --git a/src/transformer/class-transformer.php b/src/transformer/class-transformer.php index fa3b348a..6268158c 100644 --- a/src/transformer/class-transformer.php +++ b/src/transformer/class-transformer.php @@ -10,6 +10,7 @@ use Feed_Consumer\Contracts\Extractor; use Feed_Consumer\Contracts\Processor; use Feed_Consumer\Contracts\Transformer as Contract; +use Feed_Consumer\Contracts\With_Cursor; use Feed_Consumer\Contracts\With_Extractor; use Feed_Consumer\Contracts\With_Processor; @@ -111,7 +112,7 @@ abstract class Transformer implements With_Extractor, With_Processor, Contract { /** * Processor instance. * - * @var Processor|null + * @var Processor|With_Cursor|null */ protected ?Processor $processor; diff --git a/src/transformer/class-xml-transformer.php b/src/transformer/class-xml-transformer.php index 557b2707..ba3fb5ed 100644 --- a/src/transformer/class-xml-transformer.php +++ b/src/transformer/class-xml-transformer.php @@ -8,6 +8,7 @@ namespace Feed_Consumer\Transformer; use Alley\WP\Block_Converter\Block_Converter; +use Carbon\Carbon; use Feed_Consumer\Contracts\With_Cursor; use Feed_Consumer\Contracts\With_Presets; use Feed_Consumer\Contracts\With_Setting_Fields; @@ -36,6 +37,7 @@ public function setting_fields(): array { return [ static::PATH_ITEMS => new Fieldmanager_TextField( __( 'XPath to items', 'feed-consumer' ) ), + static::PATH_CURSOR => new Fieldmanager_TextField( __( 'XPath to cursor field (date)', 'feed-consumer' ) ), static::PATH_GUID => new Fieldmanager_TextField( __( 'XPath to guid', 'feed-consumer' ) ), static::PATH_TITLE => new Fieldmanager_TextField( __( 'XPath to title', 'feed-consumer' ) ), static::PATH_PERMALINK => new Fieldmanager_TextField( __( 'XPath to permalink', 'feed-consumer' ) ), @@ -92,23 +94,57 @@ public function data(): array { return []; } - $items = array_map( - fn ( SimpleXMLElement $item ) => [ - 'cursor' => $this->extract_by_xpath( $item, $settings[ static::PATH_CURSOR ] ?? '' ), - Post_Loader::BYLINE => $this->extract_by_xpath( $item, $settings[ static::PATH_BYLINE ] ?? 'author' ), - Post_Loader::CONTENT => empty( $settings[ static::DONT_CONVERT_TO_BLOCKS ] ) - ? (string) new Block_Converter( $this->extract_by_xpath( $item, $settings[ static::PATH_CONTENT ] ?? 'description' ) ) - : $this->extract_by_xpath( $item, $settings[ static::PATH_CONTENT ] ?? 'description' ), - Post_Loader::GUID => $this->extract_by_xpath( $item, $settings[ static::PATH_GUID ] ?? 'guid' ), - Post_Loader::IMAGE => $this->extract_by_xpath( $item, $settings[ static::PATH_IMAGE ] ?? 'image' ), - Post_Loader::IMAGE_CAPTION => $this->extract_by_xpath( $item, $settings[ static::PATH_IMAGE_CAPTION ] ?? 'image_caption' ), - Post_Loader::IMAGE_CREDIT => $this->extract_by_xpath( $item, $settings[ static::PATH_IMAGE_CREDIT ] ?? 'image_credit' ), - Post_Loader::IMAGE_DESCRIPTION => $this->extract_by_xpath( $item, $settings[ static::PATH_IMAGE_DESCRIPTION ] ?? 'image_description' ), - Post_Loader::PERMALINK => $this->extract_by_xpath( $item, $settings[ static::PATH_PERMALINK ] ?? 'link' ), - Post_Loader::TITLE => $this->extract_by_xpath( $item, $settings[ static::PATH_TITLE ] ?? 'title' ), - ], - (array) $items, - ); + $processor_cursor = null; + + // Determine the processor's cursor timestamp. + if ( $this->processor && $this->processor instanceof With_Cursor ) { + $processor_cursor = $this->processor->get_cursor(); + + // Support a numeric cursor OR a date cursor. + if ( ! is_null( $processor_cursor ) && is_numeric( $processor_cursor ) ) { + $processor_cursor = (int) $processor_cursor; + } elseif ( ! is_null( $processor_cursor ) ) { + $processor_cursor = Carbon::parse( $processor_cursor ); + } + } + + $items = collect( (array) $items ) + ->map( + fn ( SimpleXMLElement $item ) => [ + 'cursor' => $this->extract_by_xpath( $item, $settings[ static::PATH_CURSOR ] ?? '' ), + Post_Loader::BYLINE => $this->extract_by_xpath( $item, $settings[ static::PATH_BYLINE ] ?? 'author' ), + Post_Loader::CONTENT => empty( $settings[ static::DONT_CONVERT_TO_BLOCKS ] ) + ? (string) new Block_Converter( $this->extract_by_xpath( $item, $settings[ static::PATH_CONTENT ] ?? 'description' ) ) + : $this->extract_by_xpath( $item, $settings[ static::PATH_CONTENT ] ?? 'description' ), + Post_Loader::GUID => $this->extract_by_xpath( $item, $settings[ static::PATH_GUID ] ?? 'guid' ), + Post_Loader::IMAGE => $this->extract_by_xpath( $item, $settings[ static::PATH_IMAGE ] ?? 'image' ), + Post_Loader::IMAGE_CAPTION => $this->extract_by_xpath( $item, $settings[ static::PATH_IMAGE_CAPTION ] ?? 'image_caption' ), + Post_Loader::IMAGE_CREDIT => $this->extract_by_xpath( $item, $settings[ static::PATH_IMAGE_CREDIT ] ?? 'image_credit' ), + Post_Loader::IMAGE_DESCRIPTION => $this->extract_by_xpath( $item, $settings[ static::PATH_IMAGE_DESCRIPTION ] ?? 'image_description' ), + Post_Loader::PERMALINK => $this->extract_by_xpath( $item, $settings[ static::PATH_PERMALINK ] ?? 'link' ), + Post_Loader::TITLE => $this->extract_by_xpath( $item, $settings[ static::PATH_TITLE ] ?? 'title' ), + ], + ) + ->filter( + function ( array $item ) use ( $processor_cursor ) { + // Check if the processor supports a cursor or if one is set. + if ( is_null( $processor_cursor ) ) { + return true; + } + + // Check if the item has a cursor set. + if ( ! isset( $item['cursor'] ) ) { + return true; + } + + // Check if the item's cursor is newer than the processor's cursor. + $cursor = is_numeric( $item['cursor'] ) ? (int) $item['cursor'] : Carbon::parse( $item['cursor'] ); + + return $cursor > $processor_cursor; + } + ) + ->values() + ->all(); // Update the processor's cursor if supported. if ( $this->processor && $this->processor instanceof With_Cursor ) { diff --git a/tests/transformer/test-rss-transformer.php b/tests/transformer/test-rss-transformer.php index 11aaa32e..0a498822 100644 --- a/tests/transformer/test-rss-transformer.php +++ b/tests/transformer/test-rss-transformer.php @@ -64,4 +64,27 @@ public function test_rss_transformation_error() { $this->assertCount( 0, $data ); } + + public function test_rss_transformer_with_cursor() { + $processor = $this->make_processor(); + + // Setting the cursor relative to the 3rd item in the feed (it should only include 1-2). + $processor->set_cursor( 'Fri, 08 Apr 2022 19:18:04 +0000' ); + + $extractor = $this->make_extractor( + Mock_Http_Response::create() + ->with_header( 'Content-Type', 'application/rss+xml' ) + ->with_body( file_get_contents( __DIR__ . '/../fixtures/rss-feed.xml' ) ), + $processor, + ); + + $transformer = new RSS_Transformer( $processor, $extractor ); + + $transformer->set_processor( $processor ); + $transformer->set_extractor( $extractor ); + + $data = $transformer->data(); + + $this->assertCount( 2, $data ); + } } From ed0afd421b7bfd3126cb6f11a583688f2a5dd5ea Mon Sep 17 00:00:00 2001 From: Sean Fisher Date: Wed, 21 Dec 2022 15:35:55 -0500 Subject: [PATCH 3/5] Removing the need for carbon --- composer.json | 3 +-- src/transformer/class-xml-transformer.php | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/composer.json b/composer.json index af138cf7..3d94862c 100644 --- a/composer.json +++ b/composer.json @@ -18,8 +18,7 @@ "php": "^8.0", "alleyinteractive/composer-wordpress-autoloader": "^1.0", "alleyinteractive/wp-block-converter": "^1.0", - "mantle-framework/support": "^0.9.1", - "nesbot/carbon": "^2.64" + "mantle-framework/support": "^0.9.1" }, "require-dev": { "alleyinteractive/alley-coding-standards": "^1.0", diff --git a/src/transformer/class-xml-transformer.php b/src/transformer/class-xml-transformer.php index ba3fb5ed..87fd8216 100644 --- a/src/transformer/class-xml-transformer.php +++ b/src/transformer/class-xml-transformer.php @@ -8,7 +8,6 @@ namespace Feed_Consumer\Transformer; use Alley\WP\Block_Converter\Block_Converter; -use Carbon\Carbon; use Feed_Consumer\Contracts\With_Cursor; use Feed_Consumer\Contracts\With_Presets; use Feed_Consumer\Contracts\With_Setting_Fields; @@ -104,7 +103,7 @@ public function data(): array { if ( ! is_null( $processor_cursor ) && is_numeric( $processor_cursor ) ) { $processor_cursor = (int) $processor_cursor; } elseif ( ! is_null( $processor_cursor ) ) { - $processor_cursor = Carbon::parse( $processor_cursor ); + $processor_cursor = strtotime( $processor_cursor ); } } @@ -138,7 +137,7 @@ function ( array $item ) use ( $processor_cursor ) { } // Check if the item's cursor is newer than the processor's cursor. - $cursor = is_numeric( $item['cursor'] ) ? (int) $item['cursor'] : Carbon::parse( $item['cursor'] ); + $cursor = is_numeric( $item['cursor'] ) ? (int) $item['cursor'] : strtotime( $item['cursor'] ); return $cursor > $processor_cursor; } From 92a935ad164ff482064b3433f17c58506d9ee7ff Mon Sep 17 00:00:00 2001 From: Sean Fisher Date: Wed, 21 Dec 2022 15:36:36 -0500 Subject: [PATCH 4/5] Moving to a new line for readability --- src/transformer/class-xml-transformer.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/transformer/class-xml-transformer.php b/src/transformer/class-xml-transformer.php index 87fd8216..9b2b5082 100644 --- a/src/transformer/class-xml-transformer.php +++ b/src/transformer/class-xml-transformer.php @@ -137,7 +137,9 @@ function ( array $item ) use ( $processor_cursor ) { } // Check if the item's cursor is newer than the processor's cursor. - $cursor = is_numeric( $item['cursor'] ) ? (int) $item['cursor'] : strtotime( $item['cursor'] ); + $cursor = is_numeric( $item['cursor'] ) + ? (int) $item['cursor'] + : strtotime( $item['cursor'] ); return $cursor > $processor_cursor; } From 09832f16e182668725c0059f65b72d1a306e5f6a Mon Sep 17 00:00:00 2001 From: Sean Fisher Date: Wed, 21 Dec 2022 15:37:09 -0500 Subject: [PATCH 5/5] Use Mantles version --- src/transformer/class-xml-transformer.php | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/transformer/class-xml-transformer.php b/src/transformer/class-xml-transformer.php index 9b2b5082..5509d8ab 100644 --- a/src/transformer/class-xml-transformer.php +++ b/src/transformer/class-xml-transformer.php @@ -15,6 +15,8 @@ use Fieldmanager_TextField; use SimpleXMLElement; +use function Mantle\Support\Helpers\collect; + /** * XML Transformer * @@ -144,19 +146,18 @@ function ( array $item ) use ( $processor_cursor ) { return $cursor > $processor_cursor; } ) - ->values() - ->all(); + ->values(); // Update the processor's cursor if supported. if ( $this->processor && $this->processor instanceof With_Cursor ) { - $last_item = end( $items ); + $last_item = $items->last(); if ( ! empty( $last_item['cursor'] ) ) { $this->processor->set_cursor( $last_item['cursor'] ); } } - return $items; + return $items->all(); } /**