From 66850efbefcbd9d2b76814f0e0c0e836c5dbe785 Mon Sep 17 00:00:00 2001 From: Markus Perndorfer Date: Fri, 30 Apr 2021 08:58:00 +0200 Subject: [PATCH] update cleanup --- CHANGELOG.md | 72 +++- README.md | 359 +++++++++++++++++- buildutils/update-readme.py | 94 +++++ resources/impex/bulkdelete-cronjoblogs.impex | 3 +- resources/impex/essentialdata-jobs.impex | 2 +- .../001-cleanup-emails.impex | 79 ++++ .../cms2/001-optimized-versiongc.impex | 25 +- .../001-cleanup-oldcarts.impex | 48 ++- .../core/001-cleanup-httpsession.impex | 12 +- .../sanecleanup/impex/001-cleanup-impex.impex | 30 ++ .../sanecleanup/impex/001_cleanup-impex.impex | 14 - .../impex/002-cleanup-distributed-impex.impex | 102 +++++ .../001-enable-cronjoblogs-cleanup.impex | 32 +- .../001-cleanup-cronjobhistory.impex | 21 +- .../processing/002-cleanup-cronjobs.impex | 39 +- .../003-cleanup-businessprocess.impex | 147 ++++++- .../ruleengine/002-delete-expired-rules.impex | 14 +- .../003-rule-engine-orphans.impex.draft | 23 ++ .../002-cleanup-solrindexoperation.impex | 26 ++ resources/retentionrule-to-impex.groovy | 43 +++ resources/sanecleanup-spring.xml | 5 +- ...ner.java => CleanupAfterInitListener.java} | 100 +++-- .../cleanup/cms2/CMSVersionGCPerformable.java | 187 ++++----- 23 files changed, 1280 insertions(+), 197 deletions(-) create mode 100755 buildutils/update-readme.py create mode 100644 resources/impex/sanecleanup/acceleratorservices/001-cleanup-emails.impex create mode 100644 resources/impex/sanecleanup/impex/001-cleanup-impex.impex delete mode 100644 resources/impex/sanecleanup/impex/001_cleanup-impex.impex create mode 100644 resources/impex/sanecleanup/impex/002-cleanup-distributed-impex.impex create mode 100644 resources/impex/sanecleanup/ruleengine/003-rule-engine-orphans.impex.draft create mode 100644 resources/impex/sanecleanup/solrfacetsearch/002-cleanup-solrindexoperation.impex create mode 100644 resources/retentionrule-to-impex.groovy rename 
src/mpern/sap/cleanup/{AfterInitListener.java => CleanupAfterInitListener.java} (55%) diff --git a/CHANGELOG.md b/CHANGELOG.md index fcb3bfe..bbe9a51 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ # Changelog + All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), @@ -6,6 +7,51 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [3.0.0] - 2021-04-30 + +### Added + +- Cleanup all `BusinessProcess` - all `BusinessProcess`es, regardless of their state, + are deleted after 6 months at the latest. **Make sure to adjust this to your project + requirements!** +- Cleanup potentially orphaned items related to `BusinessProcess` +- Aggressive cleanup for `cmsVersionGCProcess` +- Cleanup additional generated impex media +- Cleanup `EmailMessage` and `EmailAddress` +- Cleanup `SolrIndexOperation` +- Cleanup all types related to [Distributed ImpEx](https://help.sap.com/viewer/d0224eca81e249cb821f2cdf45a82ace/LATEST/en-US/3e0138c9bfc642349cad227cfcd72d9f.html) +- `retentionrule-to-impex.groovy` - helper script that takes the results of a `FlexibleSearchRetentionRule` and delete + the outdated items via impex. Useful for bulk cleanup. 
+- README now documents queries to analyze outdated/stale data + +### Changed + +- CMS Version Garbage Collection Job + + - renamed to `jdbcVersionGCCronJob` / `jdbcVersionGCJob` + - optimized cleanup logic + - dynamically determine correct DB table names using the type system + +- Simplify cronjob retention rule (`cronJobRule`) +- Cleanup CronJobs now execute between 00:00 - 06:00 +- Longer retention period (4 weeks) for successfully finished `BusinessProcess` + +### Fixed + +- CMS Version Garbage Collection Job - job is now abortable for real + +### Upgrade Guide + +- Delete old CMS Version GC Job definition + + ```impex + REMOVE CronJob;code[unique=true];job(code)[unique=true] + ;cmsVersionGCCronJob;cmsVersionGCJob; + + REMOVE ServicelayerJob;code[unique=true];springId[unique=true]; + ;cmsVersionGCJob;cmsVersionGCPerformable; + ``` + ## [2.0.0] - 2021-03-23 ### Changed @@ -17,16 +63,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Custom retention cronjob to replace CMS Version [Garbage Collection][versiongc].\ The ootb garbage collection mechanism is over-engineered and should be a regular cronjob. 
-- Retention rules and jobs for the promotion engine, based on [Top 10 Recommendations for Improving the Performance of your Commerce Cloud Promotion Engine][top10] +- Retention rules and jobs for the promotion engine, based on + [Top 10 Recommendations for Improving the Performance of your Commerce Cloud Promotion Engine][top10] [versiongc]: https://help.sap.com/viewer/9d346683b0084da2938be8a285c0c27a/2011/en-US/9089116335ac4f4d8708e0c5516531e3.html [top10]: https://www.sap.com/cxworks/article/538808299/top_10_recommendations_for_improving_the_performance_of_your_commerce_cloud_promotion_engine -## [1.0.1] - 22020-12-09 +## [1.0.1] - 2020-12-09 ### Added -- Bulk cleanup cronjob for log files - useful for a one-time cleanup before the retention +- Bulk cleanup cronjob for log files - useful for a one-time cleanup before the retention job for job logs is enabled ## [1.0.0] - 2020-11-26 @@ -37,16 +84,17 @@ Initial release - Cleanup for: - - CronJobs - - CronJob Histories - - Lob Logs / Log Files - - Impex Media - - HTTP Sessions - - Business Processes - - Carts + - CronJobs + - CronJob Histories + - Lob Logs / Log Files + - Impex Media + - HTTP Sessions + - Business Processes + - Carts -[Unreleased]: https://github.com/sap-commerce-tools/sanecleanup/compare/v2.0.0...HEAD + +[Unreleased]: https://github.com/sap-commerce-tools/sanecleanup/compare/v3.0.0...HEAD +[3.0.0]: https://github.com/sap-commerce-tools/sanecleanup/compare/v2.0.0...v3.0.0 [2.0.0]: https://github.com/sap-commerce-tools/sanecleanup/compare/v1.0.1...v2.0.0 [1.0.1]: https://github.com/sap-commerce-tools/sanecleanup/compare/v1.0.0...v1.0.1 [1.0.0]: https://github.com/sap-commerce-tools/sanecleanup/releases/tag/v1.0.0 - diff --git a/README.md b/README.md index 9258fd7..83c29cb 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Sensible defaults for data retention and cleanup for SAP Commerce, based on my CX Works article [Data Maintenance and Cleanup][article] - +## How-To 1. 
Download the latest release 1. Unpack to `hybris/bin/custom` @@ -14,7 +14,7 @@ Sensible defaults for data retention and cleanup for SAP Commerce, based on my C ````xml ```` - +1. :red_circle: Adapt the retention rules to your project requirements :red_circle: 1. Build and deploy.\ (The rules will be automatically imported during system update) @@ -27,7 +27,360 @@ Consider performing a [one-time cleanup][one] before adding the extension / enab Especially the first run of `cronJobLogCleanupCronJob` will take a _very_ long time, if you have never removed any cronjob log files (type `LogFile`).\ Please consider importing and executing the script job defined in [bulkdelete-cronjoblogs.impex](resources/impex/bulkdelete-cronjoblogs.impex) **before** you set up the automated cleanup!\ The job will remove all log files except the five most recent logs per CronJob. -(Disclaimer: the script was tested on MS SQL / Azure SQL. It is not guaranteed to work for other databases) +(Disclaimer: the script was tested on MS SQL / Azure SQL and SAP HANA. It is not guaranteed to work for other databases) + +## Do I have to clean up? + +If have never even thought about that topic - yes! + +Here are some queries and "rules of thumb" that help you investigate further. + + + + + + + + + + + + + + + + +
Type(s)QueryNotes
AbstractRule + +```sql +SELECT + COUNT({ar:pk}), + MIN({ar:modifiedtime}) AS "oldest", + MAX({ar:modifiedtime}) AS "newest" +FROM + {AbstractRule AS ar}, + {RuleStatus AS rs} +WHERE + {ar:status} = {rs:pk} + AND {rs:code} = 'PUBLISHED' + AND {ar:enddate} IS NOT NULL + AND {ar:enddate} < getutcdate() +``` + + + +Are there any outdated rules? i.e rules that aren't valid anymore because their enddate is in the past. + +Warning: change `getutcdate()` to your DBMS (for HANA/MySQL: `now()` ) + +
BusinessProcess + +```sql +SELECT + {p:processDefinitionName}, + {s:code} AS "status", + COUNT({p:pk}) AS "total", + MIN({p:modifiedTime}) AS "oldest", + MAX({p:modifiedTime}) AS "newest" +FROM + {BusinessProcess AS p + LEFT JOIN + ProcessState AS s + ON {p:state} = {s:pk} } +GROUP BY + {p:processDefinitionName}, + {s:code} +ORDER BY + "total" DESC +``` + + + +Are there too many (let's say > 1000) or very old BusinessProcess in your system? + +Also, if a lot of processes are stuck in "RUNNING" / "WAITING", you have to investigate what's wrong. +(What is causing your processes to be stuck?) + +
Cart + +```sql +SELECT + {b:uid} AS "BaseSite", + {u:uid} AS "USER", + CASE + WHEN + {c:saveTime} IS NULL + THEN + 'regular' + ELSE + 'saved' + END + AS "cart type", + COUNT({c:pk}) AS "total", + MIN({c:modifiedtime}) AS "oldest", + MAX({c:modifiedtime}) AS "newest" +FROM + { Cart AS c + LEFT JOIN + USER AS u + ON {c:user} = {u:pk} + LEFT JOIN + BaseSite AS b + ON {c:site} = {b:pk} } +GROUP BY + {b:uid}, {u:uid}, + CASE + WHEN + {c:saveTime} IS NULL + THEN + 'regular' + ELSE + 'saved' + END +ORDER BY + "total" DESC +``` + + + +- Are there excessive amount of carts per site or per user? +- Too many saved carts? +- Stale (= old) carts? + +
CronJob (auto-generated) + +```sql +SELECT + {t:code} AS "CronJob Type", + COUNT({c:pk}) AS "total", + MIN({c:modifiedtime}) AS "oldest", + MAX({c:modifiedtime}) AS "newest" +FROM + {CronJob AS c + JOIN + ComposedType AS t + ON {c:itemtype} = {t:pk} + LEFT JOIN + TRIGGER AS trg + ON {trg:cronjob} = {c:pk} } +WHERE + {trg:pk} IS NULL + AND {c:code} LIKE '00%' + AND {t:code} IN + ( + 'ImpExImportCronJob', + 'CatalogVersionSyncCronJob', + 'SolrIndexerCronJob' + ) +GROUP BY + {t:code} +ORDER BY + "total" DESC +``` + + + +Are there too many (>10) outdated, auto-geneated jobs in your system? + +
CronJobHistory + +```sql +SELECT + {cj:code}, + COUNT({h:pk}) AS "total", + MIN({h:modifiedtime}) AS "oldest", + MAX({h:modifiedtime}) AS "newest" +FROM + {cronjobhistory AS h + JOIN + cronjob AS cj + ON {h:cronjob} = {cj:pk} } +GROUP BY + {cj:code} +ORDER BY + "total" DESC +``` + + + +Is there any job with > 50 histories and/or histories older than an hour? + +This cleanup is enabled by default in recent SAP Commerce patch releases, so this query shouldn't find anything. + +
EmailMessage + +```sql +SELECT + {bp:processDefinitionName} AS "source process", + {m:sent}, + COUNT({m:pk}) AS "total", + MIN({m:modifiedtime}) AS "oldest", + MAX({m:modifiedtime}) AS "newest" +FROM + {EmailMessage AS m + LEFT JOIN + BusinessProcess AS bp + ON {m:process} = {bp:pk} } +GROUP BY + {bp:processDefinitionName}, + {m:sent} +ORDER BY + "total" DESC +``` + + + +- Are there more than a handful sent/unsent messages? +- Are there messages that do not belong to any process? + +
ImpExImportCronJob (distributed impex) + +```sql +SELECT + {s:code} AS "status", + COUNT({i:pk}) AS "total", + MIN({i:modifiedtime}) AS "oldest", + MAX({i:modifiedtime}) AS "newest" +FROM + {ImpExImportCronJob AS i + LEFT JOIN + CronJobStatus AS s + ON {i:status} = {s:pk} } +WHERE + {i:code} LIKE 'distributed-impex-%' +GROUP BY + {s:code} +``` + + + +- More than ~10 `FINISHED` distributed impex jobs? +- More than a few `PAUSED` jobs? You may have a faulty distributed impex script. + +
ImpexMedia + +```sql +SELECT + COUNT(*) +FROM + {ImpexMedia AS i} +WHERE + ( + {i:code} LIKE '0_______' + OR {i:code} LIKE 'generated impex media - %' + ) +``` + + + +Are there more than a handful (>100) of generated impex medias? + +
ImportBatchContent + +```sql +SELECT + COUNT({c:pk}) AS "total", + MIN({c:modifiedTime}) AS "oldest", + MAX({c:modifiedTime}) AS "newest" +FROM + {ImportBatchContent AS c + LEFT JOIN + ImportBatch AS b + ON {b:importContentCode} = {c:code} } +WHERE + {b:pk} IS NULL +``` + + + +Are there any left-over distributed import batches? + +
LogFile + +```sql +SELECT + COALESCE({cj:code}, ''), + COUNT({l:pk}) AS "total", + MIN({l:modifiedtime}) AS "oldest", + MAX({l:modifiedtime}) AS "newest" +FROM + {LogFile AS l + LEFT JOIN + CronJob AS cj + ON {l:owner} = {cj:pk} } +GROUP BY + {cj:code} +ORDER BY + "total" DESC +``` + + + +Are there are cronjob with more than ~10 logs and/or logs older than 14 days? +(those are default values for log file retention) + +
SolrIndexOperation + +```sql +SELECT + {i:qualifier}, + COUNT({o:pk}) AS "total", + MIN({o:modifiedTime}) AS "oldest", + MAX({o:modifiedTime}) AS "newest" +FROM + {SolrIndexOperation AS o + LEFT JOIN + SolrIndex AS i + ON {o:index} = {i:pk} } +GROUP BY + {i:qualifier} +ORDER BY + "total" DESC +``` + + + +Too many solr operations (more than ~100 per index)? + +
StoredHttpSession + +```sql +SELECT + COUNT({s:pk}) AS "total", + MIN({s:modifiedtime}) AS "oldest", + MAX({s:modifiedtime}) AS "newest" +FROM + {StoredHttpSession AS s} +``` + + + +Excessive amount of session? This is hard to generalize as it highly depends on your site's traffic, but if you are near or over 5 digits, it's probably too much. + +Simarly, stale sessions (e.g older than a day) don't need to be retained. + +
TaskCondition + +```sql +SELECT + COUNT({tc:pk}), + MIN({tc:modifiedtime}) AS "oldest", + MAX({tc:modifiedtime}) AS "newest" +FROM + {TaskCondition AS tc } +WHERE + {tc:task} IS NULL +``` + + + +Is there an excessive amount of "premature events"? Or very old (older than a a few weeks) events? + +https://help.sap.com/viewer/d0224eca81e249cb821f2cdf45a82ace/2011/en-US/7e8ff9d7653f43e8890bc8eb395d52a7.html + +
+ + ## Support diff --git a/buildutils/update-readme.py b/buildutils/update-readme.py new file mode 100755 index 0000000..54c5442 --- /dev/null +++ b/buildutils/update-readme.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 + +import re +import os +from os.path import join +import urllib.request +from urllib.parse import urlencode + +def prettySQL(raw): + # return raw + # curl https://www.freeformatter.com/sql-formatter.html \ + # -d 'forceInNewWindow=true' \ + # -d 'sqlKeywordsCase=UPPER_CASE' \ + # -d 'indentation=TWO_SPACES' \ + # -d 'sqlString=select count(*) from foo' + params = urlencode({'forceInNewWindow': 'true', + 'sqlKeywordsCase': 'UPPER_CASE', + 'indentation': 'TWO_SPACES', + 'sqlString': raw}) + data = params.encode('utf-8') + with urllib.request.urlopen("https://www.freeformatter.com/sql-formatter.html", data) as f: + return f.read().decode('utf-8') + + +impexes = set() +for root, dirs, files in os.walk('resources'): + for f in files: + if f.endswith(".impex"): + impexes.add(join(root, f)) + +print(impexes) + +blocks = [] +for impex in impexes: + with open(impex) as i: + content = i.read() + pattern = re.compile(r"@readme(.+?)INSERT_UPDATE", flags=re.DOTALL) + for match in pattern.finditer(content): + lines = match.group(0).split('\n') + header = lines[0] + types = [f.strip() for f in header[7:].split(',')] + text = "" + query = "" + for l in [l.rstrip() for l in lines[1:-1]]: + l = re.sub("^# ?", "", l) + if l.lower().strip().startswith('select'): + query += l + '\n' + continue + if len(query) > 0: + query += l + '\n' + else: + text += l + '\n' + query = prettySQL(query) + blocks.append({'file': impex, 'types': types, 'text': text.strip(), 'query': query.strip()}) + +blocks.sort(key=lambda entry: "-".join(entry['types'])) +print(blocks) + +table = "" +table += "\n" +for block in blocks: + table += "" + table += f"" + table += f"""""" + table += "\n" +table += "
Type(s)QueryNotes
{','.join(block['types'])} + +```sql +{block['query']} +``` + + + +{block['text']} + +
" + +print(table) + +newContent = "" +with open('README.md', 'r') as old: + content = old.read() + newContent = re.sub(r'().+?()', f""" +{table} +""", content, flags=re.DOTALL) + print(newContent) + +if newContent: + with open('README.md', 'w') as new: + new.write(newContent) + + + + diff --git a/resources/impex/bulkdelete-cronjoblogs.impex b/resources/impex/bulkdelete-cronjoblogs.impex index 920120a..5436163 100644 --- a/resources/impex/bulkdelete-cronjoblogs.impex +++ b/resources/impex/bulkdelete-cronjoblogs.impex @@ -7,7 +7,8 @@ import de.hybris.platform.cronjob.enums.CronJobStatus import de.hybris.platform.cronjob.enums.CronJobResult // all log files EXCEPT the five most recent logs per cronjob -// warning: query uses MS SQL dialact for partioning the logs per cronjob +// warning: query uses DBMS-specific dialact for partioning the logs per cronjob +// query was tested on HANA and MS SQL def QUERY = ''' SELECT t.pk FROM diff --git a/resources/impex/essentialdata-jobs.impex b/resources/impex/essentialdata-jobs.impex index 01ee46e..9a2c8eb 100644 --- a/resources/impex/essentialdata-jobs.impex +++ b/resources/impex/essentialdata-jobs.impex @@ -1,3 +1,3 @@ INSERT_UPDATE ServicelayerJob;code[unique=true];springId; -;cmsVersionGCJob;cmsVersionGCPerformable; +;jdbcVersionGCJob;jdbcVersionGCPerformable; diff --git a/resources/impex/sanecleanup/acceleratorservices/001-cleanup-emails.impex b/resources/impex/sanecleanup/acceleratorservices/001-cleanup-emails.impex new file mode 100644 index 0000000..9793d85 --- /dev/null +++ b/resources/impex/sanecleanup/acceleratorservices/001-cleanup-emails.impex @@ -0,0 +1,79 @@ +# Import config properties into impex macros +UPDATE GenericItem[processor=de.hybris.platform.commerceservices.impex.impl.ConfigPropertyImportProcessor];pk[unique=true] +$sessionLanguage=$config-sanecleanup.jobs.sessionlanguage + +$twoWeeks = 1209600 + +# @readme EmailMessage +# - Are there more than a handful sent/unsent messages? 
+# - Are there messages that do not belong to any process? +# SELECT +# {bp:processDefinitionName} AS "source process", +# {m:sent}, +# COUNT({m:pk}) AS "total", +# MIN({m:modifiedtime}) AS "oldest", +# MAX({m:modifiedtime}) AS "newest" +# FROM +# {EmailMessage AS m +# LEFT JOIN +# BusinessProcess AS bp +# ON {m:process} = {bp:pk} } +# GROUP BY +# {bp:processDefinitionName}, {m:sent} +# ORDER BY +# "total" DESC +INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true]; searchQuery; retentionTimeSeconds; actionReference ; +; emailMessageRule ; " + SELECT {m:pk}, {m:itemtype} + FROM {EmailMessage AS m LEFT JOIN BusinessProcess AS bp ON {m:process} = {bp:pk}} + WHERE {bp:pk} IS NULL + AND {m:modifiedTime} < ?CALC_RETIREMENT_TIME" ; $twoWeeks ; basicRemoveCleanupAction ; +INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code); batchSize +; emailMessageCleanupJob ; emailMessageRule ; 1000 +INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] +; emailMessageCleanupCronJob ; emailMessageCleanupJob ; +INSERT Trigger; cronJob(code)[unique = true]; cronExpression +# every day at 02:00 +; emailMessageCleanupCronJob ; 0 0 2 * * ? 
+ + +INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true]; searchQuery; retentionTimeSeconds; actionReference ; +; emailAddressRule ; " +SELECT DISTINCT({a:pk}), {a:itemType} from { +EmailAddress AS a +LEFT JOIN EmailMessage2ToAddressesRel AS to ON {to:target} = {a:pk} +LEFT JOIN EmailMessage2CcAddressesRel AS cc ON {cc:target} = {a:pk} +LEFT JOIN EmailMessage2BccAddressesRel AS bcc ON {bcc:target} = {a:pk} +LEFT JOIN EmailMessage AS m ON {m:fromAddress} = {a:pk} +} +WHERE {to:source} IS NULL + AND {cc:source} IS NULL + AND {bcc:source} IS NULL + AND {m:pk} IS NULL" ; 0 ; basicRemoveCleanupAction ; +INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code); batchSize +; emailAddressCleanupJob ; emailAddressRule ; 1000 +INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] +; emailAddressCleanupCronJob ; emailAddressCleanupJob ; +INSERT Trigger; cronJob(code)[unique = true]; cronExpression +# every day at 02:00 +; emailAddressCleanupCronJob ; 0 0 2 * * ? + + +# EmailAttachment +INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true]; searchQuery; retentionTimeSeconds; actionReference ; +; emailAttachmentRule ; " +SELECT {a:pk}, {a:itemType} from { +EmailAttachment AS a +LEFT JOIN EmailMessage AS m ON {a:message} = {m:pk} +} +WHERE {m:pk} IS NULL" ; 0 ; basicRemoveCleanupAction ; +INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code); batchSize +; emailAttachmentCleanupJob ; emailAttachmentRule ; 1000 +INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] +; emailAttachmentCleanupCronJob ; emailAttachmentCleanupJob ; +INSERT Trigger; cronJob(code)[unique = true]; cronExpression +# every day at 02:00 +; emailAttachmentCleanupCronJob ; 0 0 2 * * ? 
+ + + diff --git a/resources/impex/sanecleanup/cms2/001-optimized-versiongc.impex b/resources/impex/sanecleanup/cms2/001-optimized-versiongc.impex index 6c663ca..e66baf1 100644 --- a/resources/impex/sanecleanup/cms2/001-optimized-versiongc.impex +++ b/resources/impex/sanecleanup/cms2/001-optimized-versiongc.impex @@ -2,10 +2,25 @@ # Import config properties into impex macros UPDATE GenericItem[processor=de.hybris.platform.commerceservices.impex.impl.ConfigPropertyImportProcessor];pk[unique=true] $sessionLanguage=$config-sanecleanup.jobs.sessionlanguage -$cronExpression=$config-version.gc.cron -INSERT_UPDATE CronJob;code[unique=true];job(code);sessionLanguage(isoCode)[default = $sessionLanguage] -;cmsVersionGCCronJob;cmsVersionGCJob; +INSERT_UPDATE CronJob;code[unique=true];job(code);queryCount;sessionLanguage(isoCode)[default = $sessionLanguage] +;jdbcVersionGCCronJob;jdbcVersionGCJob;1000; -INSERT_UPDATE Trigger;cronjob(code)[unique=true];cronExpression -;cmsVersionGCCronJob; $cronExpression +INSERT Trigger;cronjob(code)[unique=true];cronExpression +;jdbcVersionGCCronJob; 0 0 0 * * ? + +# delete cms version gc business processes +$oneDay=86400 +INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true] ; searchQuery; retentionTimeSeconds; actionReference ; +; cmsVersionGCProcessRule ; " + SELECT {p:pk}, {p:itemtype} + FROM {BusinessProcess AS p } + WHERE {p:code} LIKE 'cmsVersionGCProcess%' + AND {p:modifiedTime} < ?CALC_RETIREMENT_TIME" ; $oneDay ; basicRemoveCleanupAction ; +INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code) ; batchSize +; cmsVersionGCProcessCleanupJob ; cmsVersionGCProcessRule ; 1000 +INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] +; cmsVersionGCProcessCleanupCronJob ; cmsVersionGCProcessCleanupJob ; +INSERT Trigger; cronJob(code)[unique = true] ; cronExpression +# every day at 03:00 +; cmsVersionGCProcessCleanupCronJob ; 0 0 3 * * ? 
diff --git a/resources/impex/sanecleanup/commerceservices/001-cleanup-oldcarts.impex b/resources/impex/sanecleanup/commerceservices/001-cleanup-oldcarts.impex index f1821a1..15855db 100644 --- a/resources/impex/sanecleanup/commerceservices/001-cleanup-oldcarts.impex +++ b/resources/impex/sanecleanup/commerceservices/001-cleanup-oldcarts.impex @@ -10,6 +10,46 @@ $sessionLanguage=$config-sanecleanup.jobs.sessionlanguage $twoWeeks = 1209600 $fourWeeks = 2419200 + +# @readme Cart +# - Are there excessive amount of carts per site or per user? +# - Too many saved carts? +# - Stale (= old) carts? +# SELECT +# {b:uid} AS "BaseSite", +# {u:uid} AS "User", +# CASE +# WHEN +# {c:saveTime} IS NULL +# THEN +# 'regular' +# ELSE +# 'saved' +# END +# AS "cart type", +# COUNT({c:pk}) AS "total", +# MIN({c:modifiedtime}) AS "oldest", +# MAX({c:modifiedtime}) AS "newest" +# FROM +# { Cart AS c +# LEFT JOIN +# USER AS u +# ON {c:user} = {u:pk} +# LEFT JOIN +# BaseSite AS b +# ON {c:site} = {b:pk} } +# GROUP BY +# {b:uid}, {u:uid}, +# CASE +# WHEN +# {c:saveTime} IS NULL +# THEN +# 'regular' +# ELSE +# 'saved' +# END +# ORDER BY +# "total" DESC INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true]; searchQuery; retentionTimeSeconds; actionReference ; ; cartRule ; " SELECT {c:pk}, {c:itemtype} @@ -30,7 +70,7 @@ INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code); batch INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] ; cartCleanupCronJob ; cartCleanupJob ; ; anonymousCartCleanupCronJob ; anonymousCartCleanupJob ; -INSERT_UPDATE Trigger; cronJob(code)[unique = true]; cronExpression -# every day at midnight - ; cartCleanupCronJob ; 0 0 0 * * ? - ; anonymousCartCleanupCronJob ; 0 0 0 * * ? +INSERT Trigger; cronJob(code)[unique = true]; cronExpression +# every day at 03:00 + ; cartCleanupCronJob ; 0 0 3 * * ? + ; anonymousCartCleanupCronJob ; 0 0 3 * * ? 
diff --git a/resources/impex/sanecleanup/core/001-cleanup-httpsession.impex b/resources/impex/sanecleanup/core/001-cleanup-httpsession.impex index 1a61a95..c1f4b2c 100644 --- a/resources/impex/sanecleanup/core/001-cleanup-httpsession.impex +++ b/resources/impex/sanecleanup/core/001-cleanup-httpsession.impex @@ -5,6 +5,16 @@ UPDATE GenericItem[processor = de.hybris.platform.commerceservices.impex.impl.Co $sessionLanguage = $config-sanecleanup.jobs.sessionlanguage $oneDay = 86400 + +# @readme StoredHttpSession +# Excessive amount of sessions? This is hard to generalize as it highly depends on your site's traffic, but if you are near or over 5 digits, it's probably too much. +# +# Similarly, stale sessions (e.g. older than a day) don't need to be retained. +# SELECT +# COUNT({s:pk}) AS "total", +# MIN({s:modifiedtime}) AS "oldest", +# MAX({s:modifiedtime}) AS "newest" +# FROM {StoredHttpSession AS s} INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true] ; searchQuery; retentionTimeSeconds; actionReference ; ; storedHttpSessionRule ; "select {s:pk}, {s:itemtype} from {StoredHttpSession as s} @@ -13,6 +23,6 @@ INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code) ; storedHttpSessionCleanupJob ; storedHttpSessionRule ; 1000 INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] ; storedHttpSessionCleanupCronJob ; storedHttpSessionCleanupJob ; -INSERT_UPDATE Trigger; cronJob(code)[unique = true] ; cronExpression +INSERT Trigger; cronJob(code)[unique = true] ; cronExpression # every 30 minutes ; storedHttpSessionCleanupCronJob ; 0 0/30 * * * ? 
diff --git a/resources/impex/sanecleanup/impex/001-cleanup-impex.impex b/resources/impex/sanecleanup/impex/001-cleanup-impex.impex new file mode 100644 index 0000000..a475447 --- /dev/null +++ b/resources/impex/sanecleanup/impex/001-cleanup-impex.impex @@ -0,0 +1,30 @@ +# Import config properties into impex macros +UPDATE GenericItem[processor=de.hybris.platform.commerceservices.impex.impl.ConfigPropertyImportProcessor];pk[unique=true] +$sessionLanguage=$config-sanecleanup.jobs.sessionlanguage + +$twoWeeks = 1209600 + +# @readme ImpexMedia +# Are there more than a handful (>100) of generated impex medias? +# SELECT +# COUNT(*) +# FROM +# {ImpexMedia AS i} +# WHERE +# ( +# {i:code} LIKE '0_______' +# OR {i:code} LIKE 'generated impex media - %' +# ) +INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true]; searchQuery; retentionTimeSeconds; actionReference ; + ; impexMediaRule ; " + SELECT {i:pk}, {i:itemtype} + FROM {ImpexMedia AS i} + WHERE ( {i:code} LIKE '0_______' OR {i:code} LIKE 'generated impex media - %' ) + AND {i:modifiedTime} < ?CALC_RETIREMENT_TIME" ; $twoWeeks ; basicRemoveCleanupAction ; +INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code); batchSize + ; impexMediaCleanupJob ; impexMediaRule ; 1000 +INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] + ; impexMediaCleanupCronJob ; impexMediaCleanupJob ; +INSERT Trigger; cronJob(code)[unique = true]; cronExpression +# every day at 05:00 + ; impexMediaCleanupCronJob ; 0 0 5 * * ? 
diff --git a/resources/impex/sanecleanup/impex/001_cleanup-impex.impex b/resources/impex/sanecleanup/impex/001_cleanup-impex.impex deleted file mode 100644 index b326e40..0000000 --- a/resources/impex/sanecleanup/impex/001_cleanup-impex.impex +++ /dev/null @@ -1,14 +0,0 @@ -$twoWeeks = 1209600 -INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true]; searchQuery; retentionTimeSeconds; actionReference ; - ; impexMediaRule ; " - SELECT {i:pk}, {i:itemtype} - FROM {ImpexMedia AS i} - WHERE {i:code} LIKE '00______' - AND {i:modifiedTime} < ?CALC_RETIREMENT_TIME" ; $twoWeeks ; basicRemoveCleanupAction ; -INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code); batchSize - ; impexMediaCleanupJob ; impexMediaRule ; 1000 -INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = en] - ; impexMediaCleanupCronJob ; impexMediaCleanupJob ; -INSERT_UPDATE Trigger; cronJob(code)[unique = true]; cronExpression -# every day at midnight - ; impexMediaCleanupCronJob ; 0 0 0 * * ? \ No newline at end of file diff --git a/resources/impex/sanecleanup/impex/002-cleanup-distributed-impex.impex b/resources/impex/sanecleanup/impex/002-cleanup-distributed-impex.impex new file mode 100644 index 0000000..78b96e5 --- /dev/null +++ b/resources/impex/sanecleanup/impex/002-cleanup-distributed-impex.impex @@ -0,0 +1,102 @@ +# Import config properties into impex macros +UPDATE GenericItem[processor=de.hybris.platform.commerceservices.impex.impl.ConfigPropertyImportProcessor];pk[unique=true] +$sessionLanguage=$config-sanecleanup.jobs.sessionlanguage + +$twoWeeks = 1209600 + +# @readme ImpExImportCronJob (distributed impex) +# - More than ~10 `FINISHED` distributed impex jobs? +# - More than a few `PAUSED` jobs? You may have a faulty distributed impex script. 
+# SELECT +# {s:code} AS "status", +# COUNT({i:pk}) AS "total", +# MIN({i:modifiedtime}) AS "oldest", +# MAX({i:modifiedtime}) AS "newest" +# FROM +# {ImpExImportCronJob AS i +# LEFT JOIN +# CronJobStatus AS s +# ON {i:status} = {s:pk} } +# WHERE +# {i:code} LIKE 'distributed-impex-%' +# GROUP BY +# {s:code} +INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true]; searchQuery; retentionTimeSeconds; actionReference ; +; distributedImpexCronJobRule ; " +SELECT {i:pk}, {i:itemType} + FROM {ImpExImportCronJob AS i} + WHERE {i:code} LIKE 'distributed-impex-%' + AND {i:modifiedtime} < ?CALC_RETIREMENT_TIME" ; $twoWeeks ; basicRemoveCleanupAction ; +INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code); batchSize +; distributedImpexCronJobCleanupJob ; distributedImpexCronJobRule ; 1000 +INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] +; distributedImpexCronJobCleanupCronJob ; distributedImpexCronJobCleanupJob ; +INSERT Trigger; cronJob(code)[unique = true]; cronExpression +# every day at 04:30 +; distributedImpexCronJobCleanupCronJob ; 0 30 4 * * ? + +INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true]; searchQuery; retentionTimeSeconds; actionReference ; +; distributedImpexJobRule ; " + SELECT {j:pk}, {j:itemtype} + FROM {ImpExImportJob AS j LEFT JOIN ImpExImportCronJob as cj on {cj:job} = {j:pk} } + WHERE {j:code} LIKE 'distributed-impex-%' + AND {cj:pk} IS NULL" ; 0 ; basicRemoveCleanupAction ; +INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code); batchSize +; distributedImpexJobCleanupJob ; distributedImpexJobRule ; 1000 +INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] +; distributedImpexJobCleanupCronJob ; distributedImpexJobCleanupJob ; +INSERT Trigger; cronJob(code)[unique = true]; cronExpression +# every day at 05:00 +; distributedImpexJobCleanupCronJob ; 0 0 5 * * ? 
+ +INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true]; searchQuery; retentionTimeSeconds; actionReference ; +; distributedImportProcessRule ; " + SELECT {p:pk}, {p:itemtype} + FROM {DistributedImportProcess AS p LEFT JOIN ImpExImportCronJob as cj on {p:impExImportCronJob} = {cj:pk} } + WHERE {cj:pk} IS NULL" ; 0 ; basicRemoveCleanupAction ; +INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code); batchSize +; distributedImportProcessCleanupJob ; distributedImportProcessRule ; 1000 +INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] +; distributedImportProcessCleanupCronJob ; distributedImportProcessCleanupJob ; +INSERT Trigger; cronJob(code)[unique = true]; cronExpression +# every day at 05:00 +; distributedImportProcessCleanupCronJob ; 0 0 5 * * ? + +INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true]; searchQuery; retentionTimeSeconds; actionReference ; +; importBatchRule ; " + SELECT {b:pk}, {b:itemtype} + FROM {ImportBatch AS b LEFT JOIN DistributedImportProcess as p on {b:process} = {p:pk} } + WHERE {p:pk} IS NULL" ; 0 ; basicRemoveCleanupAction ; +INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code); batchSize +; importBatchCleanupJob ; importBatchRule ; 1000 +INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] +; importBatchCleanupCronJob ; importBatchCleanupJob ; +INSERT Trigger; cronJob(code)[unique = true]; cronExpression +# every day at 05:30 +; importBatchCleanupCronJob ; 0 30 5 * * ? + +# @readme ImportBatchContent +# Are there any left-over distributed import batches? 
+# SELECT
+# COUNT({c:pk}) AS "total",
+# MIN({c:modifiedTime}) AS "oldest",
+# MAX({c:modifiedTime}) AS "newest"
+# FROM
+# {ImportBatchContent AS c
+# LEFT JOIN
+# ImportBatch AS b
+# ON {b:importContentCode} = {c:code} }
+# WHERE
+# {b:pk} IS NULL
+INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true]; searchQuery; retentionTimeSeconds; actionReference ;
+; importBatchContentRule ; "
+    SELECT {c:pk}, {c:itemtype}
+    FROM {ImportBatchContent AS c LEFT JOIN ImportBatch as b on {b:importContentCode} = {c:code} }
+    WHERE {b:pk} IS NULL" ; 0 ; basicRemoveCleanupAction ;
+INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code); batchSize
+; importBatchContentCleanupJob ; importBatchContentRule ; 1000
+INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage]
+; importBatchContentCleanupCronJob ; importBatchContentCleanupJob ;
+INSERT Trigger; cronJob(code)[unique = true]; cronExpression
+# every day at 06:00
+; importBatchContentCleanupCronJob ; 0 0 6 * * ?
diff --git a/resources/impex/sanecleanup/platformservices/001-enable-cronjoblogs-cleanup.impex b/resources/impex/sanecleanup/platformservices/001-enable-cronjoblogs-cleanup.impex
index 78dfada..40a629e 100644
--- a/resources/impex/sanecleanup/platformservices/001-enable-cronjoblogs-cleanup.impex
+++ b/resources/impex/sanecleanup/platformservices/001-enable-cronjoblogs-cleanup.impex
@@ -6,8 +6,38 @@
 UPDATE GenericItem[processor=de.hybris.platform.commerceservices.impex.impl.ConfigPropertyImportProcessor];pk[unique=true]
 $sessionLanguage=$config-sanecleanup.jobs.sessionlanguage
+# @readme LogFile
+# Are there any cronjobs with more than ~10 logs and/or logs older than 14 days?
+# (those are default values for log file retention)
+# SELECT
+# COALESCE({cj:code}, ''),
+# COUNT({l:pk}) AS "total",
+# MIN({l:modifiedtime}) AS "oldest",
+# MAX({l:modifiedtime}) AS "newest"
+# FROM
+# {LogFile AS l
+# LEFT JOIN
+# CronJob AS cj
+# ON {l:owner} = {cj:pk} }
+# GROUP BY
+# {cj:code}
+# ORDER BY
+# "total" DESC
 INSERT_UPDATE CronJob; code[unique = true] ; job(code) ;queryCount; sessionLanguage(isoCode)[default = $sessionLanguage]
 ; cronJobLogCleanupCronJob ; cleanUpLogsJobPerformable ; 2147483647 ;
-INSERT_UPDATE Trigger; cronJob(code)[unique = true]; cronExpression
+INSERT Trigger; cronJob(code)[unique = true]; cronExpression
 # every hour
 ; cronJobLogCleanupCronJob ; 0 0 0/1 * * ?
+
+INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true]; searchQuery; retentionTimeSeconds; actionReference ;
+; orphanedLogsRule ; "
+    SELECT {l:pk}, {l:itemtype}
+    FROM {LogFile AS l LEFT JOIN CronJob AS cj ON {l:owner} = {cj:pk} }
+    WHERE {cj:pk} IS NULL" ; 0 ; basicRemoveCleanupAction ;
+INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code); batchSize
+; orphanedLogsCleanupJob ; orphanedLogsRule ; 1000
+INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage]
+; orphanedLogsCleanupCronJob ; orphanedLogsCleanupJob ;
+INSERT Trigger; cronJob(code)[unique = true]; cronExpression
+# every day at midnight
+; orphanedLogsCleanupCronJob ; 0 0 0 * * ?
diff --git a/resources/impex/sanecleanup/processing/001-cleanup-cronjobhistory.impex b/resources/impex/sanecleanup/processing/001-cleanup-cronjobhistory.impex index fe3d4b5..e54b637 100644 --- a/resources/impex/sanecleanup/processing/001-cleanup-cronjobhistory.impex +++ b/resources/impex/sanecleanup/processing/001-cleanup-cronjobhistory.impex @@ -7,7 +7,24 @@ UPDATE GenericItem[processor=de.hybris.platform.commerceservices.impex.impl.ConfigPropertyImportProcessor];pk[unique=true] $sessionLanguage=$config-sanecleanup.jobs.sessionlanguage -# RETENTION RULE for cleaning up cron job history items +# @readme CronJobHistory +# Is there any job with > 50 histories and/or histories older than an hour? +# +# This cleanup is enabled by default in recent SAP Commerce patch releases, so this query shouldn't find anything. +# SELECT +# {cj:code}, +# COUNT({h:pk}) AS "total", +# MIN({h:modifiedtime}) AS "oldest", +# MAX({h:modifiedtime}) AS "newest" +# FROM +# {cronjobhistory AS h +# JOIN +# cronjob AS cj +# ON {h:cronjob} = {cj:pk} } +# GROUP BY +# {cj:code} +# ORDER BY +# "total" DESC INSERT_UPDATE FlexibleSearchRetentionRule;code[unique=true];searchQuery;actionReference; "#% beforeEach: import de.hybris.platform.core.Registry; @@ -67,7 +84,7 @@ if (cronJob != null) }" ; cronJobHistoryRetentionCronJob; cronJobHistoryRetentionJob; -INSERT_UPDATE Trigger; cronJob(code)[unique=true]; cronExpression +INSERT Trigger; cronJob(code)[unique=true]; cronExpression # every hour "#% beforeEach: import de.hybris.platform.core.Registry; diff --git a/resources/impex/sanecleanup/processing/002-cleanup-cronjobs.impex b/resources/impex/sanecleanup/processing/002-cleanup-cronjobs.impex index 6620fae..f2a0114 100644 --- a/resources/impex/sanecleanup/processing/002-cleanup-cronjobs.impex +++ b/resources/impex/sanecleanup/processing/002-cleanup-cronjobs.impex @@ -6,12 +6,41 @@ $sessionLanguage = $config-sanecleanup.jobs.sessionlanguage $twoWeeks = 1209600 $oneDay = 86400 + +# @readme CronJob 
(auto-generated) +# Are there too many (>10) outdated, auto-geneated jobs in your system? +# SELECT +# {t:code} AS "CronJob Type", +# COUNT({c:pk}) AS "total", +# MIN({c:modifiedtime}) AS "oldest", +# MAX({c:modifiedtime}) AS "newest" +# FROM +# {CronJob AS c +# JOIN +# ComposedType AS t +# ON {c:itemtype} = {t:pk} +# LEFT JOIN +# TRIGGER AS trg +# ON {trg:cronjob} = {c:pk} } +# WHERE +# {trg:pk} IS NULL +# AND {c:code} LIKE '00%' +# AND {t:code} IN +# ( +# 'ImpExImportCronJob', +# 'CatalogVersionSyncCronJob', +# 'SolrIndexerCronJob' +# ) +# GROUP BY +# {t:code} +# ORDER BY +# "total" DESC INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true]; searchQuery; retentionTimeSeconds; actionReference ; ; cronJobRule ; " SELECT {c:pk}, {c:itemType} FROM {CronJob AS c JOIN ComposedType AS t ON {c:itemtype} = {t:pk} LEFT JOIN Trigger AS trg ON {trg:cronjob} = {c:pk} } WHERE {trg:pk} IS NULL - AND {c:code} LIKE '00______%' + AND {c:code} LIKE '00%' AND {t:code} IN ( 'ImpExImportCronJob', 'CatalogVersionSyncCronJob', 'SolrIndexerCronJob' ) AND {c:endTime} < ?CALC_RETIREMENT_TIME" ; $twoWeeks ; basicRemoveCleanupAction ; ; solrJobRule ; " @@ -26,7 +55,7 @@ INSERT_UPDATE RetentionJob; code[unique = true]; retentionRule(code); batchSize INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] ; cronJobCleanupCronJob ; cronJobCleanupJob ; ; solrJobCleanupCronJob ; solrJobCleanupJob ; -INSERT_UPDATE Trigger; cronJob(code)[unique = true]; cronExpression -# every day at midnight - ; cronJobCleanupCronJob ; 0 0 0 * * ? - ; solrJobCleanupCronJob ; 0 0 0 * * ? +INSERT Trigger; cronJob(code)[unique = true]; cronExpression +# every day at 04:00 + ; cronJobCleanupCronJob ; 0 0 4 * * ? + ; solrJobCleanupCronJob ; 0 0 4 * * ? 
diff --git a/resources/impex/sanecleanup/processing/003-cleanup-businessprocess.impex b/resources/impex/sanecleanup/processing/003-cleanup-businessprocess.impex index c08cbbf..f3c5490 100644 --- a/resources/impex/sanecleanup/processing/003-cleanup-businessprocess.impex +++ b/resources/impex/sanecleanup/processing/003-cleanup-businessprocess.impex @@ -2,17 +2,154 @@ UPDATE GenericItem[processor=de.hybris.platform.commerceservices.impex.impl.ConfigPropertyImportProcessor];pk[unique=true] $sessionLanguage=$config-sanecleanup.jobs.sessionlanguage -$twoWeeks = 1209600 +$fourWeeks = 2419200 +$sixMonths = 15778476 + +# @readme BusinessProcess +# Are there too many (let's say > 1000) or very old BusinessProcess in your system? +# +# Also, if a lot of processes are stuck in "RUNNING" / "WAITING", you have to investigate what's wrong. +# (What is causing your processes to be stuck?) +# SELECT {p:processDefinitionName}, +# {s:code} AS "status", +# COUNT({p:pk}) AS "total", +# MIN({p:modifiedTime}) AS "oldest", +# MAX({p:modifiedTime}) AS "newest" +# FROM {BusinessProcess AS p LEFT JOIN ProcessState AS s ON {p:state} = {s:pk} } +# GROUP BY {p:processDefinitionName}, {s:code} +# ORDER BY "total" DESC INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true] ; searchQuery; retentionTimeSeconds; actionReference ; - ; businessProcessRule ; " - SELECT {p:pk}, {p:itemtype} +; businessProcessRule ; " + SELECT {p:pk}, {p:itemtype} FROM {BusinessProcess AS p JOIN ProcessState AS s ON {p:state} = {s:pk} } WHERE {s:code} IN ('SUCCEEDED') - AND {p:modifiedTime} < ?CALC_RETIREMENT_TIME" ; $twoWeeks ; basicRemoveCleanupAction ; + AND {p:modifiedTime} < ?CALC_RETIREMENT_TIME" ; $fourWeeks ; basicRemoveCleanupAction ; INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code) ; batchSize ; businessProcessCleanupJob ; businessProcessRule ; 1000 INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] ; 
businessProcessCleanupCronJob ; businessProcessCleanupJob ; -INSERT_UPDATE Trigger; cronJob(code)[unique = true] ; cronExpression +INSERT Trigger; cronJob(code)[unique = true] ; cronExpression # every day at midnight ; businessProcessCleanupCronJob ; 0 0 0 * * ? + +INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true] ; searchQuery; retentionTimeSeconds; actionReference ; +; failedBusinessProcessRule ; " + SELECT {p:pk}, {p:itemtype} + FROM {BusinessProcess AS p JOIN ProcessState AS s ON {p:state} = {s:pk} } + WHERE {s:code} IN ('FAILED', 'ERROR') + AND {p:modifiedTime} < ?CALC_RETIREMENT_TIME" ; $sixMonths ; basicRemoveCleanupAction ; +INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code) ; batchSize +; failedBusinessProcessCleanupJob ; failedBusinessProcessRule ; 1000 +INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] +; failedBusinessProcessCleanupCronJob ; failedBusinessProcessCleanupJob ; +INSERT Trigger; cronJob(code)[unique = true] ; cronExpression +# every day at midnight +; failedBusinessProcessCleanupCronJob ; 0 0 0 * * ? 
+
+INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true] ; searchQuery; retentionTimeSeconds; actionReference ;
+; progressBusinessProcessRule ; "
+    SELECT {p:pk}, {p:itemtype}
+    FROM {BusinessProcess AS p JOIN ProcessState AS s ON {p:state} = {s:pk} }
+    WHERE {s:code} IN ('CREATED', 'RUNNING', 'WAITING')
+      AND {p:modifiedTime} < ?CALC_RETIREMENT_TIME" ; $sixMonths ; basicRemoveCleanupAction ;
+INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code) ; batchSize
+; progressBusinessProcessCleanupJob ; progressBusinessProcessRule ; 1000
+INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage]
+; progressBusinessProcessCleanupCronJob ; progressBusinessProcessCleanupJob ;
+INSERT Trigger; cronJob(code)[unique = true] ; cronExpression
+# every day at midnight
+; progressBusinessProcessCleanupCronJob ; 0 0 0 * * ?
+
+# @readme TaskCondition
+# Is there an excessive amount of "premature events"? Or very old (older than a few weeks) events?
+# +# https://help.sap.com/viewer/d0224eca81e249cb821f2cdf45a82ace/2011/en-US/7e8ff9d7653f43e8890bc8eb395d52a7.html +# SELECT COUNT({tc:pk}), +# MIN({tc:modifiedtime}) AS "oldest", +# MAX({tc:modifiedtime}) AS "newest" +# FROM {TaskCondition AS tc } +# WHERE {tc:task} IS NULL +INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true] ; searchQuery; retentionTimeSeconds; actionReference ; +; prematureTaskConditionRule ; " + SELECT {tc:pk}, {tc:itemtype} + FROM {TaskCondition AS tc } + WHERE {tc:task} IS NULL + AND {tc:modifiedTime} < ?CALC_RETIREMENT_TIME" ; $fourWeeks ; basicRemoveCleanupAction ; +INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code) ; batchSize +; prematureTaskConditionCleanupJob ; prematureTaskConditionRule ; 1000 +INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] +; prematureTaskConditionCleanupCronJob ; prematureTaskConditionCleanupJob ; +INSERT Trigger; cronJob(code)[unique = true] ; cronExpression +# every day at 02:00 +; prematureTaskConditionCleanupCronJob ; 0 0 2 * * ? + +#### delete orphans if BusinessProcess / Task is gone. this shouldn't happen, but you never know + +# TaskCondition - usually assigned to a task. 
+INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true] ; searchQuery; retentionTimeSeconds; actionReference ; +; orphanedTaskConditionRule ; " + SELECT {tc:pk}, {tc:itemtype} + FROM {TaskCondition AS tc LEFT JOIN Task AS t ON {tc:task} = {t:pk} } + WHERE {tc:task} IS NOT NULL + AND {t:pk} IS NULL" ; 0 ; basicRemoveCleanupAction ; +INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code) ; batchSize +; orphanedTaskConditionCleanupJob ; orphanedTaskConditionRule ; 1000 +INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] +; orphanedTaskConditionCleanupCronJob ; orphanedTaskConditionCleanupJob ; +INSERT Trigger; cronJob(code)[unique = true] ; cronExpression +# every day at 02:00 +; orphanedTaskConditionCleanupCronJob ; 0 0 2 * * ? + +# ProcessTask +INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true] ; searchQuery; retentionTimeSeconds; actionReference ; +; orphanedProcessTaskRule ; " + SELECT {pt:pk}, {pt:itemtype} + FROM {ProcessTask AS pt LEFT JOIN BusinessProcess AS bp ON {pt:process} = {bp:pk} } + WHERE ( + ( {pt:process} IS NOT NULL AND {bp:pk} IS NULL ) + OR + {pt:process} IS NULL + )" ; 0 ; basicRemoveCleanupAction ; +INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code) ; batchSize +; orphanedProcessTaskCleanupJob ; orphanedProcessTaskRule ; 1000 +INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] +; orphanedProcessTaskCleanupCronJob ; orphanedProcessTaskCleanupJob ; +INSERT Trigger; cronJob(code)[unique = true] ; cronExpression +# every day at 02:00 +; orphanedProcessTaskCleanupCronJob ; 0 0 2 * * ? 
+ +# BusinessProcessParameter +INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true] ; searchQuery; retentionTimeSeconds; actionReference ; +; orphanedBusinessProcessParameterRule ; " + SELECT {p:pk}, {p:itemtype} + FROM {BusinessProcessParameter AS p LEFT JOIN BusinessProcess AS bp ON {p:process} = {bp:pk} } + WHERE ( + ( {p:process} IS NOT NULL AND {bp:pk} IS NULL ) + OR + {p:process} IS NULL + )" ; 0 ; basicRemoveCleanupAction ; +INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code) ; batchSize +; orphanedBusinessProcessParameterCleanupJob ; orphanedBusinessProcessParameterRule ; 1000 +INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] +; orphanedBusinessProcessParameterCleanupCronJob ; orphanedBusinessProcessParameterCleanupJob ; +INSERT Trigger; cronJob(code)[unique = true] ; cronExpression +# every day at 02:00 +; orphanedBusinessProcessParameterCleanupCronJob ; 0 0 2 * * ? + +# ProcessTaskLog +INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true] ; searchQuery; retentionTimeSeconds; actionReference ; +; orphanedProcessTaskLogRule ; " + SELECT {l:pk}, {l:itemtype} + FROM {ProcessTaskLog AS l LEFT JOIN BusinessProcess AS bp ON {l:process} = {bp:pk} } + WHERE ( + ( {l:process} IS NOT NULL AND {bp:pk} IS NULL ) + OR + {l:process} IS NULL + )" ; 0 ; basicRemoveCleanupAction ; +INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code) ; batchSize +; orphanedProcessTaskLogCleanupJob ; orphanedProcessTaskLogRule ; 1000 +INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] +; orphanedProcessTaskLogCleanupCronJob ; orphanedProcessTaskLogCleanupJob ; +INSERT Trigger; cronJob(code)[unique = true] ; cronExpression +# every day at 02:00 +; orphanedProcessTaskLogCleanupCronJob ; 0 0 2 * * ? 
diff --git a/resources/impex/sanecleanup/ruleengine/002-delete-expired-rules.impex b/resources/impex/sanecleanup/ruleengine/002-delete-expired-rules.impex index c069c6d..a61a748 100644 --- a/resources/impex/sanecleanup/ruleengine/002-delete-expired-rules.impex +++ b/resources/impex/sanecleanup/ruleengine/002-delete-expired-rules.impex @@ -5,6 +5,18 @@ UPDATE GenericItem[processor=de.hybris.platform.commerceservices.impex.impl.ConfigPropertyImportProcessor];pk[unique=true] $sessionLanguage=$config-sanecleanup.jobs.sessionlanguage +# @readme AbstractRule +# Are there any outdated rules? i.e rules that aren't valid anymore because their enddate is in the past. +# +# Warning: change `getutcdate()` to your DBMS (for HANA/MySQL: `now()` ) +# SELECT COUNT({ar:pk}), +# MIN({ar:modifiedtime}) AS "oldest", +# MAX({ar:modifiedtime}) AS "newest" +# FROM {AbstractRule AS ar}, {RuleStatus AS rs} +# WHERE {ar:status} = {rs:pk} +# AND {rs:code} = 'PUBLISHED' +# AND {ar:enddate} IS NOT NULL +# AND {ar:enddate} < getutcdate() INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true] ; searchQuery; retentionTimeSeconds; actionReference ; ; abstractRuleRule ; " SELECT {ar:pk}, {ar:itemtype} @@ -17,6 +29,6 @@ INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code) ; ba ; abstractRuleCleanupJob ; abstractRuleRule ; 1000 INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] ; abstractRuleCleanupCronJob ; abstractRuleCleanupJob ; -INSERT_UPDATE Trigger; cronJob(code)[unique = true] ; cronExpression +INSERT Trigger; cronJob(code)[unique = true] ; cronExpression # every day at midnight ; abstractRuleCleanupCronJob ; 0 0 0 * * ? 
diff --git a/resources/impex/sanecleanup/ruleengine/003-rule-engine-orphans.impex.draft b/resources/impex/sanecleanup/ruleengine/003-rule-engine-orphans.impex.draft new file mode 100644 index 0000000..10c69dd --- /dev/null +++ b/resources/impex/sanecleanup/ruleengine/003-rule-engine-orphans.impex.draft @@ -0,0 +1,23 @@ +# Import config properties into impex macros +UPDATE GenericItem[processor=de.hybris.platform.commerceservices.impex.impl.ConfigPropertyImportProcessor];pk[unique=true] +$sessionLanguage=$config-sanecleanup.jobs.sessionlanguage + +INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true] ; searchQuery; retentionTimeSeconds; actionReference ; +; orphanedPromotionActionParameterRule ; " +SELECT {ap:pk}, {ap:itemtype} + FROM { + PromotionActionParameter AS ap + LEFT JOIN RuleBasedPotentialPromotionMessageAction AS a ON {a:parameters} LIKE + } + WHERE {ar:status} = {rs:pk} + AND {rs:code} = 'PUBLISHED' + AND {ar:enddate} IS NOT NULL + AND {ar:enddate} < ?JAVA_CURRENT_TIME" ; 0 ; basicRemoveCleanupAction ; +INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code) ; batchSize +; abstractRuleCleanupJob ; abstractRuleRule ; 1000 +INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = $sessionLanguage] +; abstractRuleCleanupCronJob ; abstractRuleCleanupJob ; +INSERT_UPDATE Trigger; cronJob(code)[unique = true] ; cronExpression +# every day at midnight +; abstractRuleCleanupCronJob ; 0 0 0 * * ? + diff --git a/resources/impex/sanecleanup/solrfacetsearch/002-cleanup-solrindexoperation.impex b/resources/impex/sanecleanup/solrfacetsearch/002-cleanup-solrindexoperation.impex new file mode 100644 index 0000000..7a43462 --- /dev/null +++ b/resources/impex/sanecleanup/solrfacetsearch/002-cleanup-solrindexoperation.impex @@ -0,0 +1,26 @@ +$twoDays = 172800 + +# @readme SolrIndexOperation +# Too many solr operations (more than ~100 per index)? 
+# SELECT {i:qualifier}, +# COUNT({o:pk}) AS "total", +# MIN({o:modifiedTime}) AS "oldest", +# MAX({o:modifiedTime}) AS "newest" +# FROM {SolrIndexOperation AS o +# LEFT JOIN SolrIndex AS i +# ON {o:index} = {i:pk} } +# GROUP BY {i:qualifier} +# ORDER BY "total" DESC +INSERT_UPDATE FlexibleSearchRetentionRule; code[unique = true]; searchQuery; retentionTimeSeconds; actionReference ; +; solrIndexOperationRule ; " + SELECT {o:pk}, {o:itemtype} + FROM {SolrIndexOperation AS o} + WHERE {o:endTime} IS NOT NULL + AND {o:modifiedTime} < ?CALC_RETIREMENT_TIME" ; $twoDays ; basicRemoveCleanupAction ; +INSERT_UPDATE RetentionJob; code[unique = true] ; retentionRule(code); batchSize +; solrIndexOperationCleanupJob ; solrIndexOperationRule ; 1000 +INSERT_UPDATE CronJob; code[unique = true] ; job(code) ; sessionLanguage(isoCode)[default = en] +; solrIndexOperationCleanupCronJob ; solrIndexOperationCleanupJob ; +INSERT Trigger; cronJob(code)[unique = true]; cronExpression +# every day at 04:00 +; solrIndexOperationCleanupCronJob ; 0 0 4 * * ? 
diff --git a/resources/retentionrule-to-impex.groovy b/resources/retentionrule-to-impex.groovy new file mode 100644 index 0000000..dc579da --- /dev/null +++ b/resources/retentionrule-to-impex.groovy @@ -0,0 +1,43 @@ +import de.hybris.platform.retention.RetentionRequestParams +import de.hybris.platform.servicelayer.search.FlexibleSearchQuery +import de.hybris.platform.servicelayer.impex.impl.StreamBasedImpExResource + +// close transaction to avoid errors when creating the impex job in the hac scripting console +de.hybris.platform.tx.Transaction.current().commit() + +def RETENTIION_RULE = 'impexMediaRule' +def ruleQuery = new FlexibleSearchQuery("SELECT {pk} FROM {FlexibleSearchRetentionRule} WHERE {code} = ?rule") +ruleQuery.addQueryParameter("rule", RETENTIION_RULE) + +def rule = flexibleSearchService.searchUnique(ruleQuery) + +// rule.retentionTimeSeconds = 1 +def retentionParams = RetentionRequestParams.builder().withRuleModel(rule).withBatchSize(1000).build(); + +def itemProvider = retentionItemsProviderFactory.create(retentionParams) +def typeToPK = [:].withDefault { [] as Set } + +def items = itemProvider.nextItemsForCleanup() +while (items) +{ + items.forEach { + typeToPK[it.itemType].add(it.pk) + } + items = itemProvider.nextItemsForCleanup() +} + +typeToPK.forEach { type, pks -> + def impex = "REMOVE ${type};pk[unique=true]\n" + impex += ';' + pks.join(";\n;") + ";" + + def impexResource = new StreamBasedImpExResource(new ByteArrayInputStream(impex.getBytes('UTF-8')), 'UTF-8') + + def importConfig = spring.getBean('importConfig') + importConfig.removeOnSuccess = true + importConfig.script = impexResource + importConfig.synchronous = false + + def importResult = importService.importData(importConfig) + + println("Bulk-delete ${type}: ${importResult.cronJob.code}") +} \ No newline at end of file diff --git a/resources/sanecleanup-spring.xml b/resources/sanecleanup-spring.xml index e5a9779..a0a9689 100644 --- a/resources/sanecleanup-spring.xml +++ 
b/resources/sanecleanup-spring.xml @@ -5,16 +5,17 @@ xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd"> - + - + + \ No newline at end of file diff --git a/src/mpern/sap/cleanup/AfterInitListener.java b/src/mpern/sap/cleanup/CleanupAfterInitListener.java similarity index 55% rename from src/mpern/sap/cleanup/AfterInitListener.java rename to src/mpern/sap/cleanup/CleanupAfterInitListener.java index bcd9e9c..f18f2b6 100644 --- a/src/mpern/sap/cleanup/AfterInitListener.java +++ b/src/mpern/sap/cleanup/CleanupAfterInitListener.java @@ -2,7 +2,6 @@ import de.hybris.bootstrap.config.ConfigUtil; import de.hybris.bootstrap.config.ExtensionInfo; -import de.hybris.platform.core.PK; import de.hybris.platform.core.Registry; import de.hybris.platform.core.model.initialization.SystemSetupAuditModel; import de.hybris.platform.servicelayer.event.events.AfterInitializationEndEvent; @@ -16,7 +15,6 @@ import de.hybris.platform.servicelayer.search.FlexibleSearchService; import de.hybris.platform.servicelayer.search.SearchResult; import de.hybris.platform.servicelayer.user.UserService; -import de.hybris.platform.tx.Transaction; import de.hybris.platform.util.persistence.PersistenceUtils; import mpern.sap.cleanup.constants.SanecleanupConstants; import org.apache.commons.codec.digest.DigestUtils; @@ -25,13 +23,14 @@ import org.springframework.core.io.Resource; import org.springframework.core.io.support.PathMatchingResourcePatternResolver; +import java.io.IOException; import java.util.*; import static org.springframework.core.io.support.ResourcePatternResolver.CLASSPATH_ALL_URL_PREFIX; -public class AfterInitListener extends AbstractEventListener { +public class CleanupAfterInitListener extends AbstractEventListener { - private static final Logger LOG = LoggerFactory.getLogger(AfterInitListener.class); + private static final Logger LOG = LoggerFactory.getLogger(CleanupAfterInitListener.class); private final 
ImportService importService; private final FlexibleSearchService flexibleSearchService; @@ -40,7 +39,10 @@ public class AfterInitListener extends AbstractEventListener extensions = ConfigUtil.getPlatformConfig(Registry.class).getExtensionInfosInBuildOrder(); - final List applicableImpex = new ArrayList<>(); - for (ExtensionInfo extension : extensions) { - Resource[] resources = resolver.getResources(CLASSPATH_ALL_URL_PREFIX + "/impex/sanecleanup/" + extension.getName() + "/*.impex"); - List resourceList = Arrays.asList(resources); - resourceList.sort(Comparator.comparing(Resource::getFilename)); - applicableImpex.addAll(resourceList); - } + final List applicableImpex = getApplicableImpex(); Map hashToResource = calculateHashes(applicableImpex); Map filtered = filterAlreadyImported(hashToResource); - List auditModels = new ArrayList<>(); - for (Map.Entry entry : filtered.entrySet()) { - Resource resource = entry.getValue(); - LOG.info("sanecleanup: Importing {}", resource.getFilename()); - ImportConfig cfg = new ImportConfig(); - cfg.setEnableCodeExecution(true); - cfg.setScript(new StreamBasedImpExResource(resource.getInputStream(), "UTF-8")); - ImportResult importResult = importService.importData(cfg); - if (importResult.isError()) { - LOG.error("sanecleanup: Importing {} FAILED", resource.getFilename()); - } - auditModels.add(generateAuditEntry(entry)); - } - modelService.saveAll(auditModels); - removeOldAuditEntries(hashToResource); + final List auditModels = importImpexes(filtered); + saveAudit(auditModels); } catch (Exception e) { LOG.error("sanecleanup - failed", e); } } - private void removeOldAuditEntries(Map hashToResource) { - FlexibleSearchQuery old = new FlexibleSearchQuery("select {pk} from {SystemSetupAudit} where {className} = ?class and {hash} not in (?valid)"); - old.addQueryParameter("class", AfterInitListener.class.getCanonicalName()); - old.addQueryParameter("valid", hashToResource.keySet().isEmpty() ? 
Collections.singleton(PK.NULL_PK) : hashToResource.keySet()); - - boolean success = false; - try { - Transaction.current().begin(); - success = PersistenceUtils.doWithSLDPersistence(() -> { - final SearchResult oldModels = this.flexibleSearchService.search(old); - modelService.removeAll(oldModels.getResult()); - return true; - }); - } finally { - if (success) { - Transaction.current().commit(); - } else { - Transaction.current().rollback(); - } + private List getApplicableImpex() throws IOException { + final List extensions = ConfigUtil.getPlatformConfig(Registry.class).getExtensionInfosInBuildOrder(); + final List applicableImpex = new ArrayList<>(); + for (ExtensionInfo extension : extensions) { + Resource[] resources = resolver.getResources(CLASSPATH_ALL_URL_PREFIX + "/impex/sanecleanup/" + extension.getName() + "/*.impex"); + List resourceList = Arrays.asList(resources); + resourceList.sort(Comparator.comparing(Resource::getFilename)); + applicableImpex.addAll(resourceList); } + return applicableImpex; } - private Map calculateHashes(List suitableImpex) throws Exception { - if (suitableImpex.isEmpty()) { + private Map calculateHashes(List applicableImpex) throws Exception { + if (applicableImpex.isEmpty()) { return Collections.emptyMap(); } Map hashToResource = new LinkedHashMap<>(); - for (Resource impex : suitableImpex) { + for (Resource impex : applicableImpex) { // use different hash function to avoid collision with hash calculation for @SystemSetup classes - // de.hybris.platform.core.initialization.SystemSetupCollectorResult#computePatchHash + // ref. 
de.hybris.platform.core.initialization.SystemSetupCollectorResult#computePatchHash String hash = DigestUtils.sha1Hex(impex.getInputStream()); hashToResource.put(hash, impex); } @@ -126,15 +98,39 @@ private Map filterAlreadyImported(Map hashRe return filtered; } + private List importImpexes(Map filtered) throws IOException { + List auditModels = new ArrayList<>(); + for (Map.Entry entry : filtered.entrySet()) { + Resource resource = entry.getValue(); + LOG.info("sanecleanup: Importing {}", resource.getFilename()); + ImportConfig cfg = new ImportConfig(); + cfg.setEnableCodeExecution(true); + cfg.setScript(new StreamBasedImpExResource(resource.getInputStream(), "UTF-8")); + ImportResult importResult = importService.importData(cfg); + if (importResult.isError()) { + LOG.error("sanecleanup: Importing {} FAILED", resource.getFilename()); + } + auditModels.add(generateAuditEntry(entry)); + } + return auditModels; + } + private SystemSetupAuditModel generateAuditEntry(Map.Entry entry) { final SystemSetupAuditModel audit = modelService.create(SystemSetupAuditModel.class); audit.setHash(entry.getKey()); audit.setName(entry.getValue().getFilename()); - audit.setClassName(AfterInitListener.class.getCanonicalName()); + audit.setClassName(CleanupAfterInitListener.class.getCanonicalName()); audit.setMethodName("onEvent"); audit.setRequired(false); audit.setExtensionName(SanecleanupConstants.EXTENSIONNAME); audit.setUser(userService.getCurrentUser()); return audit; } + + private void saveAudit(final List auditModels) { + PersistenceUtils.doWithSLDPersistence(() -> { + modelService.saveAll(auditModels); + return true; + }); + } } diff --git a/src/mpern/sap/cleanup/cms2/CMSVersionGCPerformable.java b/src/mpern/sap/cleanup/cms2/CMSVersionGCPerformable.java index 8b7b9ba..da2b386 100644 --- a/src/mpern/sap/cleanup/cms2/CMSVersionGCPerformable.java +++ b/src/mpern/sap/cleanup/cms2/CMSVersionGCPerformable.java @@ -5,6 +5,7 @@ import de.hybris.platform.cms2.model.CMSVersionModel; import 
de.hybris.platform.cms2.version.service.CMSVersionGCService; import de.hybris.platform.core.PK; +import de.hybris.platform.core.model.type.ComposedTypeModel; import de.hybris.platform.cronjob.enums.CronJobResult; import de.hybris.platform.cronjob.enums.CronJobStatus; import de.hybris.platform.cronjob.model.CronJobModel; @@ -13,10 +14,11 @@ import de.hybris.platform.servicelayer.cronjob.PerformResult; import de.hybris.platform.servicelayer.search.FlexibleSearchQuery; import de.hybris.platform.servicelayer.search.SearchResult; +import de.hybris.platform.servicelayer.type.TypeService; import de.hybris.platform.tx.Transaction; -import de.hybris.platform.util.FlexibleSearchUtils; import de.hybris.platform.util.typesystem.PlatformStringUtils; import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.time.StopWatch; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.jdbc.core.BatchPreparedStatementSetter; @@ -27,8 +29,7 @@ import java.util.*; //Sane implementation of Content Version GC process. -//Same logic, without creating Business Process for every run and fast delete using SQL -//still not optimal because of the potentially huge "IN" clause to filter out valid versions. 
+//Same logic, without creating Business Process for every run and fast delete using JDBC batching public class CMSVersionGCPerformable extends AbstractJobPerformable { private static final Logger LOG = LoggerFactory.getLogger(CMSVersionGCPerformable.class); @@ -36,27 +37,29 @@ public class CMSVersionGCPerformable extends AbstractJobPerformable retainableVersions = getRetainableVersions(); - // de.hybris.platform.cms2.version.processengine.action.impl.CollectRelatedCMSVersionsGCProcessAction - Set retainablePKs = collectAllRetainableVersionPKs(retainableVersions); - - if (clearAbortRequestedIfNeeded(cronJobModel)) { - return new PerformResult(CronJobResult.UNKNOWN, CronJobStatus.ABORTED); - } + public boolean isAbortable() { + return true; + } + @Override + public PerformResult perform(CronJobModel cronJobModel) { try { - // de.hybris.platform.cms2.version.processengine.action.impl.RemoveCMSVersionsGCProcessAction + final List retainableVersions = getRetainableVersions(); + Set retainablePKs = collectAllRetainableVersionPKs(retainableVersions); + + if (clearAbortRequestedIfNeeded(cronJobModel)) { + return new PerformResult(CronJobResult.UNKNOWN, CronJobStatus.ABORTED); + } final Optional performResult = deleteObsoleteVersionsInBatches(cronJobModel, retainablePKs); if (performResult.isPresent()) { return performResult.get(); @@ -68,38 +71,68 @@ public PerformResult perform(CronJobModel cronJobModel) { return new PerformResult(CronJobResult.SUCCESS, CronJobStatus.FINISHED); } + // de.hybris.platform.cms2.version.processengine.action.impl.CollectRetainableCMSVersionsGCProcessAction + private List getRetainableVersions() { + int maxAgeInDays = configurationService.getConfiguration().getInt("version.gc.maxAgeDays", 0); + int maxNumberVersions = configurationService.getConfiguration().getInt("version.gc.maxNumberVersions", 0); + return cmsVersionGCService.getRetainableVersions(maxAgeInDays, maxNumberVersions); + } + + // 
de.hybris.platform.cms2.version.processengine.action.impl.CollectRelatedCMSVersionsGCProcessAction + private Set collectAllRetainableVersionPKs(List retainableVersions) { + Set retainablePKs = new HashSet<>(); + for (CMSVersionModel retainableVersion : retainableVersions) { + if (retainableVersion == null) { + continue; + } + retainablePKs.add(retainableVersion.getPk()); + if (CollectionUtils.isNotEmpty(retainableVersion.getRelatedChildren())) { + retainableVersion.getRelatedChildren().stream() + .filter(Objects::nonNull) + .forEach(v -> { + retainablePKs.add(v.getPk()); + //detach models to avoid memory leaks + modelService.detach(v); + }); + } + modelService.detach(retainableVersion); + } + return retainablePKs; + } + + // de.hybris.platform.cms2.version.processengine.action.impl.RemoveCMSVersionsGCProcessAction private Optional deleteObsoleteVersionsInBatches(CronJobModel cronJobModel, Set retainablePKs) { if (retainablePKs.isEmpty()) { retainablePKs = Collections.singleton(PK.NULL_PK); } - FlexibleSearchQuery versionsToDelete = new FlexibleSearchQuery("select {v:pk} from {cmsversion as v} where {v:pk} NOT IN (?retainable) order by {v:pk} desc", Collections.singletonMap("retainable", retainablePKs)); + FlexibleSearchQuery versionsToDelete = new FlexibleSearchQuery("select {v:pk} from {cmsversion as v} order by {v:pk} desc"); versionsToDelete.setResultClassList(Collections.singletonList(PK.class)); SearchResult result = flexibleSearchService.search(versionsToDelete); if (clearAbortRequestedIfNeeded(cronJobModel)) { return Optional.of(new PerformResult(CronJobResult.UNKNOWN, CronJobStatus.ABORTED)); } - final List pkList = result.getResult(); - final int total = pkList.size(); - int pageSize = 5000; - for(int i = 0; i < total; i += pageSize) { - int endIdx = i + pageSize; - if (endIdx > total) { - endIdx = total; - } - final List batchToDelete = pkList.subList(i, endIdx); + // this is actually faster than sending a massive value list to the DB, i.e. 
`where {v:pk} NOT IN (?retainable)` + // especially for a non-trivial amount (> 1000) of retainable versions + // query and logic only use PKs -> memory usage is minimal, one PK = 24 bytes of heap on a 64bit JVM + List toDelete = new ArrayList<>(result.getResult()); + final int totalSize = toDelete.size(); + toDelete.removeAll(retainablePKs); + final int deleteCount = toDelete.size(); + StopWatch sw = StopWatch.createStarted(); + int pageSize = cronJobModel.getQueryCount() > 0 ? cronJobModel.getQueryCount() : 1000; + + List statements = prepareDeleteStatements(); + + for (int i = 0; i < deleteCount; i += pageSize) { + int endIdx = Math.min(i + pageSize, deleteCount); + final List batchToDelete = toDelete.subList(i, endIdx); boolean success = false; try { Transaction.current().begin(); - //unfortunately CMSVersion forces jalo because of the needlessly overriden createItem(...) method -// PersistenceUtils.doWithSLDPersistence(() -> { -// final Set collect = batchToDelete.stream().map(pk -> modelService.get(pk)).collect(Collectors.toSet()); -// modelService.removeAll(collect); -// modelService.detachAll(); -// return true; -// }); - deleteBatchWithJDBC(batchToDelete); + deleteBatchWithJDBC(batchToDelete, statements); success = true; + LOG.debug("Deleted {} / {}...", i + batchToDelete.size(), deleteCount); } finally { if (success) { Transaction.current().commit(); @@ -111,71 +144,49 @@ private Optional deleteObsoleteVersionsInBatches(CronJobModel cro return Optional.of(new PerformResult(CronJobResult.UNKNOWN, CronJobStatus.ABORTED)); } } + sw.stop(); + LOG.info("Total versions: {}; Retainable versions: {}; {} versions deleted in {}", totalSize, retainablePKs.size(), deleteCount, sw.toString()); return Optional.empty(); } - private void deleteBatchWithJDBC(final List batchToDelete) { - jdbcTemplate.batchUpdate("delete from cmsversion where pk = ?", new BatchPreparedStatementSetter() { - @Override - public void setValues(PreparedStatement preparedStatement, int i) throws 
SQLException { - preparedStatement.setLong(1, batchToDelete.get(i).getLong()); - } + private List prepareDeleteStatements() { + ComposedTypeModel versionType = typeService.getComposedTypeForClass(CMSVersionModel.class); + ComposedTypeModel relationType = typeService.getComposedTypeForCode(CMSVersionModel._CMSVERSIONGCPROCESS2CMSVERSION); - @Override - public int getBatchSize() { - return batchToDelete.size(); - } - }); - jdbcTemplate.batchUpdate("delete from cmsversion2cmsversion where sourcepk = ?", new BatchPreparedStatementSetter() { - @Override - public void setValues(PreparedStatement preparedStatement, int i) throws SQLException { - preparedStatement.setLong(1, batchToDelete.get(i).getLong()); - } + List statements = new ArrayList<>(); - @Override - public int getBatchSize() { - return batchToDelete.size(); - } - }); - jdbcTemplate.batchUpdate("delete from cmsversion2cmsversion where targetpk = ?", new BatchPreparedStatementSetter() { - @Override - public void setValues(PreparedStatement preparedStatement, int i) throws SQLException { - preparedStatement.setLong(1, batchToDelete.get(i).getLong()); - } + statements.add(String.format("DELETE FROM %s WHERE %s = ?", versionType.getTable(), + typeService.getAttributeDescriptor(versionType, CMSVersionModel.PK).getDatabaseColumn())); + statements.add(String.format("DELETE FROM %s WHERE %s = ?", relationType.getTable(), + typeService.getAttributeDescriptor(relationType, "source").getDatabaseColumn())); + statements.add(String.format("DELETE FROM %s WHERE %s = ?", relationType.getTable(), + typeService.getAttributeDescriptor(relationType, "target").getDatabaseColumn())); - @Override - public int getBatchSize() { - return batchToDelete.size(); - } - }); - PK invalidation = batchToDelete.get(0); - // de.hybris.platform.util.Utilities.invalidateCache - Object[] key = new Object[]{Cache.CACHEKEY_HJMP, Cache.CACHEKEY_ENTITY, PlatformStringUtils.valueOf(invalidation.getTypeCode()), invalidation}; - 
Transaction.current().invalidate(key, 3, AbstractCacheUnit.INVALIDATIONTYPE_REMOVED); + return statements; } - private List getRetainableVersions() { - int maxAgeInDays = configurationService.getConfiguration().getInt("version.gc.maxAgeDays", 0); - int maxNumberVersions = configurationService.getConfiguration().getInt("version.gc.maxNumberVersions", 0); - final List retainableVersions = cmsVersionGCService.getRetainableVersions(maxAgeInDays, maxNumberVersions); - return retainableVersions; - } + private void deleteBatchWithJDBC(final List batchToDelete, List deletes) { - private Set collectAllRetainableVersionPKs(List retainableVersions) { - Set retainablePKs = new HashSet<>(); - for (CMSVersionModel retainableVersion : retainableVersions) { - if (retainableVersion == null) { - continue; - } - retainablePKs.add(retainableVersion.getPk()); - if (CollectionUtils.isNotEmpty(retainableVersion.getRelatedChildren())) { - retainableVersion.getRelatedChildren().stream() - .filter(Objects::nonNull) - .forEach(v -> retainablePKs.add(v.getPk())); - } + for (String delete : deletes) { + jdbcTemplate.batchUpdate(delete, new BatchPreparedStatementSetter() { + @Override + public void setValues(PreparedStatement preparedStatement, int i) throws SQLException { + preparedStatement.setLong(1, batchToDelete.get(i).getLong()); + } + + @Override + public int getBatchSize() { + return batchToDelete.size(); + } + }); } - return retainablePKs; + invalidateCache(batchToDelete); } - + //ref. de.hybris.platform.util.Utilities.invalidateCache + private void invalidateCache(List batchToDelete) { + PK invalidation = batchToDelete.get(0); + Object[] key = new Object[]{Cache.CACHEKEY_HJMP, Cache.CACHEKEY_ENTITY, PlatformStringUtils.valueOf(invalidation.getTypeCode()), invalidation}; + Transaction.current().invalidate(key, 3, AbstractCacheUnit.INVALIDATIONTYPE_REMOVED); + } }