From 8a31de4ed3541f7a27b12e934680145c5f5fd2ad Mon Sep 17 00:00:00 2001 From: floxdeveloper Date: Mon, 8 Jul 2024 16:50:44 +0200 Subject: [PATCH 1/2] Improve annotated text extraction by using another rounding mechanism --- README.md | 4 +++- manifest.json | 2 +- package-lock.json | 4 ++-- package.json | 2 +- src/extractHighlight.ts | 3 +-- src/main.ts | 4 ++-- versions.json | 3 ++- 7 files changed, 12 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 4ce41ff..5389335 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,8 @@ As such, you can relate comments for your topics (here 'Hello World') from sever ## Versions +1.2.1 improved annotation extraction + 1.2.0 added template settings 1.1.0 add new function `Extract PDF Annotations from single file from path in clipboard` to extract annotations from PDFs outside Obsidian vault @@ -84,7 +86,7 @@ This plugin builds on ideas from Alexis Rondeaus Plugin https://github.com/akaal ## Author -Franz Achermann +Franz Achermann and Florian Stöckl diff --git a/manifest.json b/manifest.json index 0afbc83..cb7674e 100644 --- a/manifest.json +++ b/manifest.json @@ -1,7 +1,7 @@ { "id": "obsidian-extract-pdf-annotations", "name": "Extract PDF Annotations", - "version": "1.2.0", + "version": "1.2.1", "minAppVersion": "1.1.1", "description": "Extract PDF Annotations (Notes and Highlights) and sort them by topics", "author": "Franz Achermann", diff --git a/package-lock.json b/package-lock.json index a2086b9..f08b5e5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "obsidian-extract-pdf-annotations", - "version": "1.2.0", + "version": "1.2.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "obsidian-extract-pdf-annotations", - "version": "1.2.0", + "version": "1.2.1", "license": "MIT", "dependencies": { "@types/pdf": "^0.0.31", diff --git a/package.json b/package.json index a7aaaba..460bcc7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "obsidian-extract-pdf-annotations", - "version": "1.2.0", + "version": "1.2.1", "description": "Extract notes and highlights from PDF Files in Obsidian (https://obsidian.md)", "main": "main.js", "scripts": { diff --git a/src/extractHighlight.ts b/src/extractHighlight.ts index 0c5bbe9..cc06ab8 100644 --- a/src/extractHighlight.ts +++ b/src/extractHighlight.ts @@ -17,7 +17,7 @@ function searchQuad(minx: number, maxx: number, miny: number, maxy: number, item if (x.transform[4] + x.width <= maxx) { // end of txt ends before highlight ends return txt + x.str.substr(start) // } else { // else, calculate proporation end to get the expected length - const lenc = Math.floor(x.str.length * (maxx - x.transform[4]) / x.width) - start + const lenc = Math.round(x.str.length * (maxx - x.transform[4]) / x.width) - start return txt + x.str.substr(start, lenc) } }, '') @@ -52,7 +52,6 @@ export function extractHighlight(annot: any, items: any) { // accumulate all annotations in the array total async function loadPage(page, pagenum: number, file: PDFFile, containingFolder: string, total: object[]) { let annotations = await page.getAnnotations() - // console.log('Annotations', annotations) annotations = annotations.filter(function (anno) { return SUPPORTED_ANNOTS.indexOf(anno.subtype) >= 0; diff --git a/src/main.ts b/src/main.ts index 1be4e0a..3a16027 100644 --- a/src/main.ts +++ b/src/main.ts @@ -130,8 +130,8 @@ export default class PDFAnnotationPlugin extends Plugin { const filePathWithoutQuotes = filePathFromClipboard.replace(/"/g, ''); const stats = fs.statSync(filePathWithoutQuotes); if (stats.isFile()) { - const pdfjsLib = await loadPdfJs() - const binaryContent = await FileSystemAdapter.readLocalFile(filePathWithoutQuotes) + const pdfjsLib = await loadPdfJs(); + const binaryContent = await FileSystemAdapter.readLocalFile(filePathWithoutQuotes); const filePathWithSlashs: string = filePathWithoutQuotes.replace(/\\/g, '/'); const filePathSplits: string[] = filePathWithSlashs.split('/'); const fileName = filePathSplits.last(); diff --git a/versions.json b/versions.json index c627e18..e85b021 100644 --- a/versions.json +++ b/versions.json @@ -3,5 +3,6 @@ "1.0.3": "0.12.0", "1.0.4": "0.12.0", "1.1.0": "1.1.1", - "1.2.0": "1.1.1" + "1.2.0": "1.1.1", + "1.2.1": "1.1.1" } \ No newline at end of file From 5820885ec6bc155d4dbfffe5babd74c44846ea07 Mon Sep 17 00:00:00 2001 From: floxdeveloper Date: Thu, 8 Aug 2024 20:26:12 +0200 Subject: [PATCH 2/2] Adjust start of the word extraction as well by changing the rounding algorithm --- src/extractHighlight.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/extractHighlight.ts b/src/extractHighlight.ts index cc06ab8..0db2eff 100644 --- a/src/extractHighlight.ts +++ b/src/extractHighlight.ts @@ -13,7 +13,7 @@ function searchQuad(minx: number, maxx: number, miny: number, maxy: number, item if (x.transform[4] > maxx) return txt // start of text after highlight ends const start = (x.transform[4] >= minx ? 0 : // start at pos 0, when text starts after hightlight start - Math.floor(x.str.length * (minx - x.transform[4]) / x.width)) // otherwise, rule of three: start proportional + Math.round(x.str.length * (minx - x.transform[4]) / x.width)) // otherwise, rule of three: start proportional if (x.transform[4] + x.width <= maxx) { // end of txt ends before highlight ends return txt + x.str.substr(start) // } else { // else, calculate proporation end to get the expected length