Skip to content

Commit

Permalink
Merge branch 'mexthecat-802'
Browse files Browse the repository at this point in the history
  • Loading branch information
kba committed Nov 23, 2022
2 parents 1b5a362 + 4d5af2b commit 20ddd10
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 2 deletions.
2 changes: 1 addition & 1 deletion ocrd/ocrd/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def download_file(self, f, _recursion_count=0):
with pushd_popd(self.directory):
try:
# If the f.url is already a file path, and is within self.directory, do nothing
url_path = Path(f.url).resolve()
url_path = Path(f.url).absolute()
if not (url_path.exists() and url_path.relative_to(str(Path(self.directory).resolve()))):
raise Exception("Not already downloaded, moving on")
except Exception as e:
Expand Down
Binary file added tests/data/symlink-target
Binary file not shown.
1 change: 1 addition & 0 deletions tests/data/symlink-workspace/OCR-D-IMG/white.tif
33 changes: 33 additions & 0 deletions tests/data/symlink-workspace/mets.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?xml version="1.0" encoding="UTF-8"?>
<mets:mets xmlns:mets="http://www.loc.gov/METS/" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="info:lc/xmlns/premis-v2 http://www.loc.gov/standards/premis/v2/premis-v2-0.xsd http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/mets.xsd http://www.loc.gov/mix/v10 http://www.loc.gov/standards/mix/mix10/mix10.xsd">
<mets:metsHdr CREATEDATE="2022-02-15T08:50:20.326804">
<mets:agent TYPE="OTHER" OTHERTYPE="SOFTWARE" ROLE="CREATOR">
<mets:name>ocrd/core v2.30.0</mets:name>
</mets:agent>
</mets:metsHdr>
<mets:dmdSec ID="DMDLOG_0001">
<mets:mdWrap MDTYPE="MODS">
<mets:xmlData>
<mods:mods xmlns:mods="http://www.loc.gov/mods/v3">
<mods:identifier type="purl">strangeValidate-01</mods:identifier>
</mods:mods>
</mets:xmlData>
</mets:mdWrap>
</mets:dmdSec>
<mets:amdSec ID="AMD">
</mets:amdSec>
<mets:fileSec>
<mets:fileGrp USE="OCR-D-IMG">
<mets:file ID="OCR-D-IMG_white" MIMETYPE="image/tif">
<mets:FLocat LOCTYPE="OTHER" OTHERLOCTYPE="FILE" xlink:href="OCR-D-IMG/white.tif"/>
</mets:file>
</mets:fileGrp>
</mets:fileSec>
<mets:structMap TYPE="PHYSICAL">
<mets:div TYPE="physSequence">
<mets:div TYPE="page" ID="P_white">
<mets:fptr FILEID="OCR-D-IMG_white"/>
</mets:div>
</mets:div>
</mets:structMap>
</mets:mets>
1 change: 0 additions & 1 deletion tests/data/symlink-workspace/src/test.txt

This file was deleted.

13 changes: 13 additions & 0 deletions tests/validator/test_workspace_validator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
from tempfile import TemporaryDirectory
from pathlib import Path
from os.path import join
from shutil import copytree

Expand Down Expand Up @@ -233,6 +234,18 @@ def test_pcgtsid(self):
report = WorkspaceValidator.validate(self.resolver, join(wsdir, 'mets.xml'))
self.assertIn('pc:PcGts/@pcGtsId differs from mets:file/@ID: "foo" !== "PAGE_0017_PAGE"', report.warnings)

def test_symlink(self):
    """
    Validate the ``symlink-workspace`` fixture and expect a valid report.

    Data from https://github.com/OCR-D/core/issues/802

    The workspace is addressed through ``src_dir`` (no METS URL is passed),
    the checks listed in ``skip`` are disabled and ``download=False`` is
    passed to the validator.
    """
    workspace_dir = Path(__file__).parent.parent / "data/symlink-workspace"
    report = WorkspaceValidator.validate(
        Resolver(),
        None,
        src_dir=str(workspace_dir),
        skip=[
            'page',
            'mets_unique_identifier',
            'mets_file_group_names',
            'mets_files',
            'pixel_density',
            'page_xsd',
            'mets_xsd',
        ],
        download=False,
    )
    # Carry the collected errors in the failure message rather than printing
    # them on every (also passing) run; matches the self.assert* style used
    # by the neighbouring tests.
    self.assertTrue(report.is_valid, report.errors)


if __name__ == '__main__':
main(__file__)

0 comments on commit 20ddd10

Please sign in to comment.