Skip to content

Commit

Permalink
update bagit-profile and ocrd_tool.schema.yml from spec
Browse files Browse the repository at this point in the history
  • Loading branch information
kba committed Jun 1, 2022
1 parent 38f361a commit 7a8e8e2
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 9 deletions.
8 changes: 2 additions & 6 deletions ocrd_validators/ocrd_validators/bagit-profile.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
BagIt-Profile-Info:
BagIt-Profile-Identifier: https://ocr-d.de/bagit-profile.json
BagIt-Profile-Identifier: https://ocr-d.de/en/spec/bagit-profile.json
BagIt-Profile-Version: '1.2.0'
Source-Organization: OCR-D
External-Description: BagIt profile for OCR data
Expand All @@ -14,10 +14,6 @@ Bag-Info:
Ocrd-Mets:
required: false
default: 'mets.xml'
Ocrd-Manifestation-Depth:
required: false
default: partial
values: ["partial", "full"]
Ocrd-Identifier:
required: true
Ocrd-Checksum:
Expand All @@ -34,7 +30,7 @@ Tag-Files-Allowed:
- sources.csv
- metadata/*.xml
- metadata/*.txt
Allow-Fetch.txt: true
Allow-Fetch.txt: false
Serialization: required
Accept-Serialization: application/zip
Accept-BagIt-Version:
Expand Down
87 changes: 84 additions & 3 deletions ocrd_validators/ocrd_validators/ocrd_tool.schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@ properties:
type: array
items:
type: string
pattern: '^OCR-D-[A-Z0-9-]+$'
# pattern: '^OCR-D-[A-Z0-9-]+$'
output_file_grp:
description: Output fileGrp@USE this tool produces by default
type: array
items:
type: string
pattern: '^OCR-D-[A-Z0-9-]+$'
# pattern: '^OCR-D-[A-Z0-9-]+$'
parameters:
description: Object describing the parameters of a tool. Keys are parameter names, values sub-schemas.
type: object
Expand All @@ -73,6 +73,30 @@ properties:
description: Subtype, such as `float` for type `number` or `uri` for type `string`.
description:
description: Concise description of syntax and semantics of this parameter
items:
type: object
description: Describe the items of an array further
minimum:
type: number
description: Minimum value for number parameters, including the minimum
maximum:
type: number
description: Maximum value for number parameters, including the maximum
exclusiveMinimum:
type: number
description: Minimum value for number parameters, excluding the minimum
exclusiveMaximum:
type: number
description: Maximum value for number parameters, excluding the maximum
multipleOf:
type: number
description: For number values, those values must be a multiple of this number
properties:
type: object
description: Describe the properties of an object value
additionalProperties:
type: boolean
description: Whether an object value may contain properties not explicitly defined
required:
type: boolean
description: Whether this parameter is required
Expand All @@ -83,7 +107,15 @@ properties:
description: List the allowed values if a fixed list.
content-type:
type: string
description: "If parameter is reference to file: Media type of the file"
default: 'application/octet-stream'
description: >
The media type of resources this processor expects for
this parameter. Most processors use files for resources
(e.g. `*.traineddata` for `ocrd-tesserocr-recognize`)
while others use directories of files (e.g. `default` for
`ocrd-eynollah-segment`). If a parameter requires
directories, it must set `content-type` to
`text/directory`.
cacheable:
type: boolean
description: "If parameter is reference to file: Whether the file should be cached, e.g. because it is large and won't change."
Expand Down Expand Up @@ -126,3 +158,52 @@ properties:
- layout/segmentation/word
- layout/segmentation/classification
- layout/analysis
resource_locations:
type: array
description: The locations in the filesystem this processor supports for resource lookup
default: ['data', 'cwd', 'system', 'module']
items:
type: string
enum: ['data', 'cwd', 'system', 'module']
resources:
type: array
description: Resources for this processor
items:
type: object
additionalProperties: false
required:
- url
- description
- name
- size
properties:
url:
type: string
description: URLs of all components of this resource
description:
type: string
description: A description of the resource
name:
type: string
description: Name to store the resource as
type:
type: string
enum: ['file', 'directory', 'archive']
default: file
description: Type of the URL
parameter_usage:
type: string
description: Defines how the parameter is to be used
enum: ['as-is', 'without-extension']
default: 'as-is'
path_in_archive:
type: string
description: If type is archive, the resource is at this location in the archive
default: '.'
version_range:
type: string
description: Range of supported versions, syntax like in PEP 440
default: '>= 0.0.1'
size:
type: number
description: Size of the resource in bytes

0 comments on commit 7a8e8e2

Please sign in to comment.